iterator = utf8String.codePointIterator(iteratorMode);
+ for (int i = 0; i < utf8String.numChars(); ++i) {
+ assertTrue(iterator.hasNext());
+ int codePoint = (utf8String.isValid() ? utf8String : utf8String.makeValid()).getChar(i);
+ assertEquals(codePoint, (int) iterator.next());
+ }
+ assertFalse(iterator.hasNext());
+ }
+ @Test
+ public void reverseCodePointIterator() {
+ // Valid UTF8 strings
+ testReverseCodePointIterator(fromString(""));
+ testReverseCodePointIterator(fromString("abc"));
+ testReverseCodePointIterator(fromString("a!2&^R"));
+ testReverseCodePointIterator(fromString("aéह 日å!"));
+ testReverseCodePointIterator(fromBytes(new byte[] {(byte) 0x41}));
+ testReverseCodePointIterator(fromBytes(new byte[] {(byte) 0xC2, (byte) 0xA3}));
+ testReverseCodePointIterator(fromBytes(new byte[] {(byte) 0xE2, (byte) 0x82, (byte) 0xAC}));
+ // Invalid UTF8 strings
+ testReverseCodePointIterator(fromBytes(new byte[] {(byte) 0xFF}));
+ testReverseCodePointIterator(fromBytes(new byte[] {(byte) 0x80}));
+ testReverseCodePointIterator(fromBytes(new byte[] {(byte) 0xC2, (byte) 0x80}));
+ testReverseCodePointIterator(fromBytes(new byte[] {(byte) 0xE2, (byte) 0x82, (byte) 0x80}));
+ testReverseCodePointIterator(fromBytes(new byte[] {(byte) 0x41, (byte) 0x80, (byte) 0x42}));
+ testReverseCodePointIterator(fromBytes(new byte[] {
+ (byte) 0x41, (byte) 0xC2, (byte) 0x80, (byte) 0x42}));
+ testReverseCodePointIterator(fromBytes(new byte[] {
+ (byte) 0x41, (byte) 0xE2, (byte) 0x82, (byte) 0x80, (byte) 0x42}));
+ }
+
+ @Test
+ public void toBinaryString() {
+ assertEquals(ZERO_UTF8, UTF8String.toBinaryString(0));
+ assertEquals(UTF8String.fromString("1"), UTF8String.toBinaryString(1));
+ assertEquals(UTF8String.fromString("10"), UTF8String.toBinaryString(2));
+ assertEquals(UTF8String.fromString("100"), UTF8String.toBinaryString(4));
+ assertEquals(UTF8String.fromString("111"), UTF8String.toBinaryString(7));
+ assertEquals(
+ UTF8String.fromString("1111111111111111111111111111111111111111111111111111111111110011"),
+ UTF8String.toBinaryString(-13));
+ assertEquals(
+ UTF8String.fromString("1000000000000000000000000000000000000000000000000000000000000000"),
+ UTF8String.toBinaryString(Long.MIN_VALUE));
+ assertEquals(
+ UTF8String.fromString("111111111111111111111111111111111111111111111111111111111111111"),
+ UTF8String.toBinaryString(Long.MAX_VALUE));
+ }
}
diff --git a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala
index 768d26bf0e11e..3c29daeff168f 100644
--- a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala
+++ b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala
@@ -20,7 +20,10 @@ package org.apache.spark.unsafe.types
import scala.collection.parallel.immutable.ParSeq
import scala.jdk.CollectionConverters.MapHasAsScala
+import com.ibm.icu.util.ULocale
+
import org.apache.spark.SparkException
+import org.apache.spark.sql.catalyst.util.CollationFactory.fetchCollation
// scalastyle:off
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.must.Matchers
@@ -30,31 +33,93 @@ import org.apache.spark.unsafe.types.UTF8String.{fromString => toUTF8}
class CollationFactorySuite extends AnyFunSuite with Matchers { // scalastyle:ignore funsuite
test("collationId stability") {
- val utf8Binary = fetchCollation(0)
+ assert(INDETERMINATE_COLLATION_ID == -1)
+
+ assert(UTF8_BINARY_COLLATION_ID == 0)
+ val utf8Binary = fetchCollation(UTF8_BINARY_COLLATION_ID)
assert(utf8Binary.collationName == "UTF8_BINARY")
assert(utf8Binary.supportsBinaryEquality)
- val utf8BinaryLcase = fetchCollation(1)
- assert(utf8BinaryLcase.collationName == "UTF8_BINARY_LCASE")
+ assert(UTF8_LCASE_COLLATION_ID == 1)
+ val utf8BinaryLcase = fetchCollation(UTF8_LCASE_COLLATION_ID)
+ assert(utf8BinaryLcase.collationName == "UTF8_LCASE")
assert(!utf8BinaryLcase.supportsBinaryEquality)
- val unicode = fetchCollation(2)
+ assert(UNICODE_COLLATION_ID == (1 << 29))
+ val unicode = fetchCollation(UNICODE_COLLATION_ID)
assert(unicode.collationName == "UNICODE")
- assert(unicode.supportsBinaryEquality);
+ assert(!unicode.supportsBinaryEquality)
- val unicodeCi = fetchCollation(3)
+ assert(UNICODE_CI_COLLATION_ID == ((1 << 29) | (1 << 17)))
+ val unicodeCi = fetchCollation(UNICODE_CI_COLLATION_ID)
assert(unicodeCi.collationName == "UNICODE_CI")
assert(!unicodeCi.supportsBinaryEquality)
}
- test("fetch invalid collation name") {
- val error = intercept[SparkException] {
- fetchCollation("UTF8_BS")
+ test("UTF8_BINARY and ICU root locale collation names") {
+ // Collation name already normalized.
+ Seq(
+ "UTF8_BINARY",
+ "UTF8_LCASE",
+ "UNICODE",
+ "UNICODE_CI",
+ "UNICODE_AI",
+ "UNICODE_CI_AI"
+ ).foreach(collationName => {
+ val col = fetchCollation(collationName)
+ assert(col.collationName == collationName)
+ })
+ // Collation name normalization.
+ Seq(
+ // ICU root locale.
+ ("UNICODE_CS", "UNICODE"),
+ ("UNICODE_CS_AS", "UNICODE"),
+ ("UNICODE_CI_AS", "UNICODE_CI"),
+ ("UNICODE_AI_CS", "UNICODE_AI"),
+ ("UNICODE_AI_CI", "UNICODE_CI_AI"),
+ // Randomized case collation names.
+ ("utf8_binary", "UTF8_BINARY"),
+ ("UtF8_LcasE", "UTF8_LCASE"),
+ ("unicode", "UNICODE"),
+ ("UnICoDe_cs_aI", "UNICODE_AI")
+ ).foreach{
+ case (name, normalized) =>
+ val col = fetchCollation(name)
+ assert(col.collationName == normalized)
}
+ }
- assert(error.getErrorClass === "COLLATION_INVALID_NAME")
- assert(error.getMessageParameters.asScala ===
- Map("proposal" -> "UTF8_BINARY", "collationName" -> "UTF8_BS"))
+ test("fetch invalid UTF8_BINARY and ICU root locale collation names") {
+ Seq(
+ ("UTF8_BINARY_CS", "UTF8_BINARY"),
+ ("UTF8_BINARY_AS", "UTF8_BINARY"), // this should be UNICODE_AS
+ ("UTF8_BINARY_CS_AS","UTF8_BINARY"), // this should be UNICODE_CS_AS
+ ("UTF8_BINARY_AS_CS","UTF8_BINARY"),
+ ("UTF8_BINARY_CI","UTF8_BINARY"),
+ ("UTF8_BINARY_AI","UTF8_BINARY"),
+ ("UTF8_BINARY_CI_AI","UTF8_BINARY"),
+ ("UTF8_BINARY_AI_CI","UTF8_BINARY"),
+ ("UTF8_BS","UTF8_LCASE"),
+ ("BINARY_UTF8","ar_SAU"),
+ ("UTF8_BINARY_A","UTF8_BINARY"),
+ ("UNICODE_X","UNICODE"),
+ ("UNICODE_CI_X","UNICODE"),
+ ("UNICODE_LCASE_X","UNICODE"),
+ ("UTF8_UNICODE","UTF8_LCASE"),
+ ("UTF8_BINARY_UNICODE","UTF8_BINARY"),
+ ("CI_UNICODE", "UNICODE"),
+ ("LCASE_UNICODE", "UNICODE"),
+ ("UNICODE_UNSPECIFIED", "UNICODE"),
+ ("UNICODE_CI_UNSPECIFIED", "UNICODE"),
+ ("UNICODE_UNSPECIFIED_CI_UNSPECIFIED", "UNICODE"),
+ ("UNICODE_INDETERMINATE", "UNICODE"),
+ ("UNICODE_CI_INDETERMINATE", "UNICODE")
+ ).foreach{case (collationName, proposals) =>
+ val error = intercept[SparkException] { fetchCollation(collationName) }
+ assert(error.getErrorClass === "COLLATION_INVALID_NAME")
+ assert(error.getMessageParameters.asScala === Map(
+ "collationName" -> collationName, "proposals" -> proposals))
+ }
}
case class CollationTestCase[R](collationName: String, s1: String, s2: String, expectedResult: R)
@@ -64,18 +129,24 @@ class CollationFactorySuite extends AnyFunSuite with Matchers { // scalastyle:ig
CollationTestCase("UTF8_BINARY", "aaa", "aaa", true),
CollationTestCase("UTF8_BINARY", "aaa", "AAA", false),
CollationTestCase("UTF8_BINARY", "aaa", "bbb", false),
- CollationTestCase("UTF8_BINARY_LCASE", "aaa", "aaa", true),
- CollationTestCase("UTF8_BINARY_LCASE", "aaa", "AAA", true),
- CollationTestCase("UTF8_BINARY_LCASE", "aaa", "AaA", true),
- CollationTestCase("UTF8_BINARY_LCASE", "aaa", "AaA", true),
- CollationTestCase("UTF8_BINARY_LCASE", "aaa", "aa", false),
- CollationTestCase("UTF8_BINARY_LCASE", "aaa", "bbb", false),
+ CollationTestCase("UTF8_BINARY", "å", "a\u030A", false),
+ CollationTestCase("UTF8_LCASE", "aaa", "aaa", true),
+ CollationTestCase("UTF8_LCASE", "aaa", "AAA", true),
+ CollationTestCase("UTF8_LCASE", "aaa", "AaA", true),
+ CollationTestCase("UTF8_LCASE", "aaa", "AaA", true),
+ CollationTestCase("UTF8_LCASE", "aaa", "aa", false),
+ CollationTestCase("UTF8_LCASE", "aaa", "bbb", false),
+ CollationTestCase("UTF8_LCASE", "å", "a\u030A", false),
CollationTestCase("UNICODE", "aaa", "aaa", true),
CollationTestCase("UNICODE", "aaa", "AAA", false),
CollationTestCase("UNICODE", "aaa", "bbb", false),
+ CollationTestCase("UNICODE", "å", "a\u030A", true),
CollationTestCase("UNICODE_CI", "aaa", "aaa", true),
CollationTestCase("UNICODE_CI", "aaa", "AAA", true),
- CollationTestCase("UNICODE_CI", "aaa", "bbb", false))
+ CollationTestCase("UNICODE_CI", "aaa", "bbb", false),
+ CollationTestCase("UNICODE_CI", "å", "a\u030A", true),
+ CollationTestCase("UNICODE_CI", "Å", "a\u030A", true)
+ )
checks.foreach(testCase => {
val collation = fetchCollation(testCase.collationName)
@@ -94,12 +165,12 @@ class CollationFactorySuite extends AnyFunSuite with Matchers { // scalastyle:ig
CollationTestCase("UTF8_BINARY", "aaa", "AAA", 1),
CollationTestCase("UTF8_BINARY", "aaa", "bbb", -1),
CollationTestCase("UTF8_BINARY", "aaa", "BBB", 1),
- CollationTestCase("UTF8_BINARY_LCASE", "aaa", "aaa", 0),
- CollationTestCase("UTF8_BINARY_LCASE", "aaa", "AAA", 0),
- CollationTestCase("UTF8_BINARY_LCASE", "aaa", "AaA", 0),
- CollationTestCase("UTF8_BINARY_LCASE", "aaa", "AaA", 0),
- CollationTestCase("UTF8_BINARY_LCASE", "aaa", "aa", 1),
- CollationTestCase("UTF8_BINARY_LCASE", "aaa", "bbb", -1),
+ CollationTestCase("UTF8_LCASE", "aaa", "aaa", 0),
+ CollationTestCase("UTF8_LCASE", "aaa", "AAA", 0),
+ CollationTestCase("UTF8_LCASE", "aaa", "AaA", 0),
+ CollationTestCase("UTF8_LCASE", "aaa", "AaA", 0),
+ CollationTestCase("UTF8_LCASE", "aaa", "aa", 1),
+ CollationTestCase("UTF8_LCASE", "aaa", "bbb", -1),
CollationTestCase("UNICODE", "aaa", "aaa", 0),
CollationTestCase("UNICODE", "aaa", "AAA", -1),
CollationTestCase("UNICODE", "aaa", "bbb", -1),
@@ -152,4 +223,246 @@ class CollationFactorySuite extends AnyFunSuite with Matchers { // scalastyle:ig
}
})
}
+
+ test("test collation caching") {
+ Seq(
+ "UTF8_BINARY",
+ "UTF8_LCASE",
+ "UNICODE",
+ "UNICODE_CI",
+ "UNICODE_AI",
+ "UNICODE_CI_AI",
+ "UNICODE_AI_CI"
+ ).foreach(collationId => {
+ val col1 = fetchCollation(collationId)
+ val col2 = fetchCollation(collationId)
+ assert(col1 eq col2) // Check for reference equality.
+ })
+ }
+
+ test("collations with ICU non-root localization") {
+ Seq(
+ // Language only.
+ "en",
+ "en_CS",
+ "en_CI",
+ "en_AS",
+ "en_AI",
+ // Language + 3-letter country code.
+ "en_USA",
+ "en_USA_CS",
+ "en_USA_CI",
+ "en_USA_AS",
+ "en_USA_AI",
+ // Language + script code.
+ "sr_Cyrl",
+ "sr_Cyrl_CS",
+ "sr_Cyrl_CI",
+ "sr_Cyrl_AS",
+ "sr_Cyrl_AI",
+ // Language + script code + 3-letter country code.
+ "sr_Cyrl_SRB",
+ "sr_Cyrl_SRB_CS",
+ "sr_Cyrl_SRB_CI",
+ "sr_Cyrl_SRB_AS",
+ "sr_Cyrl_SRB_AI"
+ ).foreach(collationICU => {
+ val col = fetchCollation(collationICU)
+ assert(col.collator.getLocale(ULocale.VALID_LOCALE) != ULocale.ROOT)
+ })
+ }
+
+ test("invalid names of collations with ICU non-root localization") {
+ Seq(
+ ("en_US", "en_USA"), // Must use 3-letter country code
+ ("eN_US", "en_USA"), // verify that proper casing is captured in error.
+ ("enn", "en, nn, bn"),
+ ("en_AAA", "en_USA"),
+ ("en_Something", "UNICODE"),
+ ("en_Something_USA", "en_USA"),
+ ("en_LCASE", "en_USA"),
+ ("en_UCASE", "en_USA"),
+ ("en_CI_LCASE", "UNICODE"),
+ ("en_CI_UCASE", "en_USA"),
+ ("en_CI_UNSPECIFIED", "en_USA"),
+ ("en_USA_UNSPECIFIED", "en_USA"),
+ ("en_USA_UNSPECIFIED_CI", "en_USA_CI"),
+ ("en_INDETERMINATE", "en_USA"),
+ ("en_USA_INDETERMINATE", "en_USA"),
+ ("en_Latn_USA", "en_USA"),
+ ("en_Cyrl_USA", "en_USA"),
+ ("en_USA_AAA", "en_USA"),
+ ("sr_Cyrl_SRB_AAA", "sr_Cyrl_SRB"),
+ // Invalid ordering of language, script and country code.
+ ("USA_en", "en"),
+ ("sr_SRB_Cyrl", "sr_Cyrl"),
+ ("SRB_sr", "ar_SAU"),
+ ("SRB_sr_Cyrl", "bs_Cyrl"),
+ ("SRB_Cyrl_sr", "sr_Cyrl_SRB"),
+ ("Cyrl_sr", "sr_Cyrl_SRB"),
+ ("Cyrl_sr_SRB", "sr_Cyrl_SRB"),
+ ("Cyrl_SRB_sr", "sr_Cyrl_SRB"),
+ // Collation specifiers in the middle of locale.
+ ("CI_en", "ceb"),
+ ("USA_CI_en", "UNICODE"),
+ ("en_CI_USA", "en_USA"),
+ ("CI_sr_Cyrl_SRB", "sr_Cyrl_SRB"),
+ ("sr_CI_Cyrl_SRB", "sr_Cyrl_SRB"),
+ ("sr_Cyrl_CI_SRB", "sr_Cyrl_SRB"),
+ ("CI_Cyrl_sr", "sr_Cyrl_SRB"),
+ ("Cyrl_CI_sr", "he_ISR"),
+ ("Cyrl_CI_sr_SRB", "sr_Cyrl_SRB"),
+ ("Cyrl_sr_CI_SRB", "sr_Cyrl_SRB"),
+ // no locale specified
+ ("_CI_AI", "af_CI_AI, am_CI_AI, ar_CI_AI"),
+ ("", "af, am, ar")
+ ).foreach { case (collationName, proposals) => {
+ val error = intercept[SparkException] { fetchCollation(collationName) }
+ assert(error.getErrorClass === "COLLATION_INVALID_NAME")
+
+ assert(error.getMessageParameters.asScala === Map(
+ "collationName" -> collationName, "proposals" -> proposals))
+ }}
+ }
+
+ test("collations name normalization for ICU non-root localization") {
+ Seq(
+ ("en_USA", "en_USA"),
+ ("en_CS", "en"),
+ ("en_AS", "en"),
+ ("en_CS_AS", "en"),
+ ("en_AS_CS", "en"),
+ ("en_CI", "en_CI"),
+ ("en_AI", "en_AI"),
+ ("en_AI_CI", "en_CI_AI"),
+ ("en_CI_AI", "en_CI_AI"),
+ ("en_CS_AI", "en_AI"),
+ ("en_AI_CS", "en_AI"),
+ ("en_CI_AS", "en_CI"),
+ ("en_AS_CI", "en_CI"),
+ ("en_USA_AI_CI", "en_USA_CI_AI"),
+ // Randomized case.
+ ("EN_USA", "en_USA"),
+ ("SR_CYRL", "sr_Cyrl"),
+ ("sr_cyrl_srb", "sr_Cyrl_SRB"),
+ ("sR_cYRl_sRb", "sr_Cyrl_SRB")
+ ).foreach {
+ case (name, normalized) =>
+ val col = fetchCollation(name)
+ assert(col.collationName == normalized)
+ }
+ }
+
+ test("invalid collationId") {
+ val badCollationIds = Seq(
+ INDETERMINATE_COLLATION_ID, // Indeterminate collation.
+ 1 << 30, // User-defined collation range.
+ (1 << 30) | 1, // User-defined collation range.
+ (1 << 30) | (1 << 29), // User-defined collation range.
+ 1 << 1, // UTF8_BINARY mandatory zero bit 1 breach.
+ 1 << 2, // UTF8_BINARY mandatory zero bit 2 breach.
+ 1 << 3, // UTF8_BINARY mandatory zero bit 3 breach.
+ 1 << 4, // UTF8_BINARY mandatory zero bit 4 breach.
+ 1 << 5, // UTF8_BINARY mandatory zero bit 5 breach.
+ 1 << 6, // UTF8_BINARY mandatory zero bit 6 breach.
+ 1 << 7, // UTF8_BINARY mandatory zero bit 7 breach.
+ 1 << 8, // UTF8_BINARY mandatory zero bit 8 breach.
+ 1 << 9, // UTF8_BINARY mandatory zero bit 9 breach.
+ 1 << 10, // UTF8_BINARY mandatory zero bit 10 breach.
+ 1 << 11, // UTF8_BINARY mandatory zero bit 11 breach.
+ 1 << 12, // UTF8_BINARY mandatory zero bit 12 breach.
+ 1 << 13, // UTF8_BINARY mandatory zero bit 13 breach.
+ 1 << 14, // UTF8_BINARY mandatory zero bit 14 breach.
+ 1 << 15, // UTF8_BINARY mandatory zero bit 15 breach.
+ 1 << 16, // UTF8_BINARY mandatory zero bit 16 breach.
+ 1 << 17, // UTF8_BINARY mandatory zero bit 17 breach.
+ 1 << 18, // UTF8_BINARY mandatory zero bit 18 breach.
+ 1 << 19, // UTF8_BINARY mandatory zero bit 19 breach.
+ 1 << 20, // UTF8_BINARY mandatory zero bit 20 breach.
+ 1 << 23, // UTF8_BINARY mandatory zero bit 23 breach.
+ 1 << 24, // UTF8_BINARY mandatory zero bit 24 breach.
+ 1 << 25, // UTF8_BINARY mandatory zero bit 25 breach.
+ 1 << 26, // UTF8_BINARY mandatory zero bit 26 breach.
+ 1 << 27, // UTF8_BINARY mandatory zero bit 27 breach.
+ 1 << 28, // UTF8_BINARY mandatory zero bit 28 breach.
+ (1 << 29) | (1 << 12), // ICU mandatory zero bit 12 breach.
+ (1 << 29) | (1 << 13), // ICU mandatory zero bit 13 breach.
+ (1 << 29) | (1 << 14), // ICU mandatory zero bit 14 breach.
+ (1 << 29) | (1 << 15), // ICU mandatory zero bit 15 breach.
+ (1 << 29) | (1 << 18), // ICU mandatory zero bit 18 breach.
+ (1 << 29) | (1 << 19), // ICU mandatory zero bit 19 breach.
+ (1 << 29) | (1 << 20), // ICU mandatory zero bit 20 breach.
+ (1 << 29) | (1 << 21), // ICU mandatory zero bit 21 breach.
+ (1 << 29) | (1 << 22), // ICU mandatory zero bit 22 breach.
+ (1 << 29) | (1 << 23), // ICU mandatory zero bit 23 breach.
+ (1 << 29) | (1 << 24), // ICU mandatory zero bit 24 breach.
+ (1 << 29) | (1 << 25), // ICU mandatory zero bit 25 breach.
+ (1 << 29) | (1 << 26), // ICU mandatory zero bit 26 breach.
+ (1 << 29) | (1 << 27), // ICU mandatory zero bit 27 breach.
+ (1 << 29) | (1 << 28), // ICU mandatory zero bit 28 breach.
+ (1 << 29) | 0xFFFF // ICU with invalid locale id.
+ )
+ badCollationIds.foreach(collationId => {
+ // Assumptions about the collation ID layout are violated, so fetchCollation fails its assertion.
+ intercept[AssertionError](fetchCollation(collationId))
+ })
+ }
+
+ test("repeated and/or incompatible and/or misplaced specifiers in collation name") {
+ Seq(
+ ("UTF8_LCASE_LCASE", "UTF8_LCASE"),
+ ("UNICODE_CS_CS", "UNICODE_CS"),
+ ("UNICODE_CI_CI", "UNICODE_CI"),
+ ("UNICODE_CI_CS", "UNICODE_CS"),
+ ("UNICODE_CS_CI", "UNICODE_CS"),
+ ("UNICODE_AS_AS", "UNICODE_AS"),
+ ("UNICODE_AI_AI", "UNICODE_AI"),
+ ("UNICODE_AS_AI", "UNICODE_AS"),
+ ("UNICODE_AI_AS", "UNICODE_AS"),
+ ("UNICODE_AS_CS_AI", "UNICODE_AS_CS"),
+ ("UNICODE_CS_AI_CI", "UNICODE_CS_AI"),
+ ("UNICODE_CS_AS_CI_AI", "UNICODE_CS_AS"),
+ ("UNICODE__CS__AS", "UNICODE_AS"),
+ ("UNICODE-CS-AS", "UNICODE"),
+ ("UNICODECSAS", "UNICODE"),
+ ("_CS_AS_UNICODE", "UNICODE")
+ ).foreach { case (collationName, proposals) =>
+ val error = intercept[SparkException] {
+ fetchCollation(collationName)
+ }
+
+ assert(error.getErrorClass === "COLLATION_INVALID_NAME")
+ assert(error.getMessageParameters.asScala === Map(
+ "collationName" -> collationName, "proposals" -> proposals))
+ }
+ }
+
+ test("basic ICU collator checks") {
+ Seq(
+ CollationTestCase("UNICODE_CI", "a", "A", true),
+ CollationTestCase("UNICODE_CI", "a", "å", false),
+ CollationTestCase("UNICODE_CI", "a", "Å", false),
+ CollationTestCase("UNICODE_AI", "a", "A", false),
+ CollationTestCase("UNICODE_AI", "a", "å", true),
+ CollationTestCase("UNICODE_AI", "a", "Å", false),
+ CollationTestCase("UNICODE_CI_AI", "a", "A", true),
+ CollationTestCase("UNICODE_CI_AI", "a", "å", true),
+ CollationTestCase("UNICODE_CI_AI", "a", "Å", true)
+ ).foreach(testCase => {
+ val collation = fetchCollation(testCase.collationName)
+ assert(collation.equalsFunction(toUTF8(testCase.s1), toUTF8(testCase.s2)) ==
+ testCase.expectedResult)
+ })
+ Seq(
+ CollationTestCase("en", "a", "A", -1),
+ CollationTestCase("en_CI", "a", "A", 0),
+ CollationTestCase("en_AI", "a", "å", 0),
+ CollationTestCase("sv", "Kypper", "Köpfe", -1),
+ CollationTestCase("de", "Kypper", "Köpfe", 1)
+ ).foreach(testCase => {
+ val collation = fetchCollation(testCase.collationName)
+ val result = collation.comparator.compare(toUTF8(testCase.s1), toUTF8(testCase.s2))
+ assert(Integer.signum(result) == testCase.expectedResult)
+ })
+ }
}
diff --git a/common/utils/src/main/java/org/apache/spark/internal/SparkLogger.java b/common/utils/src/main/java/org/apache/spark/internal/SparkLogger.java
new file mode 100644
index 0000000000000..8c210a4fab3c3
--- /dev/null
+++ b/common/utils/src/main/java/org/apache/spark/internal/SparkLogger.java
@@ -0,0 +1,246 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.internal;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.function.Consumer;
+
+import org.apache.logging.log4j.CloseableThreadContext;
+import org.apache.logging.log4j.message.MessageFactory;
+import org.apache.logging.log4j.message.ParameterizedMessageFactory;
+// checkstyle.off: RegexpSinglelineJava
+import org.slf4j.Logger;
+// checkstyle.on: RegexpSinglelineJava
+
+// checkstyle.off: RegexpSinglelineJava
+/**
+ * Guidelines for the Structured Logging Framework - Java Logging
+ *
+ *
+ * Use the `org.apache.spark.internal.SparkLoggerFactory` to get the logger instance in Java code:
+ * Getting Logger Instance:
+ * Instead of using `org.slf4j.LoggerFactory`, use `org.apache.spark.internal.SparkLoggerFactory`
+ * to ensure structured logging.
+ *
+ *
+ * import org.apache.spark.internal.SparkLogger;
+ * import org.apache.spark.internal.SparkLoggerFactory;
+ * private static final SparkLogger logger = SparkLoggerFactory.getLogger(JavaUtils.class);
+ *
+ *
+ * Logging Messages with Variables:
+ * When logging messages with variables, wrap all the variables with `MDC`s and they will be
+ * automatically added to the Mapped Diagnostic Context (MDC).
+ *
+ *
+ * import org.apache.spark.internal.LogKeys;
+ * import org.apache.spark.internal.MDC;
+ * logger.error("Unable to delete file for partition {}", MDC.of(LogKeys.PARTITION_ID$.MODULE$, i));
+ *
+ *
+ * Constant String Messages:
+ * For logging constant string messages, use the standard logging methods.
+ *
+ *
+ * logger.error("Failed to abort the writer after failing to write map output.", e);
+ *
+ *
+ * If you want to output logs in `java code` through the structured log framework,
+ * you can define `custom LogKey` and use it in `java` code as follows:
+ *
+ *
+ * // To add a `custom LogKey`, implement `LogKey`
+ * public static class CUSTOM_LOG_KEY implements LogKey { }
+ * import org.apache.spark.internal.MDC;
+ * logger.error("Unable to delete key {} for cache", MDC.of(CUSTOM_LOG_KEY, "key"));
+ */
+// checkstyle.on: RegexpSinglelineJava
+public class SparkLogger {
+
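+ // Log4j message factory used to substitute MDC values into "{}" placeholders before the
+ // formatted message is handed to the underlying SLF4J logger.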
+ private static final MessageFactory MESSAGE_FACTORY = ParameterizedMessageFactory.INSTANCE;
+ private final Logger slf4jLogger;
+
+ SparkLogger(Logger slf4jLogger) {
+ this.slf4jLogger = slf4jLogger;
+ }
+
+ public boolean isErrorEnabled() {
+ return slf4jLogger.isErrorEnabled();
+ }
+
+ public void error(String msg) {
+ slf4jLogger.error(msg);
+ }
+
+ public void error(String msg, Throwable throwable) {
+ slf4jLogger.error(msg, throwable);
+ }
+
+ public void error(String msg, MDC... mdcs) {
+ if (mdcs == null || mdcs.length == 0) {
+ slf4jLogger.error(msg);
+ } else if (slf4jLogger.isErrorEnabled()) {
+ withLogContext(msg, mdcs, null, mt -> slf4jLogger.error(mt.message));
+ }
+ }
+
+ public void error(String msg, Throwable throwable, MDC... mdcs) {
+ if (mdcs == null || mdcs.length == 0) {
+ slf4jLogger.error(msg, throwable);
+ } else if (slf4jLogger.isErrorEnabled()) {
+ withLogContext(msg, mdcs, throwable, mt -> slf4jLogger.error(mt.message, mt.throwable));
+ }
+ }
+
+ public boolean isWarnEnabled() {
+ return slf4jLogger.isWarnEnabled();
+ }
+
+ public void warn(String msg) {
+ slf4jLogger.warn(msg);
+ }
+
+ public void warn(String msg, Throwable throwable) {
+ slf4jLogger.warn(msg, throwable);
+ }
+
+ public void warn(String msg, MDC... mdcs) {
+ if (mdcs == null || mdcs.length == 0) {
+ slf4jLogger.warn(msg);
+ } else if (slf4jLogger.isWarnEnabled()) {
+ withLogContext(msg, mdcs, null, mt -> slf4jLogger.warn(mt.message));
+ }
+ }
+
+ public void warn(String msg, Throwable throwable, MDC... mdcs) {
+ if (mdcs == null || mdcs.length == 0) {
+ slf4jLogger.warn(msg, throwable);
+ } else if (slf4jLogger.isWarnEnabled()) {
+ withLogContext(msg, mdcs, throwable, mt -> slf4jLogger.warn(mt.message, mt.throwable));
+ }
+ }
+
+ public boolean isInfoEnabled() {
+ return slf4jLogger.isInfoEnabled();
+ }
+
+ public void info(String msg) {
+ slf4jLogger.info(msg);
+ }
+
+ public void info(String msg, Throwable throwable) {
+ slf4jLogger.info(msg, throwable);
+ }
+
+ public void info(String msg, MDC... mdcs) {
+ if (mdcs == null || mdcs.length == 0) {
+ slf4jLogger.info(msg);
+ } else if (slf4jLogger.isInfoEnabled()) {
+ withLogContext(msg, mdcs, null, mt -> slf4jLogger.info(mt.message));
+ }
+ }
+
+ public void info(String msg, Throwable throwable, MDC... mdcs) {
+ if (mdcs == null || mdcs.length == 0) {
+ slf4jLogger.info(msg, throwable);
+ } else if (slf4jLogger.isInfoEnabled()) {
+ withLogContext(msg, mdcs, throwable, mt -> slf4jLogger.info(mt.message, mt.throwable));
+ }
+ }
+
+ public boolean isDebugEnabled() {
+ return slf4jLogger.isDebugEnabled();
+ }
+
+ public void debug(String msg) {
+ slf4jLogger.debug(msg);
+ }
+
+ public void debug(String format, Object arg) {
+ slf4jLogger.debug(format, arg);
+ }
+
+ public void debug(String format, Object arg1, Object arg2) {
+ slf4jLogger.debug(format, arg1, arg2);
+ }
+
+ public void debug(String format, Object... arguments) {
+ slf4jLogger.debug(format, arguments);
+ }
+
+ public void debug(String msg, Throwable throwable) {
+ slf4jLogger.debug(msg, throwable);
+ }
+
+ public boolean isTraceEnabled() {
+ return slf4jLogger.isTraceEnabled();
+ }
+
+ public void trace(String msg) {
+ slf4jLogger.trace(msg);
+ }
+
+ public void trace(String format, Object arg) {
+ slf4jLogger.trace(format, arg);
+ }
+
+ public void trace(String format, Object arg1, Object arg2) {
+ slf4jLogger.trace(format, arg1, arg2);
+ }
+
+ public void trace(String format, Object... arguments) {
+ slf4jLogger.trace(format, arguments);
+ }
+
+ public void trace(String msg, Throwable throwable) {
+ slf4jLogger.trace(msg, throwable);
+ }
+
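+ // Formats the pattern with the MDC values and, when structured logging is enabled, exposes
+ // each MDC key/value pair through the Log4j thread context for the duration of the log call
+ // before delegating to the supplied SLF4J logging function.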
+ private void withLogContext(
+ String pattern,
+ MDC[] mdcs,
+ Throwable throwable,
+ Consumer<MessageThrowable> func) {
+ Map<String, String> context = new HashMap<>();
+ Object[] args = new Object[mdcs.length];
+ for (int index = 0; index < mdcs.length; index++) {
+ MDC mdc = mdcs[index];
+ String value = (mdc.value() != null) ? mdc.value().toString() : null;
+ if (Logging$.MODULE$.isStructuredLoggingEnabled()) {
+ context.put(mdc.key().name(), value);
+ }
+ args[index] = value;
+ }
+ MessageThrowable messageThrowable = MessageThrowable.of(
+ MESSAGE_FACTORY.newMessage(pattern, args).getFormattedMessage(), throwable);
+ try (CloseableThreadContext.Instance ignored = CloseableThreadContext.putAll(context)) {
+ func.accept(messageThrowable);
+ }
+ }
+
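+ // Pairs a fully formatted log message with an optional throwable for the logging lambdas.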
+ private record MessageThrowable(String message, Throwable throwable) {
+ static MessageThrowable of(String message, Throwable throwable) {
+ return new MessageThrowable(message, throwable);
+ }
+ }
+
+ public Logger getSlf4jLogger() {
+ return slf4jLogger;
+ }
+}
diff --git a/common/utils/src/main/java/org/apache/spark/internal/SparkLoggerFactory.java b/common/utils/src/main/java/org/apache/spark/internal/SparkLoggerFactory.java
new file mode 100644
index 0000000000000..a59c007362419
--- /dev/null
+++ b/common/utils/src/main/java/org/apache/spark/internal/SparkLoggerFactory.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.internal;
+
+// checkstyle.off: RegexpSinglelineJava
+import org.slf4j.LoggerFactory;
+// checkstyle.on: RegexpSinglelineJava
+
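+/**
+ * Factory for {@link SparkLogger} instances. Use this instead of {@code org.slf4j.LoggerFactory}
+ * so that log statements can participate in Spark's structured logging framework.
+ */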
+public class SparkLoggerFactory {
+
+ public static SparkLogger getLogger(String name) {
+ return new SparkLogger(LoggerFactory.getLogger(name));
+ }
+
+ public static SparkLogger getLogger(Class<?> clazz) {
+ return new SparkLogger(LoggerFactory.getLogger(clazz));
+ }
+}
diff --git a/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java b/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java
index 8e1cc470e0ccf..90dddc2cb08c1 100644
--- a/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java
+++ b/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java
@@ -29,15 +29,18 @@
import java.util.regex.Pattern;
import org.apache.commons.lang3.SystemUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+
+import org.apache.spark.internal.SparkLogger;
+import org.apache.spark.internal.SparkLoggerFactory;
+import org.apache.spark.internal.LogKeys;
+import org.apache.spark.internal.MDC;
/**
* General utilities available in the network package. Many of these are sourced from Spark's
* own Utils, just accessible within this package.
*/
public class JavaUtils {
- private static final Logger logger = LoggerFactory.getLogger(JavaUtils.class);
+ private static final SparkLogger logger = SparkLoggerFactory.getLogger(JavaUtils.class);
/**
* Define a default value for driver memory here since this value is referenced across the code
@@ -112,7 +115,7 @@ public static void deleteRecursively(File file, FilenameFilter filter) throws IO
return;
} catch (IOException e) {
logger.warn("Attempt to delete using native Unix OS command failed for path = {}. " +
- "Falling back to Java IO way", file.getAbsolutePath(), e);
+ "Falling back to Java IO way", e, MDC.of(LogKeys.PATH$.MODULE$, file.getAbsolutePath()));
}
}
@@ -228,6 +231,8 @@ private static boolean isSymlink(File file) throws IOException {
Map.entry("pb", ByteUnit.PiB));
}
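+ // Compiled once so repeated timeStringAs calls do not rebuild the regex.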
+ private static final Pattern TIME_STRING_PATTERN = Pattern.compile("(-?[0-9]+)([a-z]+)?");
+
/**
* Convert a passed time string (e.g. 50s, 100ms, or 250us) to a time count in the given unit.
* The unit is also considered the default if the given string does not specify a unit.
@@ -236,7 +241,7 @@ public static long timeStringAs(String str, TimeUnit unit) {
String lower = str.toLowerCase(Locale.ROOT).trim();
try {
- Matcher m = Pattern.compile("(-?[0-9]+)([a-z]+)?").matcher(lower);
+ Matcher m = TIME_STRING_PATTERN.matcher(lower);
if (!m.matches()) {
throw new NumberFormatException("Failed to parse time string: " + str);
}
@@ -276,6 +281,11 @@ public static long timeStringAsSec(String str) {
return timeStringAs(str, TimeUnit.SECONDS);
}
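+ // Precompiled so byteStringAs does not rebuild these regexes on every call.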
+ private static final Pattern BYTE_STRING_PATTERN =
+ Pattern.compile("([0-9]+)([a-z]+)?");
+ private static final Pattern BYTE_STRING_FRACTION_PATTERN =
+ Pattern.compile("([0-9]+\\.[0-9]+)([a-z]+)?");
+
/**
* Convert a passed byte string (e.g. 50b, 100kb, or 250mb) to the given. If no suffix is
* provided, a direct conversion to the provided unit is attempted.
@@ -284,8 +294,8 @@ public static long byteStringAs(String str, ByteUnit unit) {
String lower = str.toLowerCase(Locale.ROOT).trim();
try {
- Matcher m = Pattern.compile("([0-9]+)([a-z]+)?").matcher(lower);
- Matcher fractionMatcher = Pattern.compile("([0-9]+\\.[0-9]+)([a-z]+)?").matcher(lower);
+ Matcher m = BYTE_STRING_PATTERN.matcher(lower);
+ Matcher fractionMatcher = BYTE_STRING_FRACTION_PATTERN.matcher(lower);
if (m.matches()) {
long val = Long.parseLong(m.group(1));
@@ -396,7 +406,7 @@ public static File createDirectory(String root, String namePrefix) throws IOExce
dir = new File(root, namePrefix + "-" + UUID.randomUUID());
Files.createDirectories(dir.toPath());
} catch (IOException | SecurityException e) {
- logger.error("Failed to create directory " + dir, e);
+ logger.error("Failed to create directory {}", e, MDC.of(LogKeys.PATH$.MODULE$, dir));
dir = null;
}
}
diff --git a/common/utils/src/main/resources/error/README.md b/common/utils/src/main/resources/error/README.md
index e2f68a1af9f4a..575e2ebad35a3 100644
--- a/common/utils/src/main/resources/error/README.md
+++ b/common/utils/src/main/resources/error/README.md
@@ -16,9 +16,9 @@ The error state / SQLSTATE itself is comprised of two parts:
2. Error sub-class
Acceptable values for these various error parts are defined in the following files:
-* `error-classes.json`
-* `error-states.json`
-* `error-conditions.json`
+* [`error-classes.json`](error-classes.json)
+* [`error-states.json`](error-states.json)
+* [`error-conditions.json`](error-conditions.json)
The terms error class, state, and condition come from the SQL standard.
@@ -34,6 +34,7 @@ The terms error class, state, and condition come from the SQL standard.
* Error condition: `AS_OF_JOIN`
* Error sub-condition: `TOLERANCE_IS_NON_NEGATIVE`
* Error sub-condition: `TOLERANCE_IS_UNFOLDABLE`
+ * Error sub-condition: `UNSUPPORTED_DIRECTION`
### Inconsistent Use of the Term "Error Class"
@@ -41,7 +42,7 @@ Unfortunately, we have historically used the term "error class" inconsistently t
Fixing this will require renaming `SparkException.errorClass` to `SparkException.errorCondition` and making similar changes to `ErrorClassesJsonReader` and other parts of the codebase. We will address this in [SPARK-47429]. Until that is complete, we will have to live with the fact that a string like `DATATYPE_MISSING_SIZE` is called an "error condition" in our user-facing documentation but an "error class" in the code.
-For more details, please see [SPARK-46810][SPARK-46810].
+For more details, please see [SPARK-46810].
[SPARK-46810]: https://issues.apache.org/jira/browse/SPARK-46810
[SPARK-47429]: https://issues.apache.org/jira/browse/SPARK-47429
@@ -51,9 +52,9 @@ For more details, please see [SPARK-46810][SPARK-46810].
1. Check if the error is an internal error.
Internal errors are bugs in the code that we do not expect users to encounter; this does not include unsupported operations.
If true, use the error condition `INTERNAL_ERROR` and skip to step 4.
-2. Check if an appropriate error condition already exists in `error-conditions.json`.
+2. Check if an appropriate error condition already exists in [`error-conditions.json`](error-conditions.json).
If true, use the error condition and skip to step 4.
-3. Add a new condition to `error-conditions.json`. If the new condition requires a new error state, add the new error state to `error-states.json`.
+3. Add a new condition to [`error-conditions.json`](error-conditions.json). If the new condition requires a new error state, add the new error state to [`error-states.json`](error-states.json).
4. Check if the exception type already extends `SparkThrowable`.
If true, skip to step 6.
5. Mix `SparkThrowable` into the exception.
@@ -165,7 +166,7 @@ For example: The existing `XXKD0` is used for an internal analyzer error.
#### ANSI/ISO standard
-The SQLSTATEs in `error-states.json` are collated from:
+The SQLSTATEs in [`error-states.json`](error-states.json) are collated from:
- SQL2016
- DB2 zOS/LUW
- PostgreSQL 15
diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json
index e1c8c881f98f3..9a3011635daa3 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -90,6 +90,11 @@
"message" : [
"The input argument `tolerance` must be a constant."
]
+ },
+ "UNSUPPORTED_DIRECTION" : {
+ "message" : [
+ "Unsupported as-of join direction ''. Supported as-of join direction include: ."
+ ]
}
},
"sqlState" : "42604"
@@ -101,6 +106,13 @@
],
"sqlState" : "22KD3"
},
+ "AVRO_NOT_LOADED_SQL_FUNCTIONS_UNUSABLE" : {
+ "message" : [
+ "Cannot call the SQL function because the Avro data source is not loaded.",
+ "Please restart your job or session with the 'spark-avro' package loaded, such as by using the --packages argument on the command line, and then retry your query or command again."
+ ],
+ "sqlState" : "22KD3"
+ },
"BATCH_METADATA_NOT_FOUND" : {
"message" : [
"Unable to find batch ."
@@ -119,12 +131,24 @@
],
"sqlState" : "42KDE"
},
+ "CANNOT_ALTER_COLLATION_BUCKET_COLUMN" : {
+ "message" : [
+ "ALTER TABLE (ALTER|CHANGE) COLUMN cannot change collation of type/subtypes of bucket columns, but found the bucket column in the table ."
+ ],
+ "sqlState" : "428FR"
+ },
"CANNOT_ALTER_PARTITION_COLUMN" : {
"message" : [
"ALTER TABLE (ALTER|CHANGE) COLUMN is not supported for partition columns, but found the partition column in the table ."
],
"sqlState" : "428FR"
},
+ "CANNOT_ASSIGN_EVENT_TIME_COLUMN_WITHOUT_WATERMARK" : {
+ "message" : [
+ "Watermark needs to be defined to reassign event time column. Failed to find watermark definition in the streaming query."
+ ],
+ "sqlState" : "42611"
+ },
"CANNOT_CAST_DATATYPE" : {
"message" : [
"Cannot cast to ."
@@ -212,6 +236,11 @@
"Error reading delta file of : does not exist."
]
},
+ "CANNOT_READ_MISSING_SNAPSHOT_FILE" : {
+ "message" : [
+ "Error reading snapshot file of : does not exist."
+ ]
+ },
"CANNOT_READ_SNAPSHOT_FILE_KEY_SIZE" : {
"message" : [
"Error reading snapshot file of : key size cannot be ."
@@ -227,6 +256,11 @@
"Error reading streaming state file of does not exist. If the stream job is restarted with a new or updated state operation, please create a new checkpoint location or clear the existing checkpoint location."
]
},
+ "SNAPSHOT_PARTITION_ID_NOT_FOUND" : {
+ "message" : [
+ "Partition id not found for state of operator at ."
+ ]
+ },
"UNCATEGORIZED" : {
"message" : [
""
@@ -463,7 +497,13 @@
},
"COLLATION_INVALID_NAME" : {
"message" : [
- "The value does not represent a correct collation name. Suggested valid collation name: []."
+ "The value does not represent a correct collation name. Suggested valid collation names: []."
+ ],
+ "sqlState" : "42704"
+ },
+ "COLLATION_INVALID_PROVIDER" : {
+ "message" : [
+ "The value does not represent a correct collation provider. Supported providers are: []."
],
"sqlState" : "42704"
},
@@ -736,6 +776,11 @@
"Input to the function cannot contain elements of the \"MAP\" type. In Spark, same maps may have different hashcode, thus hash expressions are prohibited on \"MAP\" elements. To restore previous behavior set \"spark.sql.legacy.allowHashOnMapType\" to \"true\"."
]
},
+ "HASH_VARIANT_TYPE" : {
+ "message" : [
+ "Input to the function cannot contain elements of the \"VARIANT\" type yet."
+ ]
+ },
"INPUT_SIZE_NOT_ONE" : {
"message" : [
"Length of should be 1."
@@ -753,7 +798,7 @@
},
"INVALID_JSON_SCHEMA" : {
"message" : [
- "Input schema must be a struct, an array or a map."
+ "Input schema must be a struct, an array, a map or a variant."
]
},
"INVALID_MAP_KEY_TYPE" : {
@@ -1036,7 +1081,7 @@
},
"DUPLICATE_ROUTINE_PARAMETER_ASSIGNMENT" : {
"message" : [
- "Call to function is invalid because it includes multiple argument assignments to the same parameter name ."
+ "Call to routine is invalid because it includes multiple argument assignments to the same parameter name ."
],
"subClass" : {
"BOTH_POSITIONAL_AND_NAMED" : {
@@ -1052,6 +1097,14 @@
},
"sqlState" : "4274K"
},
+ "EMITTING_ROWS_OLDER_THAN_WATERMARK_NOT_ALLOWED" : {
+ "message" : [
+ "Previous node emitted a row with eventTime= which is older than current_watermark_value=",
+ "This can lead to correctness issues in the stateful operators downstream in the execution pipeline.",
+ "Please correct the operator logic to emit rows after current global watermark value."
+ ],
+ "sqlState" : "42815"
+ },
"EMPTY_JSON_FIELD_VALUE" : {
"message" : [
"Failed to parse an empty string for data type ."
@@ -1224,6 +1277,11 @@
"List namespaces."
]
},
+ "LOAD_TABLE" : {
+ "message" : [
+ "Load the table ."
+ ]
+ },
"NAMESPACE_EXISTS" : {
"message" : [
"Check that the namespace exists."
@@ -1313,7 +1371,20 @@
],
"sqlState" : "2203G"
},
- "FIELDS_ALREADY_EXISTS" : {
+ "FAILED_TO_PARSE_TOO_COMPLEX" : {
+ "message" : [
+ "The statement, including potential SQL functions and referenced views, was too complex to parse.",
+ "To mitigate this error divide the statement into multiple, less complex chunks."
+ ],
+ "sqlState" : "54001"
+ },
+ "FEATURE_NOT_ENABLED" : {
+ "message" : [
+ "The feature is not enabled. Consider setting the config to to enable this capability."
+ ],
+ "sqlState" : "56038"
+ },
+ "FIELD_ALREADY_EXISTS" : {
"message" : [
"Cannot column, because already exists in ."
],
@@ -1883,7 +1954,7 @@
"subClass" : {
"DEFAULT_COLLATION" : {
"message" : [
- "Cannot resolve the given default collation. Did you mean ''?"
+ "Cannot resolve the given default collation. Suggested valid collation names: ['']?"
]
},
"TIME_ZONE" : {
@@ -1983,6 +2054,11 @@
"Delimiter cannot be empty string."
]
},
+ "NULL_VALUE" : {
+ "message" : [
+ "Delimiter cannot be null."
+ ]
+ },
"SINGLE_BACKSLASH" : {
"message" : [
"Single backslash is prohibited. It has special meaning as beginning of an escape sequence. To get the backslash character, pass a string with two backslashes as the delimiter."
@@ -2304,12 +2380,24 @@
},
"sqlState" : "42K0K"
},
+ "INVALID_JOIN_TYPE_FOR_JOINWITH" : {
+ "message" : [
+ "Invalid join type in joinWith: ."
+ ],
+ "sqlState" : "42613"
+ },
"INVALID_JSON_DATA_TYPE" : {
"message" : [
"Failed to convert the JSON string '' to a data type. Please enter a valid data type."
],
"sqlState" : "2203G"
},
+ "INVALID_JSON_DATA_TYPE_FOR_COLLATIONS" : {
+ "message" : [
+ "Collations can only be applied to string types, but the JSON data type is ."
+ ],
+ "sqlState" : "2203G"
+ },
"INVALID_JSON_ROOT_FIELD" : {
"message" : [
"Cannot convert JSON root field to target Spark type."
@@ -2621,6 +2709,12 @@
],
"sqlState" : "42000"
},
+ "INVALID_SINGLE_VARIANT_COLUMN" : {
+ "message" : [
+ "The `singleVariantColumn` option cannot be used if there is also a user specified schema."
+ ],
+ "sqlState" : "42613"
+ },
"INVALID_SQL_ARG" : {
"message" : [
"The argument of `sql()` is invalid. Consider to replace it either by a SQL literal or by collection constructor functions such as `map()`, `array()`, `struct()`."
@@ -2637,9 +2731,9 @@
"ANALYZE TABLE(S) ... COMPUTE STATISTICS ... must be either NOSCAN or empty."
]
},
- "CREATE_FUNC_WITH_IF_NOT_EXISTS_AND_REPLACE" : {
+ "CREATE_ROUTINE_WITH_IF_NOT_EXISTS_AND_REPLACE" : {
"message" : [
- "CREATE FUNCTION with both IF NOT EXISTS and REPLACE is not allowed."
+ "Cannot create a routine with both IF NOT EXISTS and REPLACE specified."
]
},
"CREATE_TEMP_FUNC_WITH_DATABASE" : {
@@ -2825,6 +2919,12 @@
],
"sqlState" : "42000"
},
+ "INVALID_UTF8_STRING" : {
+ "message" : [
+ "Invalid UTF8 byte sequence found in string: ."
+ ],
+ "sqlState" : "22029"
+ },
"INVALID_VARIABLE_TYPE_FOR_QUERY_EXECUTE_IMMEDIATE" : {
"message" : [
"Variable type must be string type but got ."
@@ -2922,6 +3022,12 @@
],
"sqlState" : "42710"
},
+ "MALFORMED_CHARACTER_CODING" : {
+ "message" : [
+ "Invalid value found when performing with "
+ ],
+ "sqlState" : "22000"
+ },
"MALFORMED_CSV_RECORD" : {
"message" : [
"Malformed CSV record: "
@@ -3154,6 +3260,12 @@
],
"sqlState" : "42809"
},
+ "NOT_NULL_ASSERT_VIOLATION" : {
+ "message" : [
+ "NULL value appeared in non-nullable field: If the schema is inferred from a Scala tuple/case class, or a Java bean, please try to use scala.Option[_] or other nullable types (such as java.lang.Integer instead of int/scala.Int)."
+ ],
+ "sqlState" : "42000"
+ },
"NOT_NULL_CONSTRAINT_VIOLATION" : {
"message" : [
"Assigning a NULL is not allowed here."
@@ -3256,6 +3368,12 @@
],
"sqlState" : "42000"
},
+ "NULL_DATA_SOURCE_OPTION" : {
+ "message" : [
+ "Data source read/write option