-
Notifications
You must be signed in to change notification settings - Fork 28.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-25132][SQL] Case-insensitive field resolution when reading from Parquet #22148
Changes from all commits
1600190
ce4c935
9261beb
c8279d2
0176d29
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -430,6 +430,49 @@ class FileBasedDataSourceSuite extends QueryTest with SharedSQLContext with Befo | |
} | ||
} | ||
} | ||
|
||
test(s"SPARK-25132: case-insensitive field resolution when reading from Parquet") { | ||
withTempDir { dir => | ||
val format = "parquet" | ||
val tableDir = dir.getCanonicalPath + s"/$format" | ||
val tableName = s"spark_25132_${format}" | ||
withTable(tableName) { | ||
val end = 5 | ||
val data = spark.range(end).selectExpr("id as A", "id * 2 as b", "id * 3 as B") | ||
withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { | ||
data.write.format(format).mode("overwrite").save(tableDir) | ||
} | ||
sql(s"CREATE TABLE $tableName (a LONG, b LONG) USING $format LOCATION '$tableDir'") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not related to this PR, but it makes me think that case-sensitivity should be a global or at least table-level config; otherwise the behavior is a little confusing. cc @gatorsmile There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. A table-level conf is reasonable. Let us do it in 3.0? |
||
|
||
withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { | ||
checkAnswer(sql(s"select a from $tableName"), data.select("A")) | ||
checkAnswer(sql(s"select A from $tableName"), data.select("A")) | ||
|
||
// RuntimeException is triggered at executor side, which is then wrapped as | ||
// SparkException at driver side | ||
val e1 = intercept[SparkException] { | ||
sql(s"select b from $tableName").collect() | ||
} | ||
assert( | ||
e1.getCause.isInstanceOf[RuntimeException] && | ||
e1.getCause.getMessage.contains( | ||
"""Found duplicate field(s) "b": [b, B] in case-insensitive mode""")) | ||
val e2 = intercept[SparkException] { | ||
sql(s"select B from $tableName").collect() | ||
} | ||
assert( | ||
e2.getCause.isInstanceOf[RuntimeException] && | ||
e2.getCause.getMessage.contains( | ||
"""Found duplicate field(s) "b": [b, B] in case-insensitive mode""")) | ||
} | ||
|
||
withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { | ||
checkAnswer(sql(s"select a from $tableName"), (0 until end).map(_ => Row(null))) | ||
checkAnswer(sql(s"select b from $tableName"), data.select("b")) | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
object TestingUDT { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: I would remove this brace per https://github.com/databricks/scala-style-guide#anonymous-methods