[SPARK-22002][SQL] Read JDBC table using custom schema: support specifying partial fields. #19231
Changes from 2 commits
```diff
@@ -26,6 +26,7 @@ import org.scalatest.{BeforeAndAfter, PrivateMethodTester}

 import org.apache.spark.{SparkException, SparkFunSuite}
 import org.apache.spark.sql.{AnalysisException, DataFrame, Row}
+import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
 import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
 import org.apache.spark.sql.execution.DataSourceScanExec
 import org.apache.spark.sql.execution.command.ExplainCommand
```
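For context on the new import: CatalystSqlParser.parseTableSchema turns a DDL-style column list into a StructType, which is what the rewritten assertions compare the DataFrame schema against. A minimal sketch (the column names and types mirror the test; nullability defaults to true):

```scala
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

// Parse the DDL-like string used in the tests into a Catalyst schema.
val parsed = CatalystSqlParser.parseTableSchema("NAME STRING, THEID INT")

// The result is equivalent to building the StructType by hand
// (fields are nullable and carry no metadata).
val expected = StructType(Seq(
  StructField("NAME", StringType, nullable = true),
  StructField("THEID", IntegerType, nullable = true)))

assert(parsed === expected)
```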
```diff
@@ -970,30 +971,28 @@ class JDBCSuite extends SparkFunSuite

   test("jdbc API support custom schema") {
     val parts = Array[String]("THEID < 2", "THEID >= 2")
+    val customSchema = "NAME STRING, THEID INT"
     val props = new Properties()
-    props.put("customSchema", "NAME STRING, THEID BIGINT")
-    val schema = StructType(Seq(
-      StructField("NAME", StringType, true), StructField("THEID", LongType, true)))
+    props.put("customSchema", customSchema)
     val df = spark.read.jdbc(urlWithUserAndPass, "TEST.PEOPLE", parts, props)
     assert(df.schema.size === 2)
-    assert(df.schema === schema)
+    assert(df.schema === CatalystSqlParser.parseTableSchema(customSchema))
     assert(df.count() === 3)
   }

   test("jdbc API custom schema DDL-like strings.") {
     withTempView("people_view") {
+      val customSchema = "NAME STRING, THEID INT"
       sql(
         s"""
            |CREATE TEMPORARY VIEW people_view
            |USING org.apache.spark.sql.jdbc
            |OPTIONS (uRl '$url', DbTaBlE 'TEST.PEOPLE', User 'testUser', PassWord 'testPass',
-           |customSchema 'NAME STRING, THEID INT')
+           |customSchema '$customSchema')
          """.stripMargin.replaceAll("\n", " "))
-      val schema = StructType(
-        Seq(StructField("NAME", StringType, true), StructField("THEID", IntegerType, true)))
       val df = sql("select * from people_view")
-      assert(df.schema.size === 2)
-      assert(df.schema === schema)
+      assert(df.schema.length === 2)
+      assert(df.schema === CatalystSqlParser.parseTableSchema(customSchema))
       assert(df.count() === 3)
     }
   }
```
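Both tests exercise the customSchema read option this PR adds. As a hedged usage sketch (the URL, table name, and credentials below are placeholders; per the PR title's "partial fields" support, columns omitted from customSchema should keep the types reported by the JDBC driver):

```scala
import java.util.Properties
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("customSchema example").getOrCreate()

// Placeholder connection details; substitute a real JDBC URL and table.
val url = "jdbc:h2:mem:testdb;user=testUser;password=testPass"

val props = new Properties()
// Override the mapped type of THEID only; other columns (e.g. NAME) keep
// whatever type Spark infers from the driver's metadata.
props.put("customSchema", "THEID BIGINT")

val people = spark.read.jdbc(url, "TEST.PEOPLE", props)
people.printSchema()
```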
Revert it back. Change the following line instead: https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala#L309

You also need to update some test cases due to the above change, I think.

We shouldn't change it, because `scale` is used to infer column types.
Reset the metadata to empty, otherwise it is not equal to the schema generated by CatalystSqlParser.parseTableSchema. Anyway, the type is fixed here, so we don't need the metadata to infer column types.
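To illustrate the metadata point (a standalone sketch, not code from this patch): StructField is a case class whose equality includes its metadata, so a schema that still carries driver-derived entries such as scale will not equal the plain schema produced by CatalystSqlParser.parseTableSchema until the metadata is cleared.

```scala
import org.apache.spark.sql.types._

// A field as it might look after JDBC resolution, carrying driver metadata.
val withMeta = StructField("THEID", IntegerType, nullable = true,
  metadata = new MetadataBuilder().putLong("scale", 0L).build())

// The same field as parseTableSchema would produce it (Metadata.empty).
val plain = StructField("THEID", IntegerType, nullable = true)

assert(withMeta != plain)                                  // metadata breaks equality
assert(withMeta.copy(metadata = Metadata.empty) == plain)  // resetting it restores equality
```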
Why not change the following line https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala#L309 to
Because `scale` is used to infer column types, we shouldn't remove it: https://github.com/apache/spark/blob/v2.2.0/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala#L32
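To make the scale argument concrete, here is a simplified, hypothetical dialect in the spirit of the OracleDialect logic linked above (not the actual implementation): as the comments describe, the column's scale is recorded in the MetadataBuilder handed to the dialect, and the dialect can use it when choosing a Catalyst type.

```scala
import java.sql.Types
import org.apache.spark.sql.jdbc.JdbcDialect
import org.apache.spark.sql.types._

// Hypothetical dialect: maps NUMERIC columns using the "scale" metadata entry,
// roughly analogous to the OracleDialect code referenced in the thread.
object ScaleAwareDialect extends JdbcDialect {
  override def canHandle(url: String): Boolean = url.startsWith("jdbc:example")

  override def getCatalystType(
      sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = {
    if (sqlType == Types.NUMERIC && md != null) {
      val scale = md.build().getLong("scale")
      // NUMERIC with scale 0 has no fractional part, so an integral type is safe;
      // otherwise fall back to a decimal with the reported precision and scale.
      if (scale == 0L) Some(LongType) else Some(DecimalType(size, scale.toInt))
    } else {
      None
    }
  }
}
```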