Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-16024] [SQL] [TEST] Verify Column Comment for Data Source Tables #13764

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.plans.logical.Project
import org.apache.spark.sql.execution.SparkSqlParser
import org.apache.spark.sql.execution.datasources.{BucketSpec, CreateTableUsing}
import org.apache.spark.sql.internal.{HiveSerDe, SQLConf}
import org.apache.spark.sql.types.{IntegerType, StringType, StructType}
import org.apache.spark.sql.types.{IntegerType, MetadataBuilder, StringType, StructType}


// TODO: merge this with DDLSuite (SPARK-14441)
Expand Down Expand Up @@ -349,10 +349,14 @@ class DDLCommandSuite extends PlanTest {
}

test("create table using - with partitioned by") {
val query = "CREATE TABLE my_tab(a INT, b STRING) USING parquet PARTITIONED BY (a)"
val query = "CREATE TABLE my_tab(a INT comment 'test', b STRING) " +
"USING parquet PARTITIONED BY (a)"
val expected = CreateTableUsing(
TableIdentifier("my_tab"),
Some(new StructType().add("a", IntegerType).add("b", StringType)),
Some(new StructType()
.add("a", IntegerType, nullable = true,
new MetadataBuilder().putString("comment", s"test").build())
.add("b", StringType)),
"parquet",
false,
Map.empty,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,19 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach {
}
}

test("desc table for parquet data source table using in-memory catalog") {
assume(spark.sparkContext.conf.get(CATALOG_IMPLEMENTATION) == "in-memory")
val tabName = "tab1"
withTable(tabName) {
sql(s"CREATE TABLE $tabName(a int comment 'test') USING parquet ")

checkAnswer(
sql(s"DESC $tabName").select("col_name", "data_type", "comment"),
Row("a", "int", "test")
)
}
}

test("Alter/Describe Database") {
withTempDir { tmpDir =>
val path = tmpDir.toString
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import org.scalatest.BeforeAndAfter

import org.apache.spark.sql._
import org.apache.spark.sql.sources._
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.sql.types._
import org.apache.spark.util.Utils


Expand Down Expand Up @@ -223,6 +223,31 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSQLContext with Be
}
}

test("column nullability and comment - write and then read") {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also remove this test, let's focus on SQL CREATE TABLE in this PR.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, let me change it. Thanks!

import testImplicits._

Seq("json", "parquet", "csv").foreach { format =>
val schema = StructType(
StructField("cl1", IntegerType, nullable = false,
new MetadataBuilder().putString("comment", "test").build()) ::
StructField("cl2", IntegerType, nullable = true) ::
StructField("cl3", IntegerType, nullable = true) :: Nil)
val row = Row(3, null, 4)
val df = spark.createDataFrame(sparkContext.parallelize(row :: Nil), schema)

val tableName = "tab"
withTable(tableName) {
df.write.format(format).mode("overwrite").saveAsTable(tableName)
// Verify the DDL command result: DESCRIBE TABLE
checkAnswer(
sql(s"desc $tableName").select("col_name", "comment").where($"comment" === "test"),
Row("cl1", "test") :: Nil)
// Verify the schema
assert(spark.table(tableName).schema == schema.asNullable)
}
}
}

test("load API") {
spark.read.format("org.apache.spark.sql.test").load()
spark.read.format("org.apache.spark.sql.test").load(dir)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import java.io.File
import org.apache.hadoop.fs.Path
import org.scalatest.BeforeAndAfterEach

import org.apache.spark.internal.config._
import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode}
import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogTableType}
import org.apache.spark.sql.catalyst.TableIdentifier
Expand Down Expand Up @@ -407,6 +408,19 @@ class HiveDDLSuite
}
}

test("desc table for data source table using Hive Metastore") {
assume(spark.sparkContext.conf.get(CATALOG_IMPLEMENTATION) == "hive")
val tabName = "tab1"
withTable(tabName) {
sql(s"CREATE TABLE $tabName(a int comment 'test') USING parquet ")

checkAnswer(
sql(s"DESC $tabName").select("col_name", "data_type", "comment"),
Row("a", "int", "test")
)
}
}

private def createDatabaseWithLocation(tmpDir: File, dirExists: Boolean): Unit = {
val catalog = spark.sessionState.catalog
val dbName = "db1"
Expand Down