# [SPARK-24849][SPARK-24911][SQL] Converting a value of StructType to a DDL string

## What changes were proposed in this pull request?

In this PR, I propose to extend the `StructType`/`StructField` classes with a new method, `toDDL`, which converts a value of the `StructType`/`StructField` type to a string formatted in DDL style. The resulting string can be used in table creation.

The `toDDL` method of `StructField` is reused in `SHOW CREATE TABLE`. In this way, the PR fixes the bug where the names of nested fields were emitted without quotes.
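To illustrate the intended usage, here is a minimal sketch (the schema, the table name, and the active `SparkSession` named `spark` are hypothetical, not part of this change):

```scala
import org.apache.spark.sql.types._

val schema = new StructType()
  .add(StructField("eventId", IntegerType))
  .add(StructField("metaData", new StructType().add("eventId", StringType)))

// Field names are quoted and fields are joined with commas:
// `eventId` INT,`metaData` STRUCT<`eventId`: STRING>
val ddl = schema.toDDL

// The DDL string can be embedded into a table creation statement
// (assumes an active SparkSession named `spark`).
spark.sql(s"CREATE TABLE events ($ddl) USING parquet")
```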

## How was this patch tested?

I added a test checking the new method, plus two round-trip tests: `fromDDL` -> `toDDL` and `toDDL` -> `fromDDL`.

Author: Maxim Gekk <[email protected]>

Closes #21803 from MaxGekk/to-ddl.
MaxGekk authored and gatorsmile committed Jul 25, 2018
1 parent 571a6f0 commit 2f77616
Showing 6 changed files with 86 additions and 20 deletions.
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala

```diff
@@ -155,6 +155,18 @@ package object util {
 
   def toPrettySQL(e: Expression): String = usePrettyExpression(e).sql
 
+
+  def escapeSingleQuotedString(str: String): String = {
+    val builder = StringBuilder.newBuilder
+
+    str.foreach {
+      case '\'' => builder ++= s"\\\'"
+      case ch => builder += ch
+    }
+
+    builder.toString()
+  }
+
   /* FIX ME
   implicit class debugLogging(a: Any) {
     def debugLogging() {
```
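As a side note on the helper above, a quick sketch of its behavior (not part of the diff):

```scala
import org.apache.spark.sql.catalyst.util.escapeSingleQuotedString

// A single quote becomes backslash + quote, so the result can be safely
// embedded in a single-quoted SQL literal such as a COMMENT clause.
val escaped = escapeSingleQuotedString("Field's comment")
assert(escaped == "Field\\'s comment") // prints as: Field\'s comment
```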
sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala

```diff
@@ -21,6 +21,7 @@ import org.json4s.JsonAST.JValue
 import org.json4s.JsonDSL._
 
 import org.apache.spark.annotation.InterfaceStability
+import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIdentifier}
 
 /**
  * A field inside a StructType.
@@ -74,4 +75,16 @@ case class StructField(
   def getComment(): Option[String] = {
     if (metadata.contains("comment")) Option(metadata.getString("comment")) else None
   }
+
+  /**
+   * Returns a string containing a schema in DDL format. For example, the following value:
+   * `StructField("eventId", IntegerType)` will be converted to `eventId` INT.
+   */
+  def toDDL: String = {
+    val comment = getComment()
+      .map(escapeSingleQuotedString)
+      .map(" COMMENT '" + _ + "'")
+
+    s"${quoteIdentifier(name)} ${dataType.sql}${comment.getOrElse("")}"
+  }
 }
```
sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala

```diff
@@ -27,7 +27,7 @@ import org.apache.spark.SparkException
 import org.apache.spark.annotation.InterfaceStability
 import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, InterpretedOrdering}
 import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, LegacyTypeStringParser}
-import org.apache.spark.sql.catalyst.util.quoteIdentifier
+import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIdentifier}
 import org.apache.spark.util.Utils
 
 /**
@@ -360,6 +360,14 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[StructField] {
     s"STRUCT<${fieldTypes.mkString(", ")}>"
   }
 
+  /**
+   * Returns a string containing a schema in DDL format. For example, the following value:
+   * `StructType(Seq(StructField("eventId", IntegerType), StructField("s", StringType)))`
+   * will be converted to `eventId` INT, `s` STRING.
+   * The returned DDL schema can be used in a table creation.
+   */
+  def toDDL: String = fields.map(_.toDDL).mkString(",")
+
   private[sql] override def simpleString(maxNumberFields: Int): String = {
     val builder = new StringBuilder
     val fieldTypes = fields.take(maxNumberFields).map {
```
sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala

```diff
@@ -18,6 +18,7 @@
 package org.apache.spark.sql.types
 
 import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.types.StructType.fromDDL
 
 class StructTypeSuite extends SparkFunSuite {
 
@@ -37,4 +38,36 @@ class StructTypeSuite extends SparkFunSuite {
     val e = intercept[IllegalArgumentException](s.fieldIndex("c")).getMessage
     assert(e.contains("Available fields: a, b"))
   }
+
+  test("SPARK-24849: toDDL - simple struct") {
+    val struct = StructType(Seq(StructField("a", IntegerType)))
+
+    assert(struct.toDDL == "`a` INT")
+  }
+
+  test("SPARK-24849: round trip toDDL - fromDDL") {
+    val struct = new StructType().add("a", IntegerType).add("b", StringType)
+
+    assert(fromDDL(struct.toDDL) === struct)
+  }
+
+  test("SPARK-24849: round trip fromDDL - toDDL") {
+    val struct = "`a` MAP<INT, STRING>,`b` INT"
+
+    assert(fromDDL(struct).toDDL === struct)
+  }
+
+  test("SPARK-24849: toDDL must take into account case of fields.") {
+    val struct = new StructType()
+      .add("metaData", new StructType().add("eventId", StringType))
+
+    assert(struct.toDDL == "`metaData` STRUCT<`eventId`: STRING>")
+  }
+
+  test("SPARK-24849: toDDL should output field's comment") {
+    val struct = StructType(Seq(
+      StructField("b", BooleanType).withComment("Field's comment")))
+
+    assert(struct.toDDL == """`b` BOOLEAN COMMENT 'Field\'s comment'""")
+  }
 }
```
sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala

```diff
@@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.catalog.CatalogTableType._
 import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
 import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
 import org.apache.spark.sql.catalyst.plans.logical.Histogram
-import org.apache.spark.sql.catalyst.util.quoteIdentifier
+import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIdentifier}
 import org.apache.spark.sql.execution.datasources.{DataSource, PartitioningUtils}
 import org.apache.spark.sql.execution.datasources.csv.CSVFileFormat
 import org.apache.spark.sql.execution.datasources.json.JsonFileFormat
@@ -982,7 +982,7 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableCommand
   private def showHiveTableHeader(metadata: CatalogTable, builder: StringBuilder): Unit = {
     val columns = metadata.schema.filterNot { column =>
       metadata.partitionColumnNames.contains(column.name)
-    }.map(columnToDDLFragment)
+    }.map(_.toDDL)
 
     if (columns.nonEmpty) {
       builder ++= columns.mkString("(", ", ", ")\n")
@@ -994,14 +994,10 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableCommand
       .foreach(builder.append)
   }
 
-  private def columnToDDLFragment(column: StructField): String = {
-    val comment = column.getComment().map(escapeSingleQuotedString).map(" COMMENT '" + _ + "'")
-    s"${quoteIdentifier(column.name)} ${column.dataType.catalogString}${comment.getOrElse("")}"
-  }
-
   private def showHiveTableNonDataColumns(metadata: CatalogTable, builder: StringBuilder): Unit = {
     if (metadata.partitionColumnNames.nonEmpty) {
-      val partCols = metadata.partitionSchema.map(columnToDDLFragment)
+      val partCols = metadata.partitionSchema.map(_.toDDL)
       builder ++= partCols.mkString("PARTITIONED BY (", ", ", ")\n")
     }
 
@@ -1072,7 +1068,7 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableCommand
 
   private def showDataSourceTableDataColumns(
       metadata: CatalogTable, builder: StringBuilder): Unit = {
-    val columns = metadata.schema.fields.map(f => s"${quoteIdentifier(f.name)} ${f.dataType.sql}")
+    val columns = metadata.schema.fields.map(_.toDDL)
     builder ++= columns.mkString("(", ", ", ")\n")
   }
 
@@ -1117,15 +1113,4 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableCommand
       }
     }
   }
-
-  private def escapeSingleQuotedString(str: String): String = {
-    val builder = StringBuilder.newBuilder
-
-    str.foreach {
-      case '\'' => builder ++= s"\\\'"
-      case ch => builder += ch
-    }
-
-    builder.toString()
-  }
 }
```
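For context on why this fixes SPARK-24911: the removed `columnToDDLFragment` formatted column types with `dataType.catalogString`, which leaves nested field names unquoted, while `toDDL` uses `dataType.sql`, which quotes them. A sketch of the difference (hypothetical REPL session):

```scala
import org.apache.spark.sql.types._

val dt = new StructType().add("b", StringType)
dt.catalogString // "struct<b:string>"     (nested name unquoted)
dt.sql           // "STRUCT<`b`: STRING>"  (nested name quoted)
```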
sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala

```diff
@@ -288,6 +288,21 @@ class ShowCreateTableSuite extends QueryTest with SQLTestUtils with TestHiveSingleton
     }
   }
 
+  test("SPARK-24911: keep quotes for nested fields") {
+    withTable("t1") {
+      val createTable = "CREATE TABLE `t1`(`a` STRUCT<`b`: STRING>)"
+      sql(createTable)
+      val shownDDL = sql(s"SHOW CREATE TABLE t1")
+        .head()
+        .getString(0)
+        .split("\n")
+        .head
+      assert(shownDDL == createTable)
+
+      checkCreateTable("t1")
+    }
+  }
+
   private def createRawHiveTable(ddl: String): Unit = {
     hiveContext.sharedState.externalCatalog.unwrapped.asInstanceOf[HiveExternalCatalog]
       .client.runSqlHive(ddl)
```
