[SPARK-17409][SQL][FOLLOW-UP] Do Not Optimize Query in CTAS More Than Once

### What changes were proposed in this pull request?
This follow-up PR addresses the review [comment](apache#15048) on the original fix.

We added two test cases based on the suggestion from yhuai. One is a new test case that uses the `saveAsTable` API to create a data source table; the other covers CTAS on a Hive serde table.
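For context on the error these tests guard against, here is a minimal, hedged sketch of the failure mode (our reading of the error message quoted in the tests, not code from this PR): the optimizer constant-folds the grouping column `id` to the literal `0`, so if CTAS runs analysis and optimization a second time, the analyzer reinterprets that literal as a 1-based `GROUP BY` ordinal and rejects it.

```scala
// Hypothetical repro, assuming a build *without* the SPARK-17409 fix;
// on a fixed build this succeeds and writes a single row containing 0.
spark.sql("select 0 as id").createOrReplaceTempView("foo")
val df = spark.sql("select * from foo group by id")
// Before the fix, re-optimizing inside CTAS turned `group by id` into
// `group by 0`, which re-analysis read as an out-of-range ordinal:
// `GROUP BY position 0 is not in select list (valid range is [1, 1])`
df.write.mode("overwrite").saveAsTable("bar")
```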

Note: there is no need to backport this PR to 2.0. A separate PR will backport the whole fix, together with the new test cases, to Spark 2.0.

### How was this patch tested?
N/A

Author: gatorsmile <[email protected]>

Closes apache#15459 from gatorsmile/ctasOptimizedTestCases.
gatorsmile authored and cloud-fan committed Oct 25, 2016
1 parent 84a3399 commit d479c52
Showing 3 changed files with 37 additions and 3 deletions.
18 changes: 18 additions & 0 deletions sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -1599,6 +1599,24 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
     assert(df.persist.take(1).apply(0).toSeq(100).asInstanceOf[Long] == 100)
   }
 
+  test("SPARK-17409: Do Not Optimize Query in CTAS (Data source tables) More Than Once") {
+    withTable("bar") {
+      withTempView("foo") {
+        withSQLConf(SQLConf.DEFAULT_DATA_SOURCE_NAME.key -> "json") {
+          sql("select 0 as id").createOrReplaceTempView("foo")
+          val df = sql("select * from foo group by id")
+          // If we optimize the query in CTAS more than once, the following saveAsTable will fail
+          // with the error: `GROUP BY position 0 is not in select list (valid range is [1, 1])`
+          df.write.mode("overwrite").saveAsTable("bar")
+          checkAnswer(spark.table("bar"), Row(0) :: Nil)
+          val tableMetadata = spark.sessionState.catalog.getTableMetadata(TableIdentifier("bar"))
+          assert(tableMetadata.provider == Some("json"),
+            "the expected table is a data source table using json")
+        }
+      }
+    }
+  }
+
   test("copy results for sampling with replacement") {
     val df = Seq((1, 0), (2, 0), (3, 0)).toDF("a", "b")
     val sampleDf = df.sample(true, 2.00)
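As an aside on the assertion above, a short sketch of inspecting a saved table's provider from a Spark shell (assuming the usual `spark` session handle and the `bar` table created by the test):

```scala
import org.apache.spark.sql.catalyst.TableIdentifier

// Look up "bar" in the session catalog; `provider` names the data source
// implementation backing the table, e.g. Some("json") for the test above.
val meta = spark.sessionState.catalog.getTableMetadata(TableIdentifier("bar"))
println(meta.provider)
```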
2 changes: 1 addition & 1 deletion CreateTableAsSelectSuite.scala
@@ -238,7 +238,7 @@ class CreateTableAsSelectSuite
     }
   }
 
-  test("CTAS of decimal calculation") {
+  test("SPARK-17409: CTAS of decimal calculation") {
     withTable("tab2") {
       withTempView("tab1") {
         spark.range(99, 101).createOrReplaceTempView("tab1")
20 changes: 18 additions & 2 deletions MetastoreRelationSuite.scala
@@ -17,12 +17,14 @@

 package org.apache.spark.sql.hive
 
-import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.{QueryTest, Row}
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
+import org.apache.spark.sql.hive.test.TestHiveSingleton
+import org.apache.spark.sql.test.SQLTestUtils
 import org.apache.spark.sql.types.{IntegerType, StructField, StructType}
 
-class MetastoreRelationSuite extends SparkFunSuite {
+class MetastoreRelationSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
   test("makeCopy and toJSON should work") {
     val table = CatalogTable(
       identifier = TableIdentifier("test", Some("db")),
@@ -36,4 +38,18 @@ class MetastoreRelationSuite extends SparkFunSuite {
     // No exception should be thrown
     relation.toJSON
   }
+
+  test("SPARK-17409: Do Not Optimize Query in CTAS (Hive Serde Table) More Than Once") {
+    withTable("bar") {
+      withTempView("foo") {
+        sql("select 0 as id").createOrReplaceTempView("foo")
+        // If we optimize the query in CTAS more than once, the following CREATE TABLE will fail
+        // with the error: `GROUP BY position 0 is not in select list (valid range is [1, 1])`
+        sql("CREATE TABLE bar AS SELECT * FROM foo group by id")
+        checkAnswer(spark.table("bar"), Row(0) :: Nil)
+        val tableMetadata = spark.sessionState.catalog.getTableMetadata(TableIdentifier("bar"))
+        assert(tableMetadata.provider == Some("hive"), "the expected table is a Hive serde table")
+      }
+    }
+  }
 }
