From b38a21ef6146784e4b93ef4ce8c899f1eee14572 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Mon, 16 Nov 2015 18:30:26 -0800 Subject: [PATCH 01/20] SPARK-11633 --- .../spark/sql/catalyst/analysis/Analyzer.scala | 3 ++- .../spark/sql/hive/execution/SQLQuerySuite.scala | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 2f4670b55bdba..5a5b71e52dd79 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -425,7 +425,8 @@ class Analyzer( */ j case Some((oldRelation, newRelation)) => - val attributeRewrites = AttributeMap(oldRelation.output.zip(newRelation.output)) + val attributeRewrites = + AttributeMap(oldRelation.output.zip(newRelation.output).filter(x => x._1 != x._2)) val newRight = right transformUp { case r if r == oldRelation => newRelation } transformUp { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 3427152b2da02..5e00546a74c00 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -51,6 +51,8 @@ case class Order( state: String, month: Int) +case class Individual(F1: Integer, F2: Integer) + case class WindowData( month: Int, area: String, @@ -1479,4 +1481,18 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { |FROM (SELECT '{"f1": "value1", "f2": 12}' json, 'hello' as str) test """.stripMargin), Row("value1", "12", 3.14, "hello")) } + + test ("SPARK-11633: HiveContext throws TreeNode Exception : Failed to Copy Node") { + val rdd1 = sparkContext.parallelize(Seq( Individual(1,3), Individual(2,1))) + val df = hiveContext.createDataFrame(rdd1) + df.registerTempTable("foo") + val df2 = sql("select f1, F2 as F2 from foo") + df2.registerTempTable("foo2") + df2.registerTempTable("foo3") + + checkAnswer(sql( + """ + SELECT a.F1 FROM foo2 a INNER JOIN foo3 b ON a.F2=b.F2 + """.stripMargin), Row(2) :: Row(1) :: Nil) + } } From 0546772f151f83d6d3cf4d000cbe341f52545007 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Fri, 20 Nov 2015 10:56:45 -0800 Subject: [PATCH 02/20] converge --- .../spark/sql/catalyst/analysis/Analyzer.scala | 3 +-- .../spark/sql/hive/execution/SQLQuerySuite.scala | 15 --------------- 2 files changed, 1 insertion(+), 17 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 7c9512fbd00aa..47962ebe6ef82 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -417,8 +417,7 @@ class Analyzer( */ j case Some((oldRelation, newRelation)) => - val attributeRewrites = - AttributeMap(oldRelation.output.zip(newRelation.output).filter(x => x._1 != x._2)) + val attributeRewrites = AttributeMap(oldRelation.output.zip(newRelation.output)) val newRight = right transformUp { case r if r == oldRelation => newRelation } transformUp { diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 5e00546a74c00..61d9dcd37572b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -51,8 +51,6 @@ case class Order( state: String, month: Int) -case class Individual(F1: Integer, F2: Integer) - case class WindowData( month: Int, area: String, @@ -1481,18 +1479,5 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { |FROM (SELECT '{"f1": "value1", "f2": 12}' json, 'hello' as str) test """.stripMargin), Row("value1", "12", 3.14, "hello")) } - - test ("SPARK-11633: HiveContext throws TreeNode Exception : Failed to Copy Node") { - val rdd1 = sparkContext.parallelize(Seq( Individual(1,3), Individual(2,1))) - val df = hiveContext.createDataFrame(rdd1) - df.registerTempTable("foo") - val df2 = sql("select f1, F2 as F2 from foo") - df2.registerTempTable("foo2") - df2.registerTempTable("foo3") - - checkAnswer(sql( - """ - SELECT a.F1 FROM foo2 a INNER JOIN foo3 b ON a.F2=b.F2 - """.stripMargin), Row(2) :: Row(1) :: Nil) } } From b37a64f13956b6ddd0e38ddfd9fe1caee611f1a8 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Fri, 20 Nov 2015 10:58:37 -0800 Subject: [PATCH 03/20] converge --- .../org/apache/spark/sql/hive/execution/SQLQuerySuite.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 61d9dcd37572b..3427152b2da02 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -1479,5 +1479,4 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { |FROM (SELECT '{"f1": "value1", "f2": 12}' json, 'hello' as str) test """.stripMargin), Row("value1", "12", 3.14, "hello")) } - } } From e09ac7c7981ff8ee405978b019abca1a6a0168cb Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sat, 2 Apr 2016 16:51:07 -0700 Subject: [PATCH 04/20] unsupported operations in SQL Context. 
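This makes SQLContext fail fast on two Hive-only constructs instead of
producing a plan it cannot execute: the parser now rejects DROP TABLE
with a ParseException, and the planner raises a clear "use a
HiveContext instead" error for script transforms (SELECT TRANSFORM ...
USING ...). Regression tests cover both paths.

For illustration only, a standalone sketch of the override-and-throw
pattern the parser side applies; the mini builder and error type below
are hypothetical stand-ins, not the ANTLR-generated Spark classes:

    // A base builder produces a plan; a dialect-specific subclass
    // overrides one visit method to fail fast at parse time instead.
    class ParseError(msg: String) extends RuntimeException(msg)

    sealed trait Plan
    case class DropTablePlan(name: String) extends Plan

    class BaseAstBuilder {
      def visitDropTable(name: String): Plan = DropTablePlan(name)
    }

    class SqlContextAstBuilder extends BaseAstBuilder {
      override def visitDropTable(name: String): Plan =
        throw new ParseError("DROP TABLE is not supported in SQLContext.")
    }

    object RejectionDemo extends App {
      // Prints the parse-time error message instead of building a plan.
      try new SqlContextAstBuilder().visitDropTable("D1.T1")
      catch { case e: ParseError => println(e.getMessage) }
    }
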
--- .../apache/spark/sql/execution/SparkSqlParser.scala | 7 +++++++ .../spark/sql/execution/SparkStrategies.scala | 3 +++ .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 13 +++++++++++++ .../sql/execution/command/DDLCommandSuite.scala | 5 +++++ 4 files changed, 28 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index ff3ab7746cc86..c898db9e539e4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -853,4 +853,11 @@ class SparkSqlAstBuilder extends AstBuilder { override def visitConstantList(ctx: ConstantListContext): Seq[String] = withOrigin(ctx) { ctx.constant.asScala.map(visitStringConstant) } + + /** + * DropTable: not supported in SQL Context + */ + override def visitDropTable(ctx: DropTableContext): LogicalPlan = withOrigin(ctx) { + throw new ParseException(s"DROP TABLE is not supported in SQLContext.", ctx) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index 5bcc172ca7655..ee57310c3d315 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -332,6 +332,9 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] { def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { case r: RunnableCommand => ExecutedCommand(r) :: Nil + case s: logical.ScriptTransformation => + sys.error("Sript Transform is not supported in SQLContext. Use a HiveContext instead.") + case logical.Distinct(child) => throw new IllegalStateException( "logical distinct operator should have been replaced by aggregate in the optimizer") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index b727e88668370..b9a6625cb39a8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -658,6 +658,19 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { } } + test("script transform") { + val e = intercept[RuntimeException] { + sql( + """SELECT TRANSFORM (key, value) + |USING 'cat' AS (tKey, tValue) + |FROM testData + """. + stripMargin).show() + } + assert(e.getMessage contains + "Sript Transform is not supported in SQLContext. 
Use a HiveContext instead.") + } + test("date row") { checkAnswer(sql( """select cast("2015-01-28" as date) from testData limit 1"""), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala index 458f36e8323c5..aff37683f806d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.execution.command import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.execution.SparkSqlParser import org.apache.spark.sql.execution.datasources.BucketSpec @@ -773,4 +774,8 @@ class DDLCommandSuite extends PlanTest { comparePlans(parsed2, expected2) } + test("commands in HiveSqlParser") { + intercept[ParseException] { + parser.parsePlan("DROP TABLE D1.T1") + } } From 62c814d630b2a1e3aa004942f178628a83a40919 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sat, 2 Apr 2016 17:10:45 -0700 Subject: [PATCH 05/20] style fix. --- .../scala/org/apache/spark/sql/execution/SparkStrategies.scala | 2 +- .../apache/spark/sql/execution/command/DDLCommandSuite.scala | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index ee57310c3d315..e2e5c46532994 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -332,7 +332,7 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] { def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { case r: RunnableCommand => ExecutedCommand(r) :: Nil - case s: logical.ScriptTransformation => + case _: logical.ScriptTransformation => sys.error("Sript Transform is not supported in SQLContext. 
Use a HiveContext instead.") case logical.Distinct(child) => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala index aff37683f806d..e7b337cb17ee7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala @@ -778,4 +778,5 @@ class DDLCommandSuite extends PlanTest { intercept[ParseException] { parser.parsePlan("DROP TABLE D1.T1") } + } } From 41782913a7c92666110ed5b63f172dba89b44d7d Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sat, 2 Apr 2016 21:43:54 -0700 Subject: [PATCH 06/20] alter view + create view --- .../spark/sql/execution/SparkSqlParser.scala | 14 ++++++++++++++ .../sql/execution/command/DDLCommandSuite.scala | 6 ++++++ 2 files changed, 20 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index c898db9e539e4..2c0b4b49dc6d8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -860,4 +860,18 @@ class SparkSqlAstBuilder extends AstBuilder { override def visitDropTable(ctx: DropTableContext): LogicalPlan = withOrigin(ctx) { throw new ParseException(s"DROP TABLE is not supported in SQLContext.", ctx) } + + /** + * CreateView: not supported in SQL Context + */ + override def visitCreateView(ctx: CreateViewContext): LogicalPlan = withOrigin(ctx) { + throw new ParseException(s"CREATE VIEW is not supported in SQLContext.", ctx) + } + + /** + * AlterViewQuery: not supported in SQL Context + */ + override def visitAlterViewQuery(ctx: AlterViewQueryContext): LogicalPlan = withOrigin(ctx) { + throw new ParseException(s"ALTER VIEW AS QUERY is not supported in SQLContext.", ctx) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala index e7b337cb17ee7..91184f5648192 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala @@ -778,5 +778,11 @@ class DDLCommandSuite extends PlanTest { intercept[ParseException] { parser.parsePlan("DROP TABLE D1.T1") } + intercept[ParseException] { + parser.parsePlan("CREATE VIEW testView AS SELECT id FROM tab") + } + intercept[ParseException] { + parser.parsePlan("ALTER VIEW testView AS SELECT id FROM tab") + } } } From 5807347bcfdb2133e6e0e1f75639374bc2044ad7 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sat, 2 Apr 2016 22:36:59 -0700 Subject: [PATCH 07/20] create table --- .../org/apache/spark/sql/execution/SparkSqlParser.scala | 7 +++++++ .../spark/sql/execution/command/DDLCommandSuite.scala | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 2c0b4b49dc6d8..366614d2ec860 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -874,4 +874,11 @@ class SparkSqlAstBuilder extends AstBuilder { override def 
visitAlterViewQuery(ctx: AlterViewQueryContext): LogicalPlan = withOrigin(ctx) { throw new ParseException(s"ALTER VIEW AS QUERY is not supported in SQLContext.", ctx) } + + /** + * CreateTable: not supported in SQL Context + */ + override def visitCreateTable(ctx: CreateTableContext): LogicalPlan = { + throw new ParseException(s"CREATE non-temporary TABLE is not supported in SQLContext.", ctx) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala index 91184f5648192..65eda53ed9ce9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala @@ -784,5 +784,12 @@ class DDLCommandSuite extends PlanTest { intercept[ParseException] { parser.parsePlan("ALTER VIEW testView AS SELECT id FROM tab") } + intercept[ParseException] { + parser.parsePlan( + """ + |CREATE EXTERNAL TABLE parquet_tab2(c1 INT, c2 STRING) + |TBLPROPERTIES('prop1Key '= "prop1Val", ' `prop2Key` '= "prop2Val") + """.stripMargin) + } } } From 3a2f06e313357af357a291ed57bce3f4f5005651 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sat, 2 Apr 2016 22:39:34 -0700 Subject: [PATCH 08/20] style fix. --- .../org/apache/spark/sql/execution/SparkStrategies.scala | 2 +- .../src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index e2e5c46532994..bbaa0984569f3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -333,7 +333,7 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] { case r: RunnableCommand => ExecutedCommand(r) :: Nil case _: logical.ScriptTransformation => - sys.error("Sript Transform is not supported in SQLContext. Use a HiveContext instead.") + sys.error("Script Transform is not supported in SQLContext. Use a HiveContext instead.") case logical.Distinct(child) => throw new IllegalStateException( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index b9a6625cb39a8..0899192e2a1a7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -664,11 +664,10 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { """SELECT TRANSFORM (key, value) |USING 'cat' AS (tKey, tValue) |FROM testData - """. - stripMargin).show() + """.stripMargin).show() } assert(e.getMessage contains - "Sript Transform is not supported in SQLContext. Use a HiveContext instead.") + "Script Transform is not supported in SQLContext. 
Use a HiveContext instead.") } test("date row") { From f3d886fc399354fde3c0f61a0442ad03003cd1e5 Mon Sep 17 00:00:00 2001 From: Herman van Hovell Date: Sun, 3 Apr 2016 16:59:52 +0200 Subject: [PATCH 09/20] Issue Error Messages for Unsupported Operators --- .../spark/sql/catalyst/parser/SqlBase.g4 | 14 +-- .../sql/catalyst/parser/AstBuilder.scala | 110 +++++++++--------- 2 files changed, 62 insertions(+), 62 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index f34bb061e471b..0820663ea060a 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -464,15 +464,15 @@ booleanExpression // https://github.com/antlr/antlr4/issues/780 // https://github.com/antlr/antlr4/issues/781 predicated - : valueExpression predicate[$valueExpression.ctx]? + : valueExpression predicate? ; -predicate[ParserRuleContext value] - : NOT? BETWEEN lower=valueExpression AND upper=valueExpression #between - | NOT? IN '(' expression (',' expression)* ')' #inList - | NOT? IN '(' query ')' #inSubquery - | NOT? like=(RLIKE | LIKE) pattern=valueExpression #like - | IS NOT? NULL #nullPredicate +predicate + : NOT? kind=BETWEEN lower=valueExpression AND upper=valueExpression + | NOT? kind=IN '(' expression (',' expression)* ')' + | NOT? kind=IN '(' query ')' + | NOT? kind=(RLIKE | LIKE) pattern=valueExpression + | IS NOT? kind=NULL ; valueExpression diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 8541b1f7c62d0..db64a582be882 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -22,7 +22,7 @@ import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import org.antlr.v4.runtime.{ParserRuleContext, Token} -import org.antlr.v4.runtime.tree.{ParseTree, TerminalNode} +import org.antlr.v4.runtime.tree.{ParseTree, RuleNode, TerminalNode} import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier} @@ -46,6 +46,19 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { ctx.accept(this).asInstanceOf[T] } + /** + * Override the default behavior for all visit methods. This will only return a non-null result + * when the context has only one child. This is done because there is no generic method to + * combine the results of the context children. In all other cases null is returned. + */ + override def visitChildren(node: RuleNode): AnyRef = { + if (node.getChildCount == 1) { + node.getChild(0).accept(this) + } else { + null + } + } + override def visitSingleStatement(ctx: SingleStatementContext): LogicalPlan = withOrigin(ctx) { visit(ctx.statement).asInstanceOf[LogicalPlan] } @@ -778,17 +791,6 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { trees.asScala.map(expression) } - /** - * Invert a boolean expression if it has a valid NOT clause. - */ - private def invertIfNotDefined(expression: Expression, not: TerminalNode): Expression = { - if (not != null) { - Not(expression) - } else { - expression - } - } - /** * Create a star (i.e. all) expression; this selects all elements (in the specified object). 
* Both un-targeted (global) and targeted aliases are supported. @@ -909,57 +911,55 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { } /** - * Create a BETWEEN expression. This tests if an expression lies with in the bounds set by two - * other expressions. The inverse can also be created. - */ - override def visitBetween(ctx: BetweenContext): Expression = withOrigin(ctx) { - val value = expression(ctx.value) - val between = And( - GreaterThanOrEqual(value, expression(ctx.lower)), - LessThanOrEqual(value, expression(ctx.upper))) - invertIfNotDefined(between, ctx.NOT) - } - - /** - * Create an IN expression. This tests if the value of the left hand side expression is - * contained by the sequence of expressions on the right hand side. + * Create a predicated expression. A predicated expression is a normal expression with a + * predicate attached to it, for example: + * {{{ + * a + 1 IS NULL + * }}} */ - override def visitInList(ctx: InListContext): Expression = withOrigin(ctx) { - val in = In(expression(ctx.value), ctx.expression().asScala.map(expression)) - invertIfNotDefined(in, ctx.NOT) + override def visitPredicated(ctx: PredicatedContext): Expression = withOrigin(ctx) { + val e = expression(ctx.valueExpression) + if (ctx.predicate != null) { + withPredicate(e, ctx.predicate) + } else { + e + } } /** - * Create an IN expression, where the the right hand side is a query. This is unsupported. + * Add a predicate to the given expression. Supported expressions are: + * - (NOT) BETWEEN + * - (NOT) IN + * - (NOT) LIKE + * - (NOT) RLIKE + * - IS (NOT) NULL. */ - override def visitInSubquery(ctx: InSubqueryContext): Expression = { - throw new ParseException("IN with a Sub-query is currently not supported.", ctx) - } + private def withPredicate(e: Expression, ctx: PredicateContext): Expression = withOrigin(ctx) { + // Invert a predicate if it has a valid NOT clause. + def invertIfNotDefined(e: Expression): Expression = ctx.NOT match { + case null => e + case not => Not(e) + } - /** - * Create a (R)LIKE/REGEXP expression. - */ - override def visitLike(ctx: LikeContext): Expression = { - val left = expression(ctx.value) - val right = expression(ctx.pattern) - val like = ctx.like.getType match { + // Create the predicate. + ctx.kind.getType match { + case SqlBaseParser.BETWEEN => + // BETWEEN is translated to lower <= e && e <= upper + invertIfNotDefined(And( + GreaterThanOrEqual(e, expression(ctx.lower)), + LessThanOrEqual(e, expression(ctx.upper)))) + case SqlBaseParser.IN if ctx.query != null => + throw new ParseException("IN with a Sub-query is currently not supported.", ctx) + case SqlBaseParser.IN => + invertIfNotDefined(In(e, ctx.expression.asScala.map(expression))) case SqlBaseParser.LIKE => - Like(left, right) + invertIfNotDefined(Like(e, expression(ctx.pattern))) case SqlBaseParser.RLIKE => - RLike(left, right) - } - invertIfNotDefined(like, ctx.NOT) - } - - /** - * Create an IS (NOT) NULL expression. 
- */ - override def visitNullPredicate(ctx: NullPredicateContext): Expression = withOrigin(ctx) { - val value = expression(ctx.value) - if (ctx.NOT != null) { - IsNotNull(value) - } else { - IsNull(value) + invertIfNotDefined(RLike(e, expression(ctx.pattern))) + case SqlBaseParser.NULL if ctx.NOT != null => + IsNotNull(e) + case SqlBaseParser.NULL => + IsNull(e) } } From 51a9b69691613cec301a92cd1a3195ee4081ba3d Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sun, 3 Apr 2016 08:44:02 -0700 Subject: [PATCH 10/20] merge Herman's fix and revert the changes back --- .../spark/sql/catalyst/parser/SqlBase.g4 | 14 +-- .../sql/catalyst/parser/AstBuilder.scala | 110 +++++++++--------- .../spark/sql/execution/SparkSqlParser.scala | 28 ----- 3 files changed, 62 insertions(+), 90 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index f34bb061e471b..0820663ea060a 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -464,15 +464,15 @@ booleanExpression // https://github.com/antlr/antlr4/issues/780 // https://github.com/antlr/antlr4/issues/781 predicated - : valueExpression predicate[$valueExpression.ctx]? + : valueExpression predicate? ; -predicate[ParserRuleContext value] - : NOT? BETWEEN lower=valueExpression AND upper=valueExpression #between - | NOT? IN '(' expression (',' expression)* ')' #inList - | NOT? IN '(' query ')' #inSubquery - | NOT? like=(RLIKE | LIKE) pattern=valueExpression #like - | IS NOT? NULL #nullPredicate +predicate + : NOT? kind=BETWEEN lower=valueExpression AND upper=valueExpression + | NOT? kind=IN '(' expression (',' expression)* ')' + | NOT? kind=IN '(' query ')' + | NOT? kind=(RLIKE | LIKE) pattern=valueExpression + | IS NOT? kind=NULL ; valueExpression diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index c350f3049f152..a800e36ab21f7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -22,7 +22,7 @@ import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import org.antlr.v4.runtime.{ParserRuleContext, Token} -import org.antlr.v4.runtime.tree.{ParseTree, TerminalNode} +import org.antlr.v4.runtime.tree.{ParseTree, RuleNode, TerminalNode} import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier} @@ -46,6 +46,19 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { ctx.accept(this).asInstanceOf[T] } + /** + * Override the default behavior for all visit methods. This will only return a non-null result + * when the context has only one child. This is done because there is no generic method to + * combine the results of the context children. In all other cases null is returned. 
+ */ + override def visitChildren(node: RuleNode): AnyRef = { + if (node.getChildCount == 1) { + node.getChild(0).accept(this) + } else { + null + } + } + override def visitSingleStatement(ctx: SingleStatementContext): LogicalPlan = withOrigin(ctx) { visit(ctx.statement).asInstanceOf[LogicalPlan] } @@ -778,17 +791,6 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { trees.asScala.map(expression) } - /** - * Invert a boolean expression if it has a valid NOT clause. - */ - private def invertIfNotDefined(expression: Expression, not: TerminalNode): Expression = { - if (not != null) { - Not(expression) - } else { - expression - } - } - /** * Create a star (i.e. all) expression; this selects all elements (in the specified object). * Both un-targeted (global) and targeted aliases are supported. @@ -909,57 +911,55 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { } /** - * Create a BETWEEN expression. This tests if an expression lies with in the bounds set by two - * other expressions. The inverse can also be created. - */ - override def visitBetween(ctx: BetweenContext): Expression = withOrigin(ctx) { - val value = expression(ctx.value) - val between = And( - GreaterThanOrEqual(value, expression(ctx.lower)), - LessThanOrEqual(value, expression(ctx.upper))) - invertIfNotDefined(between, ctx.NOT) - } - - /** - * Create an IN expression. This tests if the value of the left hand side expression is - * contained by the sequence of expressions on the right hand side. + * Create a predicated expression. A predicated expression is a normal expression with a + * predicate attached to it, for example: + * {{{ + * a + 1 IS NULL + * }}} */ - override def visitInList(ctx: InListContext): Expression = withOrigin(ctx) { - val in = In(expression(ctx.value), ctx.expression().asScala.map(expression)) - invertIfNotDefined(in, ctx.NOT) + override def visitPredicated(ctx: PredicatedContext): Expression = withOrigin(ctx) { + val e = expression(ctx.valueExpression) + if (ctx.predicate != null) { + withPredicate(e, ctx.predicate) + } else { + e + } } /** - * Create an IN expression, where the the right hand side is a query. This is unsupported. + * Add a predicate to the given expression. Supported expressions are: + * - (NOT) BETWEEN + * - (NOT) IN + * - (NOT) LIKE + * - (NOT) RLIKE + * - IS (NOT) NULL. */ - override def visitInSubquery(ctx: InSubqueryContext): Expression = { - throw new ParseException("IN with a Sub-query is currently not supported.", ctx) - } + private def withPredicate(e: Expression, ctx: PredicateContext): Expression = withOrigin(ctx) { + // Invert a predicate if it has a valid NOT clause. + def invertIfNotDefined(e: Expression): Expression = ctx.NOT match { + case null => e + case not => Not(e) + } - /** - * Create a (R)LIKE/REGEXP expression. - */ - override def visitLike(ctx: LikeContext): Expression = { - val left = expression(ctx.value) - val right = expression(ctx.pattern) - val like = ctx.like.getType match { + // Create the predicate. 
+ ctx.kind.getType match { + case SqlBaseParser.BETWEEN => + // BETWEEN is translated to lower <= e && e <= upper + invertIfNotDefined(And( + GreaterThanOrEqual(e, expression(ctx.lower)), + LessThanOrEqual(e, expression(ctx.upper)))) + case SqlBaseParser.IN if ctx.query != null => + throw new ParseException("IN with a Sub-query is currently not supported.", ctx) + case SqlBaseParser.IN => + invertIfNotDefined(In(e, ctx.expression.asScala.map(expression))) case SqlBaseParser.LIKE => - Like(left, right) + invertIfNotDefined(Like(e, expression(ctx.pattern))) case SqlBaseParser.RLIKE => - RLike(left, right) - } - invertIfNotDefined(like, ctx.NOT) - } - - /** - * Create an IS (NOT) NULL expression. - */ - override def visitNullPredicate(ctx: NullPredicateContext): Expression = withOrigin(ctx) { - val value = expression(ctx.value) - if (ctx.NOT != null) { - IsNotNull(value) - } else { - IsNull(value) + invertIfNotDefined(RLike(e, expression(ctx.pattern))) + case SqlBaseParser.NULL if ctx.NOT != null => + IsNotNull(e) + case SqlBaseParser.NULL => + IsNull(e) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 366614d2ec860..ff3ab7746cc86 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -853,32 +853,4 @@ class SparkSqlAstBuilder extends AstBuilder { override def visitConstantList(ctx: ConstantListContext): Seq[String] = withOrigin(ctx) { ctx.constant.asScala.map(visitStringConstant) } - - /** - * DropTable: not supported in SQL Context - */ - override def visitDropTable(ctx: DropTableContext): LogicalPlan = withOrigin(ctx) { - throw new ParseException(s"DROP TABLE is not supported in SQLContext.", ctx) - } - - /** - * CreateView: not supported in SQL Context - */ - override def visitCreateView(ctx: CreateViewContext): LogicalPlan = withOrigin(ctx) { - throw new ParseException(s"CREATE VIEW is not supported in SQLContext.", ctx) - } - - /** - * AlterViewQuery: not supported in SQL Context - */ - override def visitAlterViewQuery(ctx: AlterViewQueryContext): LogicalPlan = withOrigin(ctx) { - throw new ParseException(s"ALTER VIEW AS QUERY is not supported in SQLContext.", ctx) - } - - /** - * CreateTable: not supported in SQL Context - */ - override def visitCreateTable(ctx: CreateTableContext): LogicalPlan = { - throw new ParseException(s"CREATE non-temporary TABLE is not supported in SQLContext.", ctx) - } } From 6b1c9fb70ddfab957cceda0f65e401500347319e Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sun, 3 Apr 2016 14:09:00 -0700 Subject: [PATCH 11/20] storage file format --- .../spark/sql/hive/execution/HiveSqlParser.scala | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala index 12e4f49756c35..481025f155804 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala @@ -133,6 +133,16 @@ class HiveSqlAstBuilder extends SparkSqlAstBuilder { } } + override def visitCreateFileFormat( + ctx: CreateFileFormatContext): CatalogStorageFormat = withOrigin(ctx) { + // Create the predicate. 
+ if (ctx.storageHandler == null) { + typedVisit[CatalogStorageFormat](ctx.fileFormat) + } else { + typedVisit[CatalogStorageFormat](ctx.storageHandler) + } + } + /** * Create a [[CreateTableAsSelect]] command. */ From 237b73bee8f95feae8fb38d13b2e79c25ef70bc6 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sun, 3 Apr 2016 14:22:31 -0700 Subject: [PATCH 12/20] syntax fix. --- .../test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 0899192e2a1a7..ba2458e411540 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -661,10 +661,11 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { test("script transform") { val e = intercept[RuntimeException] { sql( - """SELECT TRANSFORM (key, value) - |USING 'cat' AS (tKey, tValue) - |FROM testData - """.stripMargin).show() + """ + |SELECT TRANSFORM (key, value) + |USING 'cat' AS (tKey, tValue) + |FROM testData + """.stripMargin).show() } assert(e.getMessage contains "Script Transform is not supported in SQLContext. Use a HiveContext instead.") From 0b786c037c28f6ba633580d8492380ad59a6b1e5 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sun, 3 Apr 2016 14:41:52 -0700 Subject: [PATCH 13/20] address comments --- .../org/apache/spark/sql/hive/execution/HiveSqlParser.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala index 481025f155804..61f0c5d7a3223 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala @@ -133,9 +133,11 @@ class HiveSqlAstBuilder extends SparkSqlAstBuilder { } } + /** + * Create a [[CatalogStorageFormat]]. This is part of the [[CreateTableAsSelect]] command. + */ override def visitCreateFileFormat( ctx: CreateFileFormatContext): CatalogStorageFormat = withOrigin(ctx) { - // Create the predicate. if (ctx.storageHandler == null) { typedVisit[CatalogStorageFormat](ctx.fileFormat) } else { From c7c946103cd69aaba4dabb968d15602d2bdaaae0 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sun, 3 Apr 2016 23:44:08 -0700 Subject: [PATCH 14/20] address comments. --- .../org/apache/spark/sql/hive/execution/HiveSqlParser.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala index 61f0c5d7a3223..a7e2b1d540776 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala @@ -141,7 +141,7 @@ class HiveSqlAstBuilder extends SparkSqlAstBuilder { if (ctx.storageHandler == null) { typedVisit[CatalogStorageFormat](ctx.fileFormat) } else { - typedVisit[CatalogStorageFormat](ctx.storageHandler) + visitStorageHandler(ctx.storageHandler) } } @@ -403,7 +403,8 @@ class HiveSqlAstBuilder extends SparkSqlAstBuilder { /** * Storage Handlers are currently not supported in the statements we support (CTAS). 
*/ - override def visitStorageHandler(ctx: StorageHandlerContext): AnyRef = withOrigin(ctx) { + override def visitStorageHandler( + ctx: StorageHandlerContext): CatalogStorageFormat = withOrigin(ctx) { throw new ParseException("Storage Handlers are currently unsupported.", ctx) } From e69a7e8121dd8565e016ae0c400a66afe7ad00fe Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Mon, 4 Apr 2016 00:50:38 -0700 Subject: [PATCH 15/20] address comments. --- .../spark/sql/catalyst/parser/AstBuilder.scala | 6 ++++-- .../spark/sql/execution/SparkSqlParser.scala | 16 ++++++++++++++++ .../spark/sql/execution/SparkStrategies.scala | 3 --- .../org/apache/spark/sql/SQLQuerySuite.scala | 13 ------------- .../sql/execution/command/DDLCommandSuite.scala | 5 ++++- .../spark/sql/hive/execution/HiveSqlParser.scala | 3 ++- 6 files changed, 26 insertions(+), 20 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index db64a582be882..bc8048359751d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -364,7 +364,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { string(script), attributes, withFilter, - withScriptIOSchema(inRowFormat, recordWriter, outRowFormat, recordReader, schemaLess)) + withScriptIOSchema(ctx, inRowFormat, recordWriter, outRowFormat, recordReader, schemaLess) + .asInstanceOf[ScriptInputOutputSchema]) case SqlBaseParser.SELECT => // Regular select @@ -411,11 +412,12 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { * Create a (Hive based) [[ScriptInputOutputSchema]]. */ protected def withScriptIOSchema( + ctx: QuerySpecificationContext, inRowFormat: RowFormatContext, recordWriter: Token, outRowFormat: RowFormatContext, recordReader: Token, - schemaLess: Boolean): ScriptInputOutputSchema = null + schemaLess: Boolean): AnyRef = null /** * Create a logical plan for a given 'FROM' clause. Note that we support multiple (comma diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index ff3ab7746cc86..1b4531257d3e8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -18,6 +18,8 @@ package org.apache.spark.sql.execution import scala.collection.JavaConverters._ +import org.antlr.v4.runtime.Token + import org.apache.spark.sql.SaveMode import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.parser.{AbstractSqlParser, AstBuilder, ParseException} @@ -182,6 +184,20 @@ class SparkSqlAstBuilder extends AstBuilder { (visitTableIdentifier(ctx.tableIdentifier), temporary, ifNotExists, ctx.EXTERNAL != null) } + /** + * Unsupported operation in SQL Context. + */ + override protected def withScriptIOSchema( + ctx: QuerySpecificationContext, + inRowFormat: RowFormatContext, + recordWriter: Token, + outRowFormat: RowFormatContext, + recordReader: Token, + schemaLess: Boolean): AnyRef = { + throw new ParseException( + "Script Transform is not supported in SQLContext. Use a HiveContext instead", ctx) + } + /** * Create a [[CreateTableUsing]] or a [[CreateTableUsingAsSelect]] logical plan. 
* diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index bbaa0984569f3..5bcc172ca7655 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -332,9 +332,6 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] { def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { case r: RunnableCommand => ExecutedCommand(r) :: Nil - case _: logical.ScriptTransformation => - sys.error("Script Transform is not supported in SQLContext. Use a HiveContext instead.") - case logical.Distinct(child) => throw new IllegalStateException( "logical distinct operator should have been replaced by aggregate in the optimizer") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index ba2458e411540..b727e88668370 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -658,19 +658,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { } } - test("script transform") { - val e = intercept[RuntimeException] { - sql( - """ - |SELECT TRANSFORM (key, value) - |USING 'cat' AS (tKey, tValue) - |FROM testData - """.stripMargin).show() - } - assert(e.getMessage contains - "Script Transform is not supported in SQLContext. Use a HiveContext instead.") - } - test("date row") { checkAnswer(sql( """select cast("2015-01-28" as date) from testData limit 1"""), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala index 65eda53ed9ce9..fd9bd8b0f2436 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala @@ -774,7 +774,7 @@ class DDLCommandSuite extends PlanTest { comparePlans(parsed2, expected2) } - test("commands in HiveSqlParser") { + test("commands only available in HiveContext") { intercept[ParseException] { parser.parsePlan("DROP TABLE D1.T1") } @@ -791,5 +791,8 @@ class DDLCommandSuite extends PlanTest { |TBLPROPERTIES('prop1Key '= "prop1Val", ' `prop2Key` '= "prop2Val") """.stripMargin) } + intercept[ParseException] { + parser.parsePlan("SELECT TRANSFORM (key, value) USING 'cat' AS (tKey, tValue) FROM testData") + } } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala index a7e2b1d540776..7dfaa913add37 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala @@ -294,11 +294,12 @@ class HiveSqlAstBuilder extends SparkSqlAstBuilder { * Create a [[HiveScriptIOSchema]]. 
*/ override protected def withScriptIOSchema( + ctx: QuerySpecificationContext, inRowFormat: RowFormatContext, recordWriter: Token, outRowFormat: RowFormatContext, recordReader: Token, - schemaLess: Boolean): HiveScriptIOSchema = { + schemaLess: Boolean): AnyRef = { if (recordWriter != null || recordReader != null) { logWarning("Used defined record reader/writer classes are currently ignored.") } From e4f06f9acc1f76f5b3122841571e8f6b9308b0b7 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Mon, 4 Apr 2016 07:26:22 -0700 Subject: [PATCH 16/20] revert it back. --- .../spark/sql/catalyst/parser/AstBuilder.scala | 9 ++++++--- .../spark/sql/execution/SparkSqlParser.scala | 14 -------------- .../spark/sql/hive/execution/HiveSqlParser.scala | 4 ++-- 3 files changed, 8 insertions(+), 19 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index bc8048359751d..73e9d95ff4f55 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -364,8 +364,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { string(script), attributes, withFilter, - withScriptIOSchema(ctx, inRowFormat, recordWriter, outRowFormat, recordReader, schemaLess) - .asInstanceOf[ScriptInputOutputSchema]) + withScriptIOSchema( + ctx, inRowFormat, recordWriter, outRowFormat, recordReader, schemaLess)) case SqlBaseParser.SELECT => // Regular select @@ -417,7 +417,10 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { recordWriter: Token, outRowFormat: RowFormatContext, recordReader: Token, - schemaLess: Boolean): AnyRef = null + schemaLess: Boolean): ScriptInputOutputSchema = { + throw new ParseException( + "Script Transform is not supported in SQLContext. Use a HiveContext instead", ctx) + } /** * Create a logical plan for a given 'FROM' clause. Note that we support multiple (comma diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 1b4531257d3e8..70041ca218adc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -184,20 +184,6 @@ class SparkSqlAstBuilder extends AstBuilder { (visitTableIdentifier(ctx.tableIdentifier), temporary, ifNotExists, ctx.EXTERNAL != null) } - /** - * Unsupported operation in SQL Context. - */ - override protected def withScriptIOSchema( - ctx: QuerySpecificationContext, - inRowFormat: RowFormatContext, - recordWriter: Token, - outRowFormat: RowFormatContext, - recordReader: Token, - schemaLess: Boolean): AnyRef = { - throw new ParseException( - "Script Transform is not supported in SQLContext. Use a HiveContext instead", ctx) - } - /** * Create a [[CreateTableUsing]] or a [[CreateTableUsingAsSelect]] logical plan. 
* diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala index 7dfaa913add37..16486bde94a0a 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql.catalyst.catalog.{CatalogColumn, CatalogStorageForma import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.parser._ import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ScriptInputOutputSchema} import org.apache.spark.sql.execution.SparkSqlAstBuilder import org.apache.spark.sql.hive.{CreateTableAsSelect => CTAS, CreateViewAsSelect => CreateView} import org.apache.spark.sql.hive.{HiveGenericUDTF, HiveSerDe} @@ -299,7 +299,7 @@ class HiveSqlAstBuilder extends SparkSqlAstBuilder { recordWriter: Token, outRowFormat: RowFormatContext, recordReader: Token, - schemaLess: Boolean): AnyRef = { + schemaLess: Boolean): ScriptInputOutputSchema = { if (recordWriter != null || recordReader != null) { logWarning("Used defined record reader/writer classes are currently ignored.") } From 07264c0eee8520cb0fa949fdd2afe47e9c120dab Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Mon, 4 Apr 2016 07:28:11 -0700 Subject: [PATCH 17/20] revert it back. --- .../scala/org/apache/spark/sql/execution/SparkSqlParser.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 70041ca218adc..ff3ab7746cc86 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -18,8 +18,6 @@ package org.apache.spark.sql.execution import scala.collection.JavaConverters._ -import org.antlr.v4.runtime.Token - import org.apache.spark.sql.SaveMode import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.parser.{AbstractSqlParser, AstBuilder, ParseException} From c618a78c249e7bf5f4160220feff2ba6a2390132 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Mon, 4 Apr 2016 07:31:58 -0700 Subject: [PATCH 18/20] revert it back. 
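The preceding patches moved withScriptIOSchema between the builders,
at one point widening its return type to AnyRef so the SQL-side base
could throw while the Hive side returned a HiveScriptIOSchema. The
widening is unnecessary: the base method can keep the precise
ScriptInputOutputSchema return type, and the Hive override can narrow
it to HiveScriptIOSchema, since Scala permits covariant return types
when overriding. This patch restores the narrower Hive-side signature
and drops the now-unneeded import.

A standalone sketch of that covariant-override shape, using
hypothetical stand-in types rather than the real Catalyst and Hive
classes:

    trait IOSchema
    case class HiveIOSchema(recordReaderClass: Option[String]) extends IOSchema

    class BaseBuilder {
      // SQL-side default: reject script transforms at parse time.
      def withScriptIOSchema(): IOSchema =
        throw new UnsupportedOperationException(
          "Script Transform is not supported")
    }

    class HiveBuilder extends BaseBuilder {
      // Narrowing IOSchema to HiveIOSchema is a legal covariant override.
      override def withScriptIOSchema(): HiveIOSchema = HiveIOSchema(None)
    }

    object CovariantDemo extends App {
      println(new HiveBuilder().withScriptIOSchema()) // HiveIOSchema(None)
    }
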
--- .../org/apache/spark/sql/hive/execution/HiveSqlParser.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala index 16486bde94a0a..55e69f99a41fd 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql.catalyst.catalog.{CatalogColumn, CatalogStorageForma import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.parser._ import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ -import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ScriptInputOutputSchema} +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.SparkSqlAstBuilder import org.apache.spark.sql.hive.{CreateTableAsSelect => CTAS, CreateViewAsSelect => CreateView} import org.apache.spark.sql.hive.{HiveGenericUDTF, HiveSerDe} @@ -299,7 +299,7 @@ class HiveSqlAstBuilder extends SparkSqlAstBuilder { recordWriter: Token, outRowFormat: RowFormatContext, recordReader: Token, - schemaLess: Boolean): ScriptInputOutputSchema = { + schemaLess: Boolean): HiveScriptIOSchema = { if (recordWriter != null || recordReader != null) { logWarning("Used defined record reader/writer classes are currently ignored.") } From 4486c754e52a83b1f60f0426cc32b29a7cad0b6a Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Mon, 4 Apr 2016 10:43:33 -0700 Subject: [PATCH 19/20] move test cases. --- .../sql/catalyst/parser/PlanParserSuite.scala | 10 ------ .../apache/spark/sql/hive/HiveQlSuite.scala | 36 ++++++++++++++++--- 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 23f05ce84667c..9e1660df06cc6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -122,16 +122,6 @@ class PlanParserSuite extends PlanTest { table("a").union(table("b")).as("c").select(star())) } - test("transform query spec") { - val p = ScriptTransformation(Seq('a, 'b), "func", Seq.empty, table("e"), null) - assertEqual("select transform(a, b) using 'func' from e where f < 10", - p.copy(child = p.child.where('f < 10), output = Seq('key.string, 'value.string))) - assertEqual("map a, b using 'func' as c, d from e", - p.copy(output = Seq('c.string, 'd.string))) - assertEqual("reduce a, b using 'func' as (c: int, d decimal(10, 0)) from e", - p.copy(output = Seq('c.int, 'd.decimal(10, 0)))) - } - test("multi select query") { assertEqual( "from a select * select * where s < 10", diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala index 75108c6d47ea0..371a937641ea0 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala @@ -18,16 +18,17 @@ package org.apache.spark.sql.hive import org.apache.hadoop.hive.serde.serdeConstants -import org.scalatest.BeforeAndAfterAll -import org.apache.spark.SparkFunSuite import 
org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute import org.apache.spark.sql.catalyst.catalog.{CatalogColumn, CatalogTable, CatalogTableType} -import org.apache.spark.sql.catalyst.expressions.JsonTuple -import org.apache.spark.sql.catalyst.plans.logical.Generate +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, JsonTuple} +import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.catalyst.plans.logical.{Generate, ScriptTransformation} import org.apache.spark.sql.hive.execution.HiveSqlParser +import org.apache.spark.sql.types.{DecimalType, IntegerType, StringType} -class HiveQlSuite extends SparkFunSuite with BeforeAndAfterAll { +class HiveQlSuite extends PlanTest { val parser = HiveSqlParser private def extractTableDesc(sql: String): (CatalogTable, Boolean) = { @@ -201,6 +202,31 @@ class HiveQlSuite extends SparkFunSuite with BeforeAndAfterAll { assert(plan.children.head.asInstanceOf[Generate].generator.isInstanceOf[JsonTuple]) } + test("transform query spec") { + val plan1 = parser.parsePlan("select transform(a, b) using 'func' as c, d from e") + val plan2 = parser.parsePlan("map a, b using 'func' as c, d from e") + val plan3 = parser.parsePlan("reduce a, b using 'func' as c, d from e") + comparePlans(plan1, plan2) + comparePlans(plan2, plan3) + + assert(plan1.isInstanceOf[ScriptTransformation]) + assert(plan1.asInstanceOf[ScriptTransformation].input + == Seq(UnresolvedAttribute("a"), UnresolvedAttribute("b"))) + assert(plan1.asInstanceOf[ScriptTransformation].script + == "func") + assert(plan1.asInstanceOf[ScriptTransformation].output.map(_.name) + == Seq("c", "d")) + assert(plan1.asInstanceOf[ScriptTransformation].output.map(_.dataType) + == Seq(StringType, StringType)) + + val plan4 = parser.parsePlan("reduce a, b using 'func' as (c: int, d decimal(10, 0)) from e") + assert(plan4.isInstanceOf[ScriptTransformation]) + assert(plan1.asInstanceOf[ScriptTransformation].output.map(_.name) + == Seq("c", "d")) + assert(plan4.asInstanceOf[ScriptTransformation].output.map(_.dataType) + == Seq(IntegerType, DecimalType(10, 0))) + } + test("use backticks in output of Script Transform") { val plan = parser.parsePlan( """SELECT `t`.`thing1` From 249febf8bd3bc7d3911cd65f0c656b1bc0c18b3a Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Mon, 4 Apr 2016 18:14:31 -0700 Subject: [PATCH 20/20] address comments. --- .../sql/catalyst/parser/AstBuilder.scala | 3 +- .../apache/spark/sql/hive/HiveQlSuite.scala | 43 +++++++++---------- 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 73e9d95ff4f55..42abb3e2fdbc8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -418,8 +418,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { outRowFormat: RowFormatContext, recordReader: Token, schemaLess: Boolean): ScriptInputOutputSchema = { - throw new ParseException( - "Script Transform is not supported in SQLContext. 
Use a HiveContext instead", ctx) + throw new ParseException("Script Transform is not supported", ctx) } /** diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala index 371a937641ea0..a8a0d6b8de364 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala @@ -22,11 +22,13 @@ import org.apache.hadoop.hive.serde.serdeConstants import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute import org.apache.spark.sql.catalyst.catalog.{CatalogColumn, CatalogTable, CatalogTableType} -import org.apache.spark.sql.catalyst.expressions.{AttributeReference, JsonTuple} +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans +import org.apache.spark.sql.catalyst.dsl.plans.DslLogicalPlan +import org.apache.spark.sql.catalyst.expressions.JsonTuple import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{Generate, ScriptTransformation} import org.apache.spark.sql.hive.execution.HiveSqlParser -import org.apache.spark.sql.types.{DecimalType, IntegerType, StringType} class HiveQlSuite extends PlanTest { val parser = HiveSqlParser @@ -203,28 +205,23 @@ class HiveQlSuite extends PlanTest { } test("transform query spec") { - val plan1 = parser.parsePlan("select transform(a, b) using 'func' as c, d from e") + val plan1 = parser.parsePlan("select transform(a, b) using 'func' from e where f < 10") + .asInstanceOf[ScriptTransformation].copy(ioschema = null) val plan2 = parser.parsePlan("map a, b using 'func' as c, d from e") - val plan3 = parser.parsePlan("reduce a, b using 'func' as c, d from e") - comparePlans(plan1, plan2) - comparePlans(plan2, plan3) - - assert(plan1.isInstanceOf[ScriptTransformation]) - assert(plan1.asInstanceOf[ScriptTransformation].input - == Seq(UnresolvedAttribute("a"), UnresolvedAttribute("b"))) - assert(plan1.asInstanceOf[ScriptTransformation].script - == "func") - assert(plan1.asInstanceOf[ScriptTransformation].output.map(_.name) - == Seq("c", "d")) - assert(plan1.asInstanceOf[ScriptTransformation].output.map(_.dataType) - == Seq(StringType, StringType)) - - val plan4 = parser.parsePlan("reduce a, b using 'func' as (c: int, d decimal(10, 0)) from e") - assert(plan4.isInstanceOf[ScriptTransformation]) - assert(plan1.asInstanceOf[ScriptTransformation].output.map(_.name) - == Seq("c", "d")) - assert(plan4.asInstanceOf[ScriptTransformation].output.map(_.dataType) - == Seq(IntegerType, DecimalType(10, 0))) + .asInstanceOf[ScriptTransformation].copy(ioschema = null) + val plan3 = parser.parsePlan("reduce a, b using 'func' as (c: int, d decimal(10, 0)) from e") + .asInstanceOf[ScriptTransformation].copy(ioschema = null) + + val p = ScriptTransformation( + Seq(UnresolvedAttribute("a"), UnresolvedAttribute("b")), + "func", Seq.empty, plans.table("e"), null) + + comparePlans(plan1, + p.copy(child = p.child.where('f < 10), output = Seq('key.string, 'value.string))) + comparePlans(plan2, + p.copy(output = Seq('c.string, 'd.string))) + comparePlans(plan3, + p.copy(output = Seq('c.int, 'd.decimal(10, 0)))) } test("use backticks in output of Script Transform") {