[SC-5840][DIRECTORYCOMMIT] Move Vacuum-related code to com.databricks.sql

## What changes were proposed in this pull request?

This patch moves all Vacuum-related code from `org.apache.spark` to `com.databricks.sql`, as part of the general task of cleanly separating Edge-specific code in order to reduce merge conflicts with OSS.

`AclCommandParser` is renamed to the more general `DatabricksSqlParser`, which is to be used for all Databricks-specific syntax, and is moved to a new package, `com.databricks.sql.parser`.

`VacuumTableCommand` is moved from `org.apache.spark.sql.execution.command` to `com.databricks.sql.transaction`.
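
For illustration, the statement forms covered by the moved rule (`VACUUM (path=STRING | table=qualifiedName) (RETAIN number HOURS)?` in the grammar diff below) can be exercised through `spark.sql`. This is a usage sketch, not part of the patch, and assumes a session in which the ACL extensions have injected `DatabricksSqlParser`:

```scala
// Both VACUUM forms from the grammar; any other statement falls through to
// the delegate (OSS) parser unchanged.
spark.sql("VACUUM '/path/to/dir'")                  // vacuum by path
spark.sql("VACUUM my_db.my_table RETAIN 48 HOURS")  // vacuum a table, keep the last 48 hours
spark.sql("SELECT 1")                               // not Databricks-specific: delegated
```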

## How was this patch tested?

Tests in project `spark-sql` pass.
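
A parse-level check of the kind these tests exercise might look like the sketch below; `stubAclClient` and `sparkSqlParser` are hypothetical fixtures, not names taken from this diff:

```scala
import com.databricks.sql.parser.DatabricksSqlParser
import com.databricks.sql.transaction.VacuumTableCommand
import org.apache.spark.sql.catalyst.TableIdentifier

// A VACUUM statement should parse to a VacuumTableCommand...
val parser = new DatabricksSqlParser(stubAclClient, sparkSqlParser)
assert(parser.parsePlan("VACUUM events RETAIN 168 HOURS") ==
  VacuumTableCommand(None, Some(TableIdentifier("events")), Some(168.0)))
// ...while anything else must be handed to the delegate untouched.
assert(parser.parsePlan("SELECT 1") == sparkSqlParser.parsePlan("SELECT 1"))
```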

Author: Adrian Ionescu <[email protected]>

Closes apache#242 from adrian-ionescu/SC-5840.
adrian-ionescu committed Feb 22, 2017
1 parent 4449f24 commit 90f0167
Showing 14 changed files with 168 additions and 62 deletions.
2 changes: 1 addition & 1 deletion project/SparkBuild.scala
@@ -548,7 +548,7 @@ object Catalyst {

object SQL {
lazy val settings = antlr4Settings ++ Seq(
antlr4PackageName in Antlr4 := Some("com.databricks.sql.acl"),
antlr4PackageName in Antlr4 := Some("com.databricks.sql.parser"),
antlr4GenListener in Antlr4 := true,
antlr4GenVisitor in Antlr4 := true,
initialCommands in console :=
@@ -150,7 +150,6 @@ statement
| SET ROLE .*? #failNativeCommand
| SET .*? #setConfiguration
| RESET #resetConfiguration
| VACUUM (path=STRING | tableIdentifier) (RETAIN number HOURS)? #vacuumTable
| unsupportedHiveNativeCommands .*? #failNativeCommand
;

@@ -702,7 +701,6 @@ nonReserved
| AND | CASE | CAST | DISTINCT | DIV | ELSE | END | FUNCTION | INTERVAL | MACRO | OR | STRATIFY | THEN
| UNBOUNDED | WHEN
| DATABASE | SELECT | FROM | WHERE | HAVING | TO | TABLE | WITH | NOT | CURRENT_DATE | CURRENT_TIMESTAMP
| VACUUM | RETAIN | HOURS
;

SELECT: 'SELECT';
@@ -812,9 +810,6 @@ START: 'START';
TRANSACTION: 'TRANSACTION';
COMMIT: 'COMMIT';
ROLLBACK: 'ROLLBACK';
VACUUM: 'VACUUM';
RETAIN: 'RETAIN';
HOURS: 'HOURS';
MACRO: 'MACRO';

IF: 'IF';
@@ -6,7 +6,31 @@
* License, Version 2.0, a copy of which you may obtain at
* http://www.apache.org/licenses/LICENSE-2.0
*/
grammar AclCommandBase;
grammar DatabricksSqlBase;

@members {
/**
* Verify whether current token is a valid decimal token (which contains dot).
* Returns true if the character that follows the token is not a digit or letter or underscore.
*
* For example:
* For char stream "2.3", "2." is not a valid decimal token, because it is followed by digit '3'.
* For char stream "2.3_", "2.3" is not a valid decimal token, because it is followed by '_'.
* For char stream "2.3W", "2.3" is not a valid decimal token, because it is followed by 'W'.
* For char stream "12.0D 34.E2+0.12 " 12.0D is a valid decimal token because it is folllowed
* by a space. 34.E2 is a valid decimal token because it is followed by symbol '+'
* which is not a digit or letter or underscore.
*/
public boolean isValidDecimal() {
int nextChar = _input.LA(1);
if (nextChar >= 'A' && nextChar <= 'Z' || nextChar >= '0' && nextChar <= '9' ||
nextChar == '_') {
return false;
} else {
return true;
}
}
}

tokens {
DELIMITER
@@ -17,11 +41,12 @@ singleStatement
;

statement
: managePermissions #managePermissionsAlt
| ALTER securable OWNER TO identifier #alterOwner
| MSCK REPAIR securable PRIVILEGES #repairPrivileges
| SHOW GRANT identifier? ON (ALL| securable) #showPermissions
| .*? #passThrough
: managePermissions #managePermissionsAlt
| ALTER securable OWNER TO identifier #alterOwner
| MSCK REPAIR securable PRIVILEGES #repairPrivileges
| SHOW GRANT identifier? ON (ALL| securable) #showPermissions
| VACUUM (path=STRING | table=qualifiedName) (RETAIN number HOURS)? #vacuumTable
| .*? #passThrough
;

managePermissions
@@ -57,9 +82,15 @@ quotedIdentifier
: BACKQUOTED_IDENTIFIER
;

number
: DECIMAL_VALUE #decimalLiteral
| INTEGER_VALUE #integerLiteral
;

nonReserved
: ALTER | OWNER | TO | MSCK | REPAIR | PRIVILEGES | SHOW | GRANT | ON | ALL | WITH | OPTION |
REVOKE | FOR | FROM | CATALOG | DATABASE | TABLE | VIEW | FUNCTION | ANONYMOUS | FILE | ANY
: ALTER | OWNER | TO | MSCK | REPAIR | PRIVILEGES | SHOW | GRANT | ON | ALL | WITH | OPTION
| REVOKE | FOR | FROM | CATALOG | DATABASE | TABLE | VIEW | FUNCTION | ANONYMOUS | FILE | ANY
| VACUUM | RETAIN | HOURS
;

ALTER: 'ALTER';
@@ -85,12 +116,24 @@ FUNCTION: 'FUNCTION';
ANONYMOUS: 'ANONYMOUS';
FILE: 'FILE';
ANY: 'ANY';
VACUUM: 'VACUUM';
RETAIN: 'RETAIN';
HOURS: 'HOURS';

STRING
: '\'' ( ~('\''|'\\') | ('\\' .) )* '\''
| '\"' ( ~('\"'|'\\') | ('\\' .) )* '\"'
;

INTEGER_VALUE
: DIGIT+
;

DECIMAL_VALUE
: DIGIT+ EXPONENT
| DECIMAL_DIGITS EXPONENT? {isValidDecimal()}?
;

IDENTIFIER
: (LETTER | DIGIT | '_')+
;
@@ -99,6 +142,15 @@ BACKQUOTED_IDENTIFIER
: '`' ( ~'`' | '``' )* '`'
;

fragment DECIMAL_DIGITS
: DIGIT+ '.' DIGIT*
| '.' DIGIT+
;

fragment EXPONENT
: 'E' [+-]? DIGIT+
;

fragment DIGIT
: [0-9]
;
@@ -11,6 +11,7 @@ package com.databricks.sql.acl
import scala.util.control.NonFatal

import com.databricks.sql.DatabricksStaticSQLConf
import com.databricks.sql.parser.DatabricksSqlParser

import org.apache.spark.sql.{SparkSession, SparkSessionExtensions}
import org.apache.spark.sql.catalog.BaseCatalogHooks
@@ -57,7 +58,7 @@ class AclExtensions extends (SparkSessionExtensions => Unit) {
}
extensions.injectParser { (session, delegate) =>
if (isAclEnabled(session)) {
new AclCommandParser(client(session), delegate)
new DatabricksSqlParser(client(session), delegate)
} else {
delegate
}
@@ -9,6 +9,7 @@
package com.databricks.sql.acl

import com.databricks.sql.acl.Action._
import com.databricks.sql.transaction.VacuumTableCommand

import org.apache.spark.sql.catalog.{Catalog => PublicCatalog}
import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
@@ -6,11 +6,13 @@
* License, Version 2.0, a copy of which you may obtain at
* http://www.apache.org/licenses/LICENSE-2.0
*/
package com.databricks.sql.acl
package com.databricks.sql.parser

import scala.collection.JavaConverters._

import com.databricks.sql.acl.AclCommandBaseParser._
import com.databricks.sql.acl._
import com.databricks.sql.parser.DatabricksSqlBaseParser._
import com.databricks.sql.transaction.VacuumTableCommand

import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
import org.apache.spark.sql.catalyst.parser.{ParseException, ParserUtils}
@@ -19,8 +21,8 @@ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
/**
* Build an ACL related [[LogicalPlan]] from an ANTLR4 parser tree
*/
class AstCommandBuilder(client: AclClient)
extends AclCommandBaseBaseVisitor[AnyRef] {
class DatabricksSqlCommandBuilder(client: AclClient)
extends DatabricksSqlBaseBaseVisitor[AnyRef] {
import ParserUtils._

/**
@@ -128,18 +130,18 @@ class AstCommandBuilder(client: AclClient)
* Create a [[Securable]] object.
*/
override def visitSecurable(ctx: SecurableContext): Securable = withOrigin(ctx) {
Option(ctx.objectType).map(_.getType).getOrElse(AclCommandBaseParser.TABLE) match {
case AclCommandBaseParser.CATALOG =>
Option(ctx.objectType).map(_.getType).getOrElse(DatabricksSqlBaseParser.TABLE) match {
case DatabricksSqlBaseParser.CATALOG =>
Catalog
case AclCommandBaseParser.DATABASE =>
case DatabricksSqlBaseParser.DATABASE =>
Database(ctx.identifier.getText)
case AclCommandBaseParser.VIEW | AclCommandBaseParser.TABLE =>
case DatabricksSqlBaseParser.VIEW | DatabricksSqlBaseParser.TABLE =>
Table(visitTableIdentifier(ctx.qualifiedName))
case AclCommandBaseParser.FUNCTION if ctx.ANONYMOUS != null =>
case DatabricksSqlBaseParser.FUNCTION if ctx.ANONYMOUS != null =>
AnonymousFunction
case AclCommandBaseParser.FUNCTION =>
case DatabricksSqlBaseParser.FUNCTION =>
Function(visitFunctionIdentifier(ctx.qualifiedName))
case AclCommandBaseParser.FILE =>
case DatabricksSqlBaseParser.FILE =>
AnyFile
case _ =>
throw new ParseException("Unknown Securable Object", ctx)
@@ -170,6 +172,21 @@ class AstCommandBuilder(client: AclClient)
}
}

/**
* Create a [[VacuumTable]] logical plan.
* Example SQL :
* {{{
* VACUUM ('/path/to/dir' | table_name) [RETAIN number HOURS];
* }}}
*/
override def visitVacuumTable(
ctx: VacuumTableContext): LogicalPlan = withOrigin(ctx) {
VacuumTableCommand(
Option(ctx.path).map(string),
Option(ctx.table).map(visitTableIdentifier),
Option(ctx.number).map(_.getText.toDouble))
}

/**
* Return null for every other query. These queries should be passed to a delegate parser.
*/
@@ -6,14 +6,15 @@
* License, Version 2.0, a copy of which you may obtain at
* http://www.apache.org/licenses/LICENSE-2.0
*/
package com.databricks.sql.acl
package com.databricks.sql.parser

import com.databricks.sql.acl.AclCommandBaseParser._
import com.databricks.sql.acl.AclClient
import com.databricks.sql.parser.DatabricksSqlBaseParser._
import org.antlr.v4.runtime._
import org.antlr.v4.runtime.atn.PredictionMode
import org.antlr.v4.runtime.misc.ParseCancellationException

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.{AnalysisException, Dataset, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.parser._
@@ -25,9 +26,9 @@ import org.apache.spark.sql.types.DataType
* Parser for ACL related commands. The parser passes the query to an underlying (more complete)
* parser if it cannot parse the query.
*/
class AclCommandParser(client: AclClient, delegate: ParserInterface) extends ParserInterface {
class DatabricksSqlParser(client: AclClient, delegate: ParserInterface) extends ParserInterface {

val builder = new AstCommandBuilder(client)
val builder = new DatabricksSqlCommandBuilder(client)

override def parseDataType(sqlText: String): DataType =
delegate.parseDataType(sqlText)
@@ -46,13 +47,13 @@ class AclCommandParser(client: AclClient, delegate: ParserInterface) extends Par
}
}

protected def parse[T](command: String)(toResult: AclCommandBaseParser => T): T = {
val lexer = new AclCommandBaseLexer(new ANTLRNoCaseStringStream(command))
protected def parse[T](command: String)(toResult: DatabricksSqlBaseParser => T): T = {
val lexer = new DatabricksSqlBaseLexer(new ANTLRNoCaseStringStream(command))
lexer.removeErrorListeners()
lexer.addErrorListener(ParseErrorListener)

val tokenStream = new CommonTokenStream(lexer)
val parser = new AclCommandBaseParser(tokenStream)
val parser = new DatabricksSqlBaseParser(tokenStream)
parser.addParseListener(PostProcessor)
parser.removeErrorListeners()
parser.addErrorListener(ParseErrorListener)
@@ -96,7 +97,7 @@ class ANTLRNoCaseStringStream(input: String) extends ANTLRInputStream(input) {
/**
* The post-processor validates & cleans-up the parse tree during the parse process.
*/
case object PostProcessor extends AclCommandBaseBaseListener {
case object PostProcessor extends DatabricksSqlBaseBaseListener {

/** Remove the back ticks from an Identifier. */
override def exitQuotedIdentifier(ctx: QuotedIdentifierContext): Unit = {
@@ -121,7 +122,7 @@ case object PostProcessor extends AclCommandBaseBaseListener {
val token = ctx.getChild(0).getPayload.asInstanceOf[Token]
parent.addChild(f(new CommonToken(
new org.antlr.v4.runtime.misc.Pair(token.getTokenSource, token.getInputStream),
AclCommandBaseParser.IDENTIFIER,
DatabricksSqlBaseParser.IDENTIFIER,
token.getChannel,
token.getStartIndex + stripMargins,
token.getStopIndex - stripMargins)))
@@ -15,7 +15,7 @@
* limitations under the License.
*/

package org.apache.spark.sql.execution.command
package com.databricks.sql.transaction

import java.net.URI

@@ -24,6 +24,7 @@ import org.apache.hadoop.fs.Path
import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.transaction.DatabricksAtomicCommitProtocol
import org.apache.spark.sql.types._

@@ -80,6 +80,10 @@ class SparkSession private(
this(sc, None, new SparkSessionExtensions)
}

private[sql] def this(sc: SparkContext, extensions: Option[SparkSessionExtensions]) {
this(sc, None, extensions.getOrElse(new SparkSessionExtensions))
}

sparkContext.assertNotStopped()

/**
@@ -86,21 +86,6 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
ResetCommand
}

/**
* Create a [[VacuumTable]] logical plan.
* Example SQL :
* {{{
* VACUUM ('/path/to/dir' | table_name) [RETAIN number HOURS];
* }}}
*/
override def visitVacuumTable(
ctx: VacuumTableContext): LogicalPlan = withOrigin(ctx) {
VacuumTableCommand(
Option(ctx.path).map(string),
Option(ctx.tableIdentifier).map(visitTableIdentifier),
Option(ctx.number).map(_.getText.toDouble))
}

/**
* Create an [[AnalyzeTableCommand]] command or an [[AnalyzeColumnCommand]] command.
* Example SQL for analyzing table :
@@ -9,6 +9,7 @@
package com.databricks.sql.acl

import com.databricks.sql.acl.Action.{ReadMetadata, Select}
import com.databricks.sql.transaction.VacuumTableCommand
import org.apache.hadoop.fs.Path
import org.mockito.Mockito._

(the remaining changed files were not loaded and are not shown)

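For reference, the control flow that `DatabricksSqlParser` implements (see the `parsePlan` and builder hunks above) reduces to a small try-then-delegate pattern. Below is a self-contained toy sketch; `MiniParser` and friends are made-up types standing in for Spark's `ParserInterface` and the ANTLR machinery:

```scala
// Toy model of the try-then-delegate pattern: `recognize` plays the role of
// the ANTLR visitor, which returns null (here: None) for statements outside
// the Databricks-specific grammar, triggering delegation to the OSS parser.
trait MiniParser { def parsePlan(sqlText: String): String }

class DelegatingParser(recognize: String => Option[String], delegate: MiniParser)
    extends MiniParser {
  override def parsePlan(sqlText: String): String =
    recognize(sqlText).getOrElse(delegate.parsePlan(sqlText))
}

object Demo extends App {
  val databricksRules: String => Option[String] = sql =>
    if (sql.trim.toUpperCase.startsWith("VACUUM")) Some(s"VacuumTableCommand($sql)")
    else None

  val ossParser = new MiniParser {
    override def parsePlan(sqlText: String): String = s"SparkPlan($sqlText)"
  }

  val parser = new DelegatingParser(databricksRules, ossParser)
  println(parser.parsePlan("VACUUM t RETAIN 7 HOURS")) // handled by the Databricks rules
  println(parser.parsePlan("SELECT 1"))                // delegated to the OSS parser
}
```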