Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert Presto SQL to Calcite SqlNode #171

Merged
merged 10 commits into from
Nov 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
4 changes: 4 additions & 0 deletions NOTICE
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ https://iceberg.apache.org/
Copyright 2018-2021 Apache Software Foundation
License: Apache-2.0

Trinodb trino
https://trino.io/
License: Apache-2.0

================================================================================

In addition, this product automatically loads third-party code from an
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
/**
* Copyright 2021 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
package com.linkedin.coral.common.calcite;

import java.math.BigDecimal;
import java.util.*;

import org.apache.calcite.avatica.util.Casing;
import org.apache.calcite.sql.*;
import org.apache.calcite.sql.parser.SqlParseException;
import org.apache.calcite.sql.parser.SqlParser;
import org.apache.calcite.sql.parser.SqlParserPos;
import org.apache.calcite.sql.validate.SqlConformanceEnum;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static java.lang.String.format;
import static org.apache.calcite.sql.parser.SqlParserPos.ZERO;


public class CalciteUtil {
private final static Logger LOG = LoggerFactory.getLogger(CalciteUtil.class);

public static SqlParser.Config SQL_PARSER_CONFIG =
SqlParser.configBuilder().setCaseSensitive(false).setUnquotedCasing(Casing.UNCHANGED)
.setQuotedCasing(Casing.UNCHANGED).setConformance(SqlConformanceEnum.ORACLE_10).build();

public static SqlNode parseStatement(String query) throws SqlParseException {
String quotedQuery = quoteReservedWords(query);
try {
String expression = "(" + quotedQuery + ")";
SqlParser sqlParser = SqlParser.create(expression, SQL_PARSER_CONFIG);
return sqlParser.parseExpression();
} catch (SqlParseException e) {
return parseQuery(quotedQuery);
}
}

private static SqlNode parseQuery(String query) throws SqlParseException {
try {
SqlParser sqlParser = SqlParser.create(query);
return sqlParser.parseQuery();
} catch (SqlParseException e) {
LOG.error("Failed to parse query: {}", query);
throw e;
}
}

public static SqlNode createLiteralNumber(long value, SqlParserPos pos) {
return SqlLiteral.createExactNumeric(String.valueOf(value), pos);
}

public static String convertScientificDouble(double value) {
String valueString = String.valueOf(value);
if (valueString.toUpperCase().contains("E")) {
int index = valueString.indexOf("E-") > 0 ? valueString.indexOf("E-") + 2 : valueString.indexOf("E") + 1;
Integer scale = Integer.valueOf(valueString.substring(index));
valueString = format("%." + format("%d", scale) + "f", value);
}
return valueString;
}

public static SqlNode createLiteralNumber(double value, SqlParserPos pos) {
return SqlLiteral.createExactNumeric(convertScientificDouble(value), pos);
}

public static SqlLiteral createLiteralBoolean(boolean value, SqlParserPos pos) {
return SqlLiteral.createBoolean(value, pos);
}

public static SqlNode createStringLiteral(String value, SqlParserPos pos) {
return SqlLiteral.createCharString(value, pos);
}

public static SqlNode createBinaryLiteral(byte[] value, SqlParserPos pos) {
return SqlLiteral.createBinaryString(value, pos);
}

public static SqlNode createLiteralNull(SqlParserPos pos) {
return SqlLiteral.createNull(pos);
}

public static SqlCall createCall(SqlOperator sqlOperator, List<SqlNode> sqlNodeList) {
return sqlOperator.createCall(createSqlNodeList(sqlNodeList));
}

public static SqlCall createCall(SqlOperator sqlOperator, List<SqlNode> sqlNodeList, SqlParserPos pos) {
return sqlOperator.createCall(new SqlNodeList(sqlNodeList, pos));
}

public static SqlCall createCall(SqlOperator sqlOperator, List<SqlNode> sqlNodeList, SqlLiteral functionQuantifier) {
return sqlOperator.createCall(functionQuantifier, ZERO, sqlNodeList.toArray(new SqlNode[0]));
}

public static String[] splitIdentifierString(String identifierString) {
return identifierString.split("\\.");
}

public static SqlIdentifier createSqlIdentifier(SqlParserPos pos, String... path) {
return new SqlIdentifier(Arrays.asList(path), pos);
}

public static SqlIdentifier createStarIdentifier(SqlParserPos pos) {
return createSqlIdentifier(pos, "");
}

public static SqlNodeList createSqlNodeList(Collection<SqlNode> sqlNodeList) {
return new SqlNodeList(sqlNodeList, ZERO);
}

public static SqlNodeList createSqlNodeList(Collection<SqlNode> sqlNodeList, SqlParserPos pos) {
return new SqlNodeList(sqlNodeList, pos);
}

public static String quoteReservedWords(String s) {
if (s == null) {
return s;
}

s = s.replaceAll("(^|[^\"]\\b)time(\\b[^']|$)", "$1\"time\"$2");
s = s.replaceAll("(^|\\W)rank($|\\W)", "$1\"rank\"$2");
return s;
}
}
4 changes: 3 additions & 1 deletion coral-trino/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@ apply from: "$rootDir/gradle/java-publication.gradle"
dependencies {
compile deps.'gson'
compile deps.'javax-annotation'
compile deps.'trino-parser'
compile project(path: ':coral-hive')
implementation 'com.github.vertical-blank:sql-formatter:2.0.2'

testCompile deps.'assertj'
testCompile deps.'trino-parser'


testCompile(deps.'hive'.'hive-exec-core') {
exclude group: 'org.apache.avro', module: 'avro-tools'
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
/**
* Copyright 2021 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
package com.linkedin.coral.trino.trino2rel;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import com.github.vertical_blank.sqlformatter.core.AbstractFormatter;
import com.github.vertical_blank.sqlformatter.core.DialectConfig;
import com.github.vertical_blank.sqlformatter.core.FormatConfig;
import com.github.vertical_blank.sqlformatter.core.Tokenizer;
import com.github.vertical_blank.sqlformatter.languages.StandardSqlFormatter;


/**
 * A copy of vertical_blank's {@link StandardSqlFormatter} with its own dialect word lists.
 *
 * <p>The copy exists so that "FULL JOIN" can be part of the reserved newline words, which Calcite
 * needs.
 */
public class CalciteSqlFormatter extends AbstractFormatter {

  /** Words the tokenizer treats as reserved. */
  private static final List<String> RESERVED_WORDS = Arrays.asList(
      "ACCESSIBLE", "ACTION", "AGAINST", "AGGREGATE",
      "ALGORITHM", "ALL", "ALTER", "ANALYSE", "ANALYZE", "AS", "ASC", "AUTOCOMMIT", "AUTO_INCREMENT", "BACKUP", "BEGIN",
      "BETWEEN", "BINLOG", "BOTH", "CASCADE", "CASE", "CHANGE", "CHANGED", "CHARACTER SET", "CHARSET", "CHECK",
      "CHECKSUM", "COLLATE", "COLLATION", "COLUMN", "COLUMNS", "COMMENT", "COMMIT", "COMMITTED", "COMPRESSED",
      "CONCURRENT", "CONSTRAINT", "CONTAINS", "CONVERT", "CREATE", "CROSS", "CURRENT_TIMESTAMP", "DATABASE",
      "DATABASES", "DAY", "DAY_HOUR", "DAY_MINUTE", "DAY_SECOND", "DEFAULT", "DEFINER", "DELAYED", "DELETE", "DESC",
      "DESCRIBE", "DETERMINISTIC", "DISTINCT", "DISTINCTROW", "DIV", "DO", "DROP", "DUMPFILE", "DUPLICATE", "DYNAMIC",
      "ELSE", "ENCLOSED", "END", "ENGINE", "ENGINES", "ENGINE_TYPE", "ESCAPE", "ESCAPED", "EVENTS", "EXEC", "EXECUTE",
      "EXISTS", "EXPLAIN", "EXTENDED", "FAST", "FETCH", "FIELDS", "FILE", "FIRST", "FIXED", "FLUSH", "FOR", "FORCE",
      "FOREIGN", "FULL", "FULLTEXT", "FUNCTION", "GLOBAL", "GRANT", "GRANTS", "GROUP_CONCAT", "HEAP", "HIGH_PRIORITY",
      "HOSTS", "HOUR", "HOUR_MINUTE", "HOUR_SECOND", "IDENTIFIED", "IF", "IFNULL", "IGNORE", "IN", "INDEX", "INDEXES",
      "INFILE", "INSERT", "INSERT_ID", "INSERT_METHOD", "INTERVAL", "INTO", "INVOKER", "IS", "ISOLATION", "KEY", "KEYS",
      "KILL", "LAST_INSERT_ID", "LEADING", "LEVEL", "LIKE", "LINEAR", "LINES", "LOAD", "LOCAL", "LOCK", "LOCKS", "LOGS",
      "LOW_PRIORITY", "MARIA", "MASTER", "MASTER_CONNECT_RETRY", "MASTER_HOST", "MASTER_LOG_FILE", "MATCH",
      "MAX_CONNECTIONS_PER_HOUR", "MAX_QUERIES_PER_HOUR", "MAX_ROWS", "MAX_UPDATES_PER_HOUR", "MAX_USER_CONNECTIONS",
      "MEDIUM", "MERGE", "MINUTE", "MINUTE_SECOND", "MIN_ROWS", "MODE", "MODIFY", "MONTH", "MRG_MYISAM", "MYISAM",
      "NAMES", "NATURAL", "NOT", "NOW()", "NULL", "OFFSET", "ON DELETE", "ON UPDATE", "ON", "ONLY", "OPEN", "OPTIMIZE",
      "OPTION", "OPTIONALLY", "OUTFILE", "PACK_KEYS", "PAGE", "PARTIAL", "PARTITION", "PARTITIONS", "PASSWORD",
      "PRIMARY", "PRIVILEGES", "PROCEDURE", "PROCESS", "PROCESSLIST", "PURGE", "QUICK", "RAID0", "RAID_CHUNKS",
      "RAID_CHUNKSIZE", "RAID_TYPE", "RANGE", "READ", "READ_ONLY", "READ_WRITE", "REFERENCES", "REGEXP", "RELOAD",
      "RENAME", "REPAIR", "REPEATABLE", "REPLACE", "REPLICATION", "RESET", "RESTORE", "RESTRICT", "RETURN", "RETURNS",
      "REVOKE", "RLIKE", "ROLLBACK", "ROW", "ROWS", "ROW_FORMAT", "SECOND", "SECURITY", "SEPARATOR", "SERIALIZABLE",
      "SESSION", "SHARE", "SHOW", "SHUTDOWN", "SLAVE", "SONAME", "SOUNDS", "SQL", "SQL_AUTO_IS_NULL", "SQL_BIG_RESULT",
      "SQL_BIG_SELECTS", "SQL_BIG_TABLES", "SQL_BUFFER_RESULT", "SQL_CACHE", "SQL_CALC_FOUND_ROWS", "SQL_LOG_BIN",
      "SQL_LOG_OFF", "SQL_LOG_UPDATE", "SQL_LOW_PRIORITY_UPDATES", "SQL_MAX_JOIN_SIZE", "SQL_NO_CACHE",
      "SQL_QUOTE_SHOW_CREATE", "SQL_SAFE_UPDATES", "SQL_SELECT_LIMIT", "SQL_SLAVE_SKIP_COUNTER", "SQL_SMALL_RESULT",
      "SQL_WARNINGS", "START", "STARTING", "STATUS", "STOP", "STORAGE", "STRAIGHT_JOIN", "STRING", "STRIPED", "SUPER",
      "TABLE", "TABLES", "TEMPORARY", "TERMINATED", "THEN", "TO", "TRAILING", "TRANSACTIONAL", "TRUE", "TRUNCATE",
      "TYPE", "TYPES", "UNCOMMITTED", "UNIQUE", "UNLOCK", "UNSIGNED", "USAGE", "USE", "USING", "VARIABLES", "VIEW",
      "WHEN", "WITH", "WORK", "WRITE", "YEAR_MONTH");

  /** Reserved words registered as top-level words in the dialect. */
  private static final List<String> RESERVED_TOP_LEVEL_WORDS = Arrays.asList(
      "ADD", "AFTER", "ALTER COLUMN", "ALTER TABLE", "DELETE FROM", "EXCEPT", "FETCH FIRST", "FROM",
      "GROUP BY", "GO", "HAVING", "INSERT INTO", "INSERT", "INTERSECT", "LIMIT", "MODIFY", "ORDER BY", "SELECT",
      "SET CURRENT SCHEMA", "SET SCHEMA", "SET", "UNION ALL", "UNION", "UPDATE", "VALUES", "WHERE");

  /** Reserved words registered as newline words in the dialect; includes the FULL JOIN variants. */
  private static final List<String> RESERVED_NEWLINE_WORDS = Arrays.asList(
      "AND", "CROSS APPLY", "CROSS JOIN", "ELSE",
      "INNER JOIN", "JOIN", "LEFT JOIN", "LEFT OUTER JOIN", "OR", "OUTER APPLY", "OUTER JOIN", "RIGHT JOIN",
      "RIGHT OUTER JOIN", "FULL JOIN", "FULL OUTER JOIN", "WHEN", "XOR");

  // Must be initialized after the word lists above and before FORMATTER below.
  private static final DialectConfig DIALECT_CONFIG = buildDialectConfig();

  /** Assembles the dialect from the word lists plus string, paren, placeholder and comment types. */
  private static DialectConfig buildDialectConfig() {
    return DialectConfig.builder()
        .reservedWords(RESERVED_WORDS)
        .reservedTopLevelWords(RESERVED_TOP_LEVEL_WORDS)
        .reservedNewlineWords(RESERVED_NEWLINE_WORDS)
        .stringTypes(Arrays.asList("\"\"", "N''", "''", "``", "[]"))
        .openParens(Arrays.asList("(", "CASE"))
        .closeParens(Arrays.asList(")", "END"))
        .indexedPlaceholderTypes(Collections.singletonList("?"))
        .namedPlaceholderTypes(Arrays.asList("@", ":"))
        .lineCommentTypes(Arrays.asList("#", "--"))
        .build();
  }

  /** Shared formatter instance configured with the library's default indent. */
  public static final CalciteSqlFormatter FORMATTER =
      new CalciteSqlFormatter(FormatConfig.builder().indent(FormatConfig.DEFAULT_INDENT).build());

  /**
   * Creates a formatter with the given format settings.
   *
   * @param cfg format configuration handed to the parent formatter
   */
  public CalciteSqlFormatter(FormatConfig cfg) {
    super(cfg);
  }

  /**
   * Formats the query with the parent formatter, then rewrites every {@code " <= > "} in the
   * result to {@code " <=> "}.
   *
   * @param query the SQL text to format
   * @return the formatted SQL text
   */
  @Override
  public String format(String query) {
    String formatted = super.format(query);
    // Presumably the parent splits the "<=>" operator into "<= >"; stitch it back together.
    return formatted.replace(" <= > ", " <=> ");
  }

  /** Returns a new tokenizer built on this class's dialect. */
  @Override
  public Tokenizer tokenizer() {
    return new Tokenizer(DIALECT_CONFIG);
  }

  /** Returns the dialect shared by all instances of this formatter. */
  @Override
  public DialectConfig dialectConfig() {
    return DIALECT_CONFIG;
  }
}
Loading