From 0cba40593cc4d757497c4697f0d528a868d04933 Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Mon, 24 Oct 2022 17:19:40 -0700 Subject: [PATCH 1/3] #639: allow metadata fields (_id) to be used as QualifiedName Signed-off-by: Andrew Carbonetto --- .../org/opensearch/sql/analysis/Analyzer.java | 7 +++++ .../sql/ast/expression/QualifiedName.java | 17 ++++++++++- .../sql/legacy/CsvFormatResponseIT.java | 3 ++ .../OpenSearchDescribeIndexRequest.java | 5 ++++ .../response/OpenSearchResponse.java | 28 ++++++++++++++----- .../antlr/OpenSearchSQLIdentifierParser.g4 | 9 ++++++ sql/src/main/antlr/OpenSearchSQLLexer.g4 | 14 +++++++++- .../sql/sql/parser/AstExpressionBuilder.java | 11 ++++---- 8 files changed, 80 insertions(+), 14 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index 5fc642fa06..fcf3e08940 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -153,6 +153,13 @@ public LogicalPlan visitRelation(Relation node, AnalysisContext context) { } table.getFieldTypes().forEach((k, v) -> curEnv.define(new Symbol(Namespace.FIELD_NAME, k), v)); + // add OpenSearch metadata types + curEnv.define(new Symbol(Namespace.FIELD_NAME, "_index"), ExprCoreType.STRING); + curEnv.define(new Symbol(Namespace.FIELD_NAME, "_id"), ExprCoreType.STRING); + curEnv.define(new Symbol(Namespace.FIELD_NAME, "_score"), ExprCoreType.FLOAT); + curEnv.define(new Symbol(Namespace.FIELD_NAME, "_maxscore"), ExprCoreType.FLOAT); + curEnv.define(new Symbol(Namespace.FIELD_NAME, "_sort"), ExprCoreType.LONG); + // Put index name or its alias in index namespace on type environment so qualifier // can be removed when analyzing qualified name. The value (expr type) here doesn't matter. curEnv.define(new Symbol(Namespace.INDEX_NAME, diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java b/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java index 8b16119dc0..a6c10c55dd 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java @@ -25,14 +25,27 @@ public class QualifiedName extends UnresolvedExpression { private final List parts; + @Getter + private final Boolean isMetadataField; + public QualifiedName(String name) { + this(name, Boolean.FALSE); + } + + public QualifiedName(String name, Boolean isMetadataField) { this.parts = Collections.singletonList(name); + this.isMetadataField = isMetadataField; + } + + public QualifiedName(Iterable parts) { + this(parts, Boolean.FALSE); } /** * QualifiedName Constructor. */ - public QualifiedName(Iterable parts) { + public QualifiedName(Iterable parts, Boolean isMetadataField) { + this.isMetadataField = isMetadataField; List partsList = StreamSupport.stream(parts.spliterator(), false).collect(toList()); if (partsList.isEmpty()) { throw new IllegalArgumentException("parts is empty"); @@ -110,4 +123,6 @@ public List getChild() { public R accept(AbstractNodeVisitor nodeVisitor, C context) { return nodeVisitor.visitQualifiedName(this, context); } + + public Boolean isMetadataField() { return Boolean.TRUE; } } diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/CsvFormatResponseIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/CsvFormatResponseIT.java index 9a08302577..0a97908e6f 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/CsvFormatResponseIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/CsvFormatResponseIT.java @@ -99,6 +99,7 @@ public void specificPercentilesIntAndDouble() throws IOException { } } + // TODO: this should now work @Ignore("only work for legacy engine") public void nestedObjectsAndArraysAreQuoted() throws IOException { final String query = String.format(Locale.ROOT, "SELECT * FROM %s WHERE _id = 5", @@ -114,6 +115,7 @@ public void nestedObjectsAndArraysAreQuoted() throws IOException { Assert.assertThat(result, containsString(expectedMessage)); } + // TODO: this should now work @Ignore("only work for legacy engine") public void arraysAreQuotedInFlatMode() throws IOException { setFlatOption(true); @@ -546,6 +548,7 @@ public void twoCharsSeperator() throws Exception { } + // TODO: this should now work @Ignore("only work for legacy engine") public void includeIdAndNotTypeOrScore() throws Exception { String query = String.format(Locale.ROOT, diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/system/OpenSearchDescribeIndexRequest.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/system/OpenSearchDescribeIndexRequest.java index f321497099..4bfa3ed0d6 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/system/OpenSearchDescribeIndexRequest.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/system/OpenSearchDescribeIndexRequest.java @@ -118,6 +118,11 @@ public Map getFieldTypes() { .filter(entry -> !ExprCoreType.UNKNOWN.equals(entry.getValue())) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))); } + fieldTypes.put("_index", ExprCoreType.STRING); + fieldTypes.put("_id", ExprCoreType.STRING); + fieldTypes.put("_score", ExprCoreType.FLOAT); + fieldTypes.put("_maxscore", ExprCoreType.FLOAT); + fieldTypes.put("_sort", ExprCoreType.LONG); return fieldTypes; } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java index aadd73efdd..5563ee5136 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/response/OpenSearchResponse.java @@ -17,6 +17,9 @@ import org.opensearch.search.SearchHits; import org.opensearch.search.aggregations.Aggregations; import org.opensearch.sql.data.model.ExprTupleValue; +import org.opensearch.sql.data.model.ExprStringValue; +import org.opensearch.sql.data.model.ExprFloatValue; +import org.opensearch.sql.data.model.ExprLongValue; import org.opensearch.sql.data.model.ExprValue; import org.opensearch.sql.data.model.ExprValueUtils; import org.opensearch.sql.opensearch.data.value.OpenSearchExprValueFactory; @@ -92,14 +95,25 @@ public Iterator iterator() { return (ExprValue) ExprTupleValue.fromExprValueMap(builder.build()); }).iterator(); } else { + float maxScore = hits.getMaxScore(); return Arrays.stream(hits.getHits()) .map(hit -> { - ExprValue docData = exprValueFactory.construct(hit.getSourceAsString()); - if (hit.getHighlightFields().isEmpty()) { - return docData; - } else { - ImmutableMap.Builder builder = new ImmutableMap.Builder<>(); - builder.putAll(docData.tupleValue()); + String source = hit.getSourceAsString(); + ExprValue docData = exprValueFactory.construct(source); + + ImmutableMap.Builder builder = new ImmutableMap.Builder<>(); + builder.putAll(docData.tupleValue()); + builder.put("_index", new ExprStringValue(hit.getIndex())); + builder.put("_id", new ExprStringValue(hit.getId())); + if (!Float.isNaN(hit.getScore())) { + builder.put("_score", new ExprFloatValue(hit.getScore())); + } + if (!Float.isNaN(maxScore)) { + builder.put("_maxscore", new ExprLongValue(maxScore)); + } + builder.put("_sort", new ExprLongValue(hit.getSeqNo())); + + if (!hit.getHighlightFields().isEmpty()) { var hlBuilder = ImmutableMap.builder(); for (var es : hit.getHighlightFields().entrySet()) { hlBuilder.put(es.getKey(), ExprValueUtils.collectionValue( @@ -107,8 +121,8 @@ public Iterator iterator() { t -> (t.toString())).collect(Collectors.toList()))); } builder.put("_highlight", ExprTupleValue.fromExprValueMap(hlBuilder.build())); - return ExprTupleValue.fromExprValueMap(builder.build()); } + return (ExprValue) ExprTupleValue.fromExprValueMap(builder.build()); }).iterator(); } } diff --git a/sql/src/main/antlr/OpenSearchSQLIdentifierParser.g4 b/sql/src/main/antlr/OpenSearchSQLIdentifierParser.g4 index 665d48c97c..cf5091e8cb 100644 --- a/sql/src/main/antlr/OpenSearchSQLIdentifierParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLIdentifierParser.g4 @@ -54,9 +54,18 @@ qualifiedName ident : DOT? ID | BACKTICK_QUOTE_ID + | metadataField | keywordsCanBeId ; +metadataField + : META_INDEX + | META_ID + | META_SCORE + | META_MAXSCORE + | META_SORT + ; + keywordsCanBeId : FULL | FIELD | D | T | TS // OD SQL and ODBC special diff --git a/sql/src/main/antlr/OpenSearchSQLLexer.g4 b/sql/src/main/antlr/OpenSearchSQLLexer.g4 index 470ff5050f..297c077e02 100644 --- a/sql/src/main/antlr/OpenSearchSQLLexer.g4 +++ b/sql/src/main/antlr/OpenSearchSQLLexer.g4 @@ -135,6 +135,15 @@ SUBSTRING: 'SUBSTRING'; TRIM: 'TRIM'; +// Metadata fields can be ID + +META_INDEX: '_ID'; +META_ID: '_ID'; +META_SCORE: '_SCORE'; +META_MAXSCORE: '_MAXSCORE'; +META_SORT: '_SORT'; + + // Keywords, but can be ID // Common Keywords, but can be ID @@ -441,7 +450,6 @@ BACKTICK_QUOTE_ID: BQUOTA_STRING; // Fragments for Literal primitives fragment EXPONENT_NUM_PART: 'E' [-+]? DEC_DIGIT+; -fragment ID_LITERAL: [@*A-Z]+?[*A-Z_\-0-9]*; fragment DQUOTA_STRING: '"' ( '\\'. | '""' | ~('"'| '\\') )* '"'; fragment SQUOTA_STRING: '\'' ('\\'. | '\'\'' | ~('\'' | '\\'))* '\''; fragment BQUOTA_STRING: '`' ( '\\'. | '``' | ~('`'|'\\'))* '`'; @@ -449,6 +457,10 @@ fragment HEX_DIGIT: [0-9A-F]; fragment DEC_DIGIT: [0-9]; fragment BIT_STRING_L: 'B' '\'' [01]+ '\''; +// Identifiers cannot start with a single '_' since this an OpebSearch reserved +// metadata field. Two underscores (or more) is acceptable, such as '__field'. +fragment ID_LITERAL: ([_][_]|[@*A-Z])+?[*A-Z_\-0-9]*; + // Last tokens must generate Errors ERROR_RECOGNITION: . -> channel(ERRORCHANNEL); diff --git a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java index ebfafeec23..cab2eae389 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java +++ b/sql/src/main/java/org/opensearch/sql/sql/parser/AstExpressionBuilder.java @@ -430,12 +430,13 @@ private UnresolvedExpression visitConstantFunction(String functionName, } private QualifiedName visitIdentifiers(List identifiers) { + Boolean isMetadataField = identifiers.stream().filter(id -> id.metadataField() != null).findFirst().isPresent(); return new QualifiedName( - identifiers.stream() - .map(RuleContext::getText) - .map(StringUtils::unquoteIdentifier) - .collect(Collectors.toList()) - ); + identifiers.stream() + .map(RuleContext::getText) + .map(StringUtils::unquoteIdentifier) + .collect(Collectors.toList()), + isMetadataField); } private List singleFieldRelevanceArguments( From 086d028bdac490f84097d4c38c6e467966e67342 Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Mon, 14 Nov 2022 12:31:42 -0800 Subject: [PATCH 2/3] Update to define and include metadata when visiting the expr node Signed-off-by: Andrew Carbonetto --- .../org/opensearch/sql/analysis/Analyzer.java | 7 ----- .../sql/analysis/ExpressionAnalyzer.java | 31 +++++++++++++++++++ .../sql/ast/expression/QualifiedName.java | 3 +- .../OpenSearchDescribeIndexRequest.java | 5 --- sql/src/main/antlr/OpenSearchSQLLexer.g4 | 2 +- 5 files changed, 33 insertions(+), 15 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index fcf3e08940..5fc642fa06 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -153,13 +153,6 @@ public LogicalPlan visitRelation(Relation node, AnalysisContext context) { } table.getFieldTypes().forEach((k, v) -> curEnv.define(new Symbol(Namespace.FIELD_NAME, k), v)); - // add OpenSearch metadata types - curEnv.define(new Symbol(Namespace.FIELD_NAME, "_index"), ExprCoreType.STRING); - curEnv.define(new Symbol(Namespace.FIELD_NAME, "_id"), ExprCoreType.STRING); - curEnv.define(new Symbol(Namespace.FIELD_NAME, "_score"), ExprCoreType.FLOAT); - curEnv.define(new Symbol(Namespace.FIELD_NAME, "_maxscore"), ExprCoreType.FLOAT); - curEnv.define(new Symbol(Namespace.FIELD_NAME, "_sort"), ExprCoreType.LONG); - // Put index name or its alias in index namespace on type environment so qualifier // can be removed when analyzing qualified name. The value (expr type) here doesn't matter. curEnv.define(new Symbol(Namespace.INDEX_NAME, diff --git a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java index b877fcf673..eaf1107ff7 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java @@ -45,6 +45,7 @@ import org.opensearch.sql.ast.expression.Xor; import org.opensearch.sql.common.antlr.SyntaxCheckException; import org.opensearch.sql.data.model.ExprValueUtils; +import org.opensearch.sql.data.type.ExprCoreType; import org.opensearch.sql.data.type.ExprType; import org.opensearch.sql.exception.SemanticCheckException; import org.opensearch.sql.expression.DSL; @@ -291,6 +292,9 @@ public Expression visitAllFields(AllFields node, AnalysisContext context) { @Override public Expression visitQualifiedName(QualifiedName node, AnalysisContext context) { QualifierAnalyzer qualifierAnalyzer = new QualifierAnalyzer(context); + if (node.isMetadataField().booleanValue()) { + return visitMetadata(qualifierAnalyzer.unqualified(node), context); + } return visitIdentifier(qualifierAnalyzer.unqualified(node), context); } @@ -307,6 +311,33 @@ public Expression visitUnresolvedArgument(UnresolvedArgument node, AnalysisConte return new NamedArgumentExpression(node.getArgName(), node.getValue().accept(this, context)); } + private Expression visitMetadata(String ident, AnalysisContext context) { + // ParseExpression will always override ReferenceExpression when ident conflicts + for (NamedExpression expr : context.getNamedParseExpressions()) { + if (expr.getNameOrAlias().equals(ident) && expr.getDelegated() instanceof ParseExpression) { + return expr.getDelegated(); + } + } + + ReferenceExpression ref; + switch(ident.toLowerCase()) { + case "_index": + case "_id": + ref = DSL.ref(ident, ExprCoreType.STRING); + break; + case "_score": + case "_maxscore": + ref = DSL.ref(ident, ExprCoreType.FLOAT); + break; + case "_sort": + ref = DSL.ref(ident, ExprCoreType.LONG); + break; + default: + throw new SemanticCheckException("invalid metadata field"); + } + return ref; + } + private Expression visitIdentifier(String ident, AnalysisContext context) { // ParseExpression will always override ReferenceExpression when ident conflicts for (NamedExpression expr : context.getNamedParseExpressions()) { diff --git a/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java b/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java index a6c10c55dd..85f2bf63a6 100644 --- a/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java +++ b/core/src/main/java/org/opensearch/sql/ast/expression/QualifiedName.java @@ -25,7 +25,6 @@ public class QualifiedName extends UnresolvedExpression { private final List parts; - @Getter private final Boolean isMetadataField; public QualifiedName(String name) { @@ -124,5 +123,5 @@ public R accept(AbstractNodeVisitor nodeVisitor, C context) { return nodeVisitor.visitQualifiedName(this, context); } - public Boolean isMetadataField() { return Boolean.TRUE; } + public Boolean isMetadataField() { return this.isMetadataField; } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/system/OpenSearchDescribeIndexRequest.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/system/OpenSearchDescribeIndexRequest.java index 4bfa3ed0d6..f321497099 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/system/OpenSearchDescribeIndexRequest.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/system/OpenSearchDescribeIndexRequest.java @@ -118,11 +118,6 @@ public Map getFieldTypes() { .filter(entry -> !ExprCoreType.UNKNOWN.equals(entry.getValue())) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))); } - fieldTypes.put("_index", ExprCoreType.STRING); - fieldTypes.put("_id", ExprCoreType.STRING); - fieldTypes.put("_score", ExprCoreType.FLOAT); - fieldTypes.put("_maxscore", ExprCoreType.FLOAT); - fieldTypes.put("_sort", ExprCoreType.LONG); return fieldTypes; } diff --git a/sql/src/main/antlr/OpenSearchSQLLexer.g4 b/sql/src/main/antlr/OpenSearchSQLLexer.g4 index 297c077e02..262b5447aa 100644 --- a/sql/src/main/antlr/OpenSearchSQLLexer.g4 +++ b/sql/src/main/antlr/OpenSearchSQLLexer.g4 @@ -137,7 +137,7 @@ TRIM: 'TRIM'; // Metadata fields can be ID -META_INDEX: '_ID'; +META_INDEX: '_INDEX'; META_ID: '_ID'; META_SCORE: '_SCORE'; META_MAXSCORE: '_MAXSCORE'; From 92096b4a926cf41f964fdcf981d1a402f09ce260 Mon Sep 17 00:00:00 2001 From: Andrew Carbonetto Date: Mon, 5 Dec 2022 09:31:46 -0800 Subject: [PATCH 3/3] Update sql/src/main/antlr/OpenSearchSQLLexer.g4 Co-authored-by: Yury-Fridlyand --- sql/src/main/antlr/OpenSearchSQLLexer.g4 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/src/main/antlr/OpenSearchSQLLexer.g4 b/sql/src/main/antlr/OpenSearchSQLLexer.g4 index 262b5447aa..38c4025d70 100644 --- a/sql/src/main/antlr/OpenSearchSQLLexer.g4 +++ b/sql/src/main/antlr/OpenSearchSQLLexer.g4 @@ -457,7 +457,7 @@ fragment HEX_DIGIT: [0-9A-F]; fragment DEC_DIGIT: [0-9]; fragment BIT_STRING_L: 'B' '\'' [01]+ '\''; -// Identifiers cannot start with a single '_' since this an OpebSearch reserved +// Identifiers cannot start with a single '_' since this an OpenSearch reserved // metadata field. Two underscores (or more) is acceptable, such as '__field'. fragment ID_LITERAL: ([_][_]|[@*A-Z])+?[*A-Z_\-0-9]*;