Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#639: allow metadata fields (_id) to be used as QualifiedName #142

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
import org.opensearch.sql.ast.expression.Xor;
import org.opensearch.sql.common.antlr.SyntaxCheckException;
import org.opensearch.sql.data.model.ExprValueUtils;
import org.opensearch.sql.data.type.ExprCoreType;
import org.opensearch.sql.data.type.ExprType;
import org.opensearch.sql.exception.SemanticCheckException;
import org.opensearch.sql.expression.DSL;
Expand Down Expand Up @@ -291,6 +292,9 @@ public Expression visitAllFields(AllFields node, AnalysisContext context) {
@Override
public Expression visitQualifiedName(QualifiedName node, AnalysisContext context) {
QualifierAnalyzer qualifierAnalyzer = new QualifierAnalyzer(context);
if (node.isMetadataField().booleanValue()) {
return visitMetadata(qualifierAnalyzer.unqualified(node), context);
}
return visitIdentifier(qualifierAnalyzer.unqualified(node), context);
}

Expand All @@ -307,6 +311,33 @@ public Expression visitUnresolvedArgument(UnresolvedArgument node, AnalysisConte
return new NamedArgumentExpression(node.getArgName(), node.getValue().accept(this, context));
}

private Expression visitMetadata(String ident, AnalysisContext context) {
// ParseExpression will always override ReferenceExpression when ident conflicts
for (NamedExpression expr : context.getNamedParseExpressions()) {
if (expr.getNameOrAlias().equals(ident) && expr.getDelegated() instanceof ParseExpression) {
return expr.getDelegated();
}
}

ReferenceExpression ref;
switch(ident.toLowerCase()) {
case "_index":
case "_id":
ref = DSL.ref(ident, ExprCoreType.STRING);
break;
case "_score":
case "_maxscore":
ref = DSL.ref(ident, ExprCoreType.FLOAT);
break;
case "_sort":
ref = DSL.ref(ident, ExprCoreType.LONG);
break;
default:
throw new SemanticCheckException("invalid metadata field");
}
return ref;
}

private Expression visitIdentifier(String ident, AnalysisContext context) {
// ParseExpression will always override ReferenceExpression when ident conflicts
for (NamedExpression expr : context.getNamedParseExpressions()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,26 @@
public class QualifiedName extends UnresolvedExpression {
private final List<String> parts;

private final Boolean isMetadataField;
acarbonetto marked this conversation as resolved.
Show resolved Hide resolved

public QualifiedName(String name) {
this(name, Boolean.FALSE);
}

public QualifiedName(String name, Boolean isMetadataField) {
this.parts = Collections.singletonList(name);
this.isMetadataField = isMetadataField;
}

public QualifiedName(Iterable<String> parts) {
this(parts, Boolean.FALSE);
}

/**
* QualifiedName Constructor.
Copy link

@GabeFernandez310 GabeFernandez310 Oct 26, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this comment need to be moved up? Are the above not considered as constructors as well?

*/
public QualifiedName(Iterable<String> parts) {
public QualifiedName(Iterable<String> parts, Boolean isMetadataField) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be better to determine isMetadataField dynamically rather than through constructor parameter?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's easiest to define it at constructor time... since we need to look up the expression tree.

this.isMetadataField = isMetadataField;
List<String> partsList = StreamSupport.stream(parts.spliterator(), false).collect(toList());
if (partsList.isEmpty()) {
throw new IllegalArgumentException("parts is empty");
Expand Down Expand Up @@ -110,4 +122,6 @@ public List<UnresolvedExpression> getChild() {
public <R, C> R accept(AbstractNodeVisitor<R, C> nodeVisitor, C context) {
return nodeVisitor.visitQualifiedName(this, context);
}

public Boolean isMetadataField() { return this.isMetadataField; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ public void specificPercentilesIntAndDouble() throws IOException {
}
}

// TODO: this should now work
@Ignore("only work for legacy engine")
public void nestedObjectsAndArraysAreQuoted() throws IOException {
final String query = String.format(Locale.ROOT, "SELECT * FROM %s WHERE _id = 5",
Expand All @@ -114,6 +115,7 @@ public void nestedObjectsAndArraysAreQuoted() throws IOException {
Assert.assertThat(result, containsString(expectedMessage));
}

// TODO: this should now work
@Ignore("only work for legacy engine")
public void arraysAreQuotedInFlatMode() throws IOException {
setFlatOption(true);
Expand Down Expand Up @@ -546,6 +548,7 @@ public void twoCharsSeperator() throws Exception {

}

// TODO: this should now work
@Ignore("only work for legacy engine")
public void includeIdAndNotTypeOrScore() throws Exception {
String query = String.format(Locale.ROOT,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
import org.opensearch.search.SearchHits;
import org.opensearch.search.aggregations.Aggregations;
import org.opensearch.sql.data.model.ExprTupleValue;
import org.opensearch.sql.data.model.ExprStringValue;
import org.opensearch.sql.data.model.ExprFloatValue;
import org.opensearch.sql.data.model.ExprLongValue;
import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.data.model.ExprValueUtils;
import org.opensearch.sql.opensearch.data.value.OpenSearchExprValueFactory;
Expand Down Expand Up @@ -92,23 +95,34 @@ public Iterator<ExprValue> iterator() {
return (ExprValue) ExprTupleValue.fromExprValueMap(builder.build());
}).iterator();
} else {
float maxScore = hits.getMaxScore();
return Arrays.stream(hits.getHits())
.map(hit -> {
ExprValue docData = exprValueFactory.construct(hit.getSourceAsString());
if (hit.getHighlightFields().isEmpty()) {
return docData;
} else {
ImmutableMap.Builder<String, ExprValue> builder = new ImmutableMap.Builder<>();
builder.putAll(docData.tupleValue());
String source = hit.getSourceAsString();
ExprValue docData = exprValueFactory.construct(source);

ImmutableMap.Builder<String, ExprValue> builder = new ImmutableMap.Builder<>();
builder.putAll(docData.tupleValue());
builder.put("_index", new ExprStringValue(hit.getIndex()));

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we ever have hits from different indices?
If yes, maxScore should be gotten per each hit. If not, you can move getIndex() outside of the loop to optimize performance.

builder.put("_id", new ExprStringValue(hit.getId()));
if (!Float.isNaN(hit.getScore())) {
builder.put("_score", new ExprFloatValue(hit.getScore()));
}
if (!Float.isNaN(maxScore)) {
builder.put("_maxscore", new ExprLongValue(maxScore));
}
Comment on lines +108 to +113

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When _score/_maxscore could be missing?
What will be returned to user in that case? NULL? MISSING (aka empty cell)?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Null is what gets returned. That's what's defined in the _score field.
Which is consistent with what OpenSearch is returning... I'm assuming that's the correct way forward.

builder.put("_sort", new ExprLongValue(hit.getSeqNo()));

if (!hit.getHighlightFields().isEmpty()) {
var hlBuilder = ImmutableMap.<String, ExprValue>builder();
for (var es : hit.getHighlightFields().entrySet()) {
hlBuilder.put(es.getKey(), ExprValueUtils.collectionValue(
Arrays.stream(es.getValue().fragments()).map(
t -> (t.toString())).collect(Collectors.toList())));
}
builder.put("_highlight", ExprTupleValue.fromExprValueMap(hlBuilder.build()));
return ExprTupleValue.fromExprValueMap(builder.build());
}
return (ExprValue) ExprTupleValue.fromExprValueMap(builder.build());
}).iterator();
}
}
Expand Down
9 changes: 9 additions & 0 deletions sql/src/main/antlr/OpenSearchSQLIdentifierParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,18 @@ qualifiedName
ident
: DOT? ID
| BACKTICK_QUOTE_ID
| metadataField
| keywordsCanBeId
;

metadataField
: META_INDEX
| META_ID
| META_SCORE
| META_MAXSCORE
| META_SORT
;

keywordsCanBeId
: FULL
| FIELD | D | T | TS // OD SQL and ODBC special
Expand Down
14 changes: 13 additions & 1 deletion sql/src/main/antlr/OpenSearchSQLLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,15 @@ SUBSTRING: 'SUBSTRING';
TRIM: 'TRIM';


// Metadata fields can be ID

META_INDEX: '_INDEX';
META_ID: '_ID';
META_SCORE: '_SCORE';
META_MAXSCORE: '_MAXSCORE';
META_SORT: '_SORT';


// Keywords, but can be ID
// Common Keywords, but can be ID

Expand Down Expand Up @@ -441,14 +450,17 @@ BACKTICK_QUOTE_ID: BQUOTA_STRING;

// Fragments for Literal primitives
fragment EXPONENT_NUM_PART: 'E' [-+]? DEC_DIGIT+;
fragment ID_LITERAL: [@*A-Z]+?[*A-Z_\-0-9]*;
fragment DQUOTA_STRING: '"' ( '\\'. | '""' | ~('"'| '\\') )* '"';
fragment SQUOTA_STRING: '\'' ('\\'. | '\'\'' | ~('\'' | '\\'))* '\'';
fragment BQUOTA_STRING: '`' ( '\\'. | '``' | ~('`'|'\\'))* '`';
fragment HEX_DIGIT: [0-9A-F];
fragment DEC_DIGIT: [0-9];
fragment BIT_STRING_L: 'B' '\'' [01]+ '\'';

// Identifiers cannot start with a single '_' since this an OpenSearch reserved
// metadata field. Two underscores (or more) is acceptable, such as '__field'.
fragment ID_LITERAL: ([_][_]|[@*A-Z])+?[*A-Z_\-0-9]*;
Yury-Fridlyand marked this conversation as resolved.
Show resolved Hide resolved

// Last tokens must generate Errors

ERROR_RECOGNITION: . -> channel(ERRORCHANNEL);
Original file line number Diff line number Diff line change
Expand Up @@ -430,12 +430,13 @@ private UnresolvedExpression visitConstantFunction(String functionName,
}

private QualifiedName visitIdentifiers(List<IdentContext> identifiers) {
Boolean isMetadataField = identifiers.stream().filter(id -> id.metadataField() != null).findFirst().isPresent();

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could be there a mix of different types of identifiers - some of them are metadata and some of them are not?

return new QualifiedName(
identifiers.stream()
.map(RuleContext::getText)
.map(StringUtils::unquoteIdentifier)
.collect(Collectors.toList())
);
identifiers.stream()
.map(RuleContext::getText)
.map(StringUtils::unquoteIdentifier)
.collect(Collectors.toList()),
isMetadataField);
}

private List<UnresolvedExpression> singleFieldRelevanceArguments(
Expand Down