Skip to content

Commit

Permalink
Review KEEP logic to prevent duplicate column names (elastic#103316)
Browse files Browse the repository at this point in the history
  • Loading branch information
luigidellaquila authored Dec 13, 2023
1 parent 36eb1b7 commit be9882f
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 22 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/103316.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 103316
summary: Review KEEP logic to prevent duplicate column names
area: ES|QL
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -416,6 +417,40 @@ private LogicalPlan resolveEval(Eval eval, List<Attribute> childOutput) {
return changed ? new Eval(eval.source(), eval.child(), newFields) : eval;
}

/**
* resolve each item manually.
*
* Fields are added in the order they appear.
*
* If one field matches multiple expressions, the following precedence rules apply (higher to lower):
* 1. complete field name (ie. no wildcards)
* 2. partial wildcard expressions (eg. fieldNam*)
* 3. wildcard only (ie. *)
*
* If a field name matches multiple expressions with the same precedence, last one is used.
*
* A few examples below:
*
* // full name
* row foo = 1, bar = 2 | keep foo, bar, foo -> bar, foo
*
* // the full name has precedence on wildcard expression
* row foo = 1, bar = 2 | keep foo, bar, foo* -> foo, bar
*
* // the two wildcard expressions have the same priority, even though the first one is more specific
* // so last one wins
* row foo = 1, bar = 2 | keep foo*, bar, fo* -> bar, foo
*
* // * has the lowest priority
* row foo = 1, bar = 2 | keep *, foo -> bar, foo
* row foo = 1, bar = 2 | keep foo, * -> foo, bar
* row foo = 1, bar = 2 | keep bar*, foo, * -> bar, foo
*
*
* @param p
* @param childOutput
* @return
*/
private LogicalPlan resolveKeep(Project p, List<Attribute> childOutput) {
List<NamedExpression> resolvedProjections = new ArrayList<>();
var projections = p.projections();
Expand All @@ -427,26 +462,31 @@ private LogicalPlan resolveKeep(Project p, List<Attribute> childOutput) {
}
// otherwise resolve them
else {
var starPosition = -1; // no star
// resolve each item manually while paying attention to:
// 1. name patterns a*, *b, a*b
// 2. star * - which can only appear once and signifies "everything else" - this will be added at the end
for (var ne : projections) {
if (ne instanceof UnresolvedStar) {
starPosition = resolvedProjections.size();
} else if (ne instanceof UnresolvedAttribute ua) {
resolvedProjections.addAll(resolveAgainstList(ua, childOutput));
} else {
// if this gets here it means it was already resolved
resolvedProjections.add(ne);
Map<NamedExpression, Integer> priorities = new LinkedHashMap<>();
for (Attribute attribute : childOutput) {
for (var proj : projections) {
List<Attribute> resolved;
int priority;
if (proj instanceof UnresolvedStar) {
resolved = childOutput;
priority = 2;
} else if (proj instanceof UnresolvedAttribute ua) {
resolved = resolveAgainstList(ua, childOutput);
priority = Regex.isSimpleMatchPattern(ua.name()) ? 1 : 0;
} else {
resolved = List.of(attribute);
priority = 0;
}
for (Attribute attr : resolved) {
Integer previousPrio = priorities.get(attr);
if (previousPrio == null || previousPrio >= priority) {
priorities.remove(attr);
priorities.put(attr, priority);
}
}
}
}
// compute star if specified and add it to the list
if (starPosition >= 0) {
var remainingProjections = new ArrayList<>(childOutput);
remainingProjections.removeAll(resolvedProjections);
resolvedProjections.addAll(starPosition, remainingProjections);
}
resolvedProjections = new ArrayList<>(priorities.keySet());
}

return new EsqlProject(p.source(), p.child(), resolvedProjections);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import org.elasticsearch.xpack.esql.enrich.EnrichPolicyResolution;
import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry;
import org.elasticsearch.xpack.esql.expression.function.aggregate.Max;
import org.elasticsearch.xpack.esql.parser.ParsingException;
import org.elasticsearch.xpack.esql.plan.logical.EsqlUnresolvedRelation;
import org.elasticsearch.xpack.esql.plan.logical.Eval;
import org.elasticsearch.xpack.esql.plan.logical.Row;
Expand Down Expand Up @@ -266,11 +267,68 @@ public void testNoProjection() {
);
}

public void testProjectOrder() {
public void testDuplicateProjections() {
assertProjection("""
from test
| keep first_name, first_name
""", "first_name");
assertProjection("""
from test
| keep first_name, first_name, last_name, first_name
""", "last_name", "first_name");
}

public void testProjectWildcard() {
assertProjection("""
from test
| keep first_name, *, last_name
""", "first_name", "_meta_field", "emp_no", "gender", "job", "job.raw", "languages", "long_noidx", "salary", "last_name");
assertProjection("""
from test
| keep first_name, last_name, *
""", "first_name", "last_name", "_meta_field", "emp_no", "gender", "job", "job.raw", "languages", "long_noidx", "salary");
assertProjection("""
from test
| keep *, first_name, last_name
""", "_meta_field", "emp_no", "gender", "job", "job.raw", "languages", "long_noidx", "salary", "first_name", "last_name");

var e = expectThrows(ParsingException.class, () -> analyze("""
from test
| keep *, first_name, last_name, *
"""));
assertThat(e.getMessage(), containsString("Cannot specify [*] more than once"));

}

public void testProjectMixedWildcard() {
assertProjection("""
from test
| keep *name, first*
""", "last_name", "first_name");
assertProjection("""
from test
| keep first_name, *name, first*
""", "first_name", "last_name");
assertProjection("""
from test
| keep *ob*, first_name, *name, first*
""", "job", "job.raw", "first_name", "last_name");
assertProjection("""
from test
| keep first_name, *, *name
""", "first_name", "_meta_field", "emp_no", "gender", "job", "job.raw", "languages", "long_noidx", "salary", "last_name");
assertProjection("""
from test
| keep first*, *, last_name, first_name
""", "_meta_field", "emp_no", "gender", "job", "job.raw", "languages", "long_noidx", "salary", "last_name", "first_name");
assertProjection("""
from test
| keep first*, *, last_name, fir*
""", "_meta_field", "emp_no", "gender", "job", "job.raw", "languages", "long_noidx", "salary", "last_name", "first_name");
assertProjection("""
from test
| keep *, job*
""", "_meta_field", "emp_no", "first_name", "gender", "languages", "last_name", "long_noidx", "salary", "job", "job.raw");
}

public void testProjectThenDropName() {
Expand Down Expand Up @@ -1429,9 +1487,6 @@ public void testMissingAttributeException_InChainedEval() {
public void testUnresolvedMvExpand() {
var e = expectThrows(VerificationException.class, () -> analyze("row foo = 1 | mv_expand bar"));
assertThat(e.getMessage(), containsString("Unknown column [bar]"));

e = expectThrows(VerificationException.class, () -> analyze("row foo = 1 | keep foo, foo | mv_expand foo"));
assertThat(e.getMessage(), containsString("Reference [foo] is ambiguous (to disambiguate use quotes or qualifiers)"));
}

private void verifyUnsupported(String query, String errorMessage) {
Expand Down

0 comments on commit be9882f

Please sign in to comment.