Mask wildcard query special characters on keyword queries (elastic#53127)
Wildcard queries on keyword fields get normalized; however, this normalization
step should exclude the two special characters * and ? in order to keep the
wildcard query itself intact.

Closes elastic#46300
Christoph Büscher committed Mar 12, 2020
1 parent 3972cbe commit dfe1a10
Showing 8 changed files with 187 additions and 71 deletions.
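
Before the diffs, a note on the masking approach: the fix tags every escaped character and every run of * or ? in the pattern, so that only the literal text between those tokens is sent through the field's normalizer. A minimal standalone sketch of that tagging step (the class name and input string are assumed examples, not from the commit; the pattern is the one added to StringFieldType.java below):

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class WildcardChunks {
    public static void main(String[] args) {
        // same pattern the commit adds: an escaped character, or a run
        // of the wildcard characters ? and *
        Pattern wildcardPattern = Pattern.compile("(\\\\.)|([?*]+)");
        Matcher m = wildcardPattern.matcher("F?o Ba\\*r*");
        while (m.find()) {
            System.out.println("kept verbatim: " + m.group());
        }
        // prints: ?  then  \*  then  *
        // everything between those matches goes through the normalizer
    }
}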
---- ConstantFieldType.java (file name inferred; the excerpt shows its termQuery and matches contract) ----
@@ -79,6 +79,10 @@ private static String valueToString(Object value) {
     public final Query termQuery(Object value, QueryShardContext context) {
         String pattern = valueToString(value);
         if (matches(pattern, context)) {
+            if (context != null && context.getMapperService().hasNested()) {
+                // type filters are expected not to match nested docs
+                return Queries.newNonNestedFilter(context.indexVersionCreated());
+            }
             return Queries.newMatchAllQuery();
         } else {
             return new MatchNoDocsQuery();
---- KeywordFieldMapper.java (KeywordFieldType) ----
@@ -202,6 +202,7 @@ protected KeywordFieldType(KeywordFieldType ref) {
         this.splitQueriesOnWhitespace = ref.splitQueriesOnWhitespace;
     }
 
+    @Override
     public KeywordFieldType clone() {
         return new KeywordFieldType(this);
     }
---- StringFieldType.java ----
@@ -21,8 +21,6 @@

 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.FuzzyQuery;
-import org.apache.lucene.search.MatchAllDocsQuery;
-import org.apache.lucene.search.MatchNoDocsQuery;
 import org.apache.lucene.search.MultiTermQuery;
 import org.apache.lucene.search.PrefixQuery;
 import org.apache.lucene.search.Query;
@@ -31,13 +29,16 @@
 import org.apache.lucene.search.TermRangeQuery;
 import org.apache.lucene.search.WildcardQuery;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefBuilder;
 import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.common.lucene.BytesRefs;
 import org.elasticsearch.common.unit.Fuzziness;
 import org.elasticsearch.index.query.QueryShardContext;
 import org.elasticsearch.index.query.support.QueryParsers;
 
 import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import static org.elasticsearch.search.SearchService.ALLOW_EXPENSIVE_QUERIES;
 
@@ -47,6 +48,8 @@
  * can be implemented. */
 public abstract class StringFieldType extends TermBasedFieldType {
 
+    private static final Pattern WILDCARD_PATTERN = Pattern.compile("(\\\\.)|([?*]+)");
+
     public StringFieldType() {}
 
     protected StringFieldType(MappedFieldType ref) {
@@ -92,16 +95,41 @@ public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) {

     @Override
     public Query wildcardQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) {
-        Query termQuery = termQuery(value, context);
-        if (termQuery instanceof MatchNoDocsQuery || termQuery instanceof MatchAllDocsQuery) {
-            return termQuery;
-        }
 
+        failIfNotIndexed();
         if (context.allowExpensiveQueries() == false) {
             throw new ElasticsearchException("[wildcard] queries cannot be executed when '" +
                 ALLOW_EXPENSIVE_QUERIES.getKey() + "' is set to false.");
         }
-        Term term = MappedFieldType.extractTerm(termQuery);
 
+        Term term;
+        if (searchAnalyzer() != null) {
+            // we want to normalize everything except wildcard characters, e.g. F?o Ba* to f?o ba*, even if e.g. there
+            // is a char_filter that would otherwise remove them
+            Matcher wildcardMatcher = WILDCARD_PATTERN.matcher(value);
+            BytesRefBuilder sb = new BytesRefBuilder();
+            int last = 0;
+
+            while (wildcardMatcher.find()) {
+                if (wildcardMatcher.start() > 0) {
+                    String chunk = value.substring(last, wildcardMatcher.start());
+
+                    BytesRef normalized = searchAnalyzer().normalize(name(), chunk);
+                    sb.append(normalized);
+                }
+                // append the matched group - without normalizing
+                sb.append(new BytesRef(wildcardMatcher.group()));
+
+                last = wildcardMatcher.end();
+            }
+            if (last < value.length()) {
+                String chunk = value.substring(last);
+                BytesRef normalized = searchAnalyzer().normalize(name(), chunk);
+                sb.append(normalized);
+            }
+            term = new Term(name(), sb.toBytesRef());
+        } else {
+            term = new Term(name(), indexedValueForSearch(value));
+        }
 
         WildcardQuery query = new WildcardQuery(term);
         QueryParsers.setRewriteMethod(query, method);
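
To see the masking loop above in isolation, here is a runnable sketch outside Elasticsearch. Locale-rooted lowercasing stands in for searchAnalyzer().normalize() (an assumption for the demo; a real normalizer can do more than lowercase), and the class name is hypothetical:

import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class WildcardMaskDemo {
    private static final Pattern WILDCARD_PATTERN = Pattern.compile("(\\\\.)|([?*]+)");

    static String normalizeKeepingWildcards(String value) {
        Matcher wildcardMatcher = WILDCARD_PATTERN.matcher(value);
        StringBuilder sb = new StringBuilder();
        int last = 0;
        while (wildcardMatcher.find()) {
            // normalize the literal chunk before this wildcard/escape ...
            sb.append(value.substring(last, wildcardMatcher.start()).toLowerCase(Locale.ROOT));
            // ... and keep the wildcard/escape itself untouched
            sb.append(wildcardMatcher.group());
            last = wildcardMatcher.end();
        }
        // normalize whatever trails the last match
        sb.append(value.substring(last).toLowerCase(Locale.ROOT));
        return sb.toString();
    }

    public static void main(String[] args) {
        // mirrors the example in the code comment above
        System.out.println(normalizeKeepingWildcards("F?o Ba*")); // prints: f?o ba*
    }
}

The production code accumulates the chunks in a BytesRefBuilder rather than a StringBuilder because Analyzer#normalize returns a BytesRef (the indexed byte form) rather than a String.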
---- TypeFieldMapper.java (TypeFieldType) ----
@@ -35,15 +35,18 @@
 import org.apache.lucene.search.TermInSetQuery;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.geo.ShapeRelation;
+import org.elasticsearch.common.lucene.BytesRefs;
 import org.elasticsearch.common.lucene.Lucene;
-import org.elasticsearch.common.lucene.search.Queries;
+import org.elasticsearch.common.time.DateMathParser;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.fielddata.IndexFieldData;
 import org.elasticsearch.index.fielddata.plain.ConstantIndexFieldData;
 import org.elasticsearch.index.query.QueryShardContext;
 
 import java.io.IOException;
+import java.time.ZoneId;
 import java.util.Arrays;
 import java.util.HashSet;
 import java.util.List;
@@ -88,7 +91,7 @@ public MetadataFieldMapper getDefault(MappedFieldType fieldType, ParserContext context) {
         }
     }
 
-    public static final class TypeFieldType extends StringFieldType {
+    public static final class TypeFieldType extends ConstantFieldType {
 
         TypeFieldType() {
         }
@@ -114,55 +117,26 @@ public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName) {
         }
 
-        @Override
-        public boolean isSearchable() {
-            return true;
-        }
-
-        @Override
-        public Query existsQuery(QueryShardContext context) {
-            return new MatchAllDocsQuery();
-        }
-
-        @Override
-        public Query termQuery(Object value, QueryShardContext context) {
-            return termsQuery(Arrays.asList(value), context);
-        }
-
         @Override
-        public Query termsQuery(List<?> values, QueryShardContext context) {
-            DocumentMapper mapper = context.getMapperService().documentMapper();
-            if (mapper == null) {
-                return new MatchNoDocsQuery("No types");
-            }
-            BytesRef indexType = indexedValueForSearch(mapper.type());
-            if (values.stream()
-                    .map(this::indexedValueForSearch)
-                    .anyMatch(indexType::equals)) {
-                if (context.getMapperService().hasNested()) {
-                    // type filters are expected not to match nested docs
-                    return Queries.newNonNestedFilter(context.indexVersionCreated());
-                } else {
-                    return new MatchAllDocsQuery();
-                }
-            } else {
-                return new MatchNoDocsQuery("Type list does not contain the index type");
-            }
+        protected boolean matches(String pattern, QueryShardContext context) {
+            String type = context.getMapperService().documentMapper().type();
+            return pattern.equals(type);
         }
 
         @Override
-        public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, QueryShardContext context) {
+        public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, ShapeRelation relation,
+                                ZoneId timeZone, DateMathParser parser, QueryShardContext context) {
             Query result = new MatchAllDocsQuery();
             String type = context.getMapperService().documentMapper().type();
             if (type != null) {
                 BytesRef typeBytes = new BytesRef(type);
                 if (lowerTerm != null) {
-                    int comp = indexedValueForSearch(lowerTerm).compareTo(typeBytes);
+                    int comp = BytesRefs.toBytesRef(lowerTerm).compareTo(typeBytes);
                     if (comp > 0 || (comp == 0 && includeLower == false)) {
                         result = new MatchNoDocsQuery("[_type] was lexicographically smaller than lower bound of range");
                     }
                 }
                 if (upperTerm != null) {
-                    int comp = indexedValueForSearch(upperTerm).compareTo(typeBytes);
+                    int comp = BytesRefs.toBytesRef(upperTerm).compareTo(typeBytes);
                     if (comp < 0 || (comp == 0 && includeUpper == false)) {
                         result = new MatchNoDocsQuery("[_type] was lexicographically greater than upper bound of range");
                     }
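
The bound checks above rely on Lucene's byte-wise BytesRef ordering. A small sketch of the lower-bound case (the class name, type name "_doc", and bound "a" are assumed examples):

import org.apache.lucene.util.BytesRef;

public class TypeRangeBoundDemo {
    public static void main(String[] args) {
        BytesRef typeBytes = new BytesRef("_doc");
        BytesRef lowerTerm = new BytesRef("a");
        // comp > 0: the lower bound sorts after the single indexed type,
        // so the range cannot match anything
        int comp = lowerTerm.compareTo(typeBytes);
        System.out.println(comp > 0 ? "match no docs" : "keep match-all"); // match no docs
    }
}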
---- TypeFieldTypeTests.java ----
@@ -53,26 +53,25 @@ public void testTermsQuery() throws Exception {
         MapperService mapperService = Mockito.mock(MapperService.class);
-        Mockito.when(mapperService.documentMapper()).thenReturn(null);
         Mockito.when(context.getMapperService()).thenReturn(mapperService);
+        DocumentMapper mapper = Mockito.mock(DocumentMapper.class);
+        Mockito.when(mapper.type()).thenReturn("_doc");
+        Mockito.when(mapperService.documentMapper()).thenReturn(mapper);
 
         TypeFieldMapper.TypeFieldType ft = new TypeFieldMapper.TypeFieldType();
         ft.setName(TypeFieldMapper.NAME);
-        Query query = ft.termQuery("my_type", context);
+
+        Query query = ft.termQuery("_doc", context);
+        assertEquals(new MatchAllDocsQuery(), query);
+
+        query = ft.termQuery("other_type", context);
         assertEquals(new MatchNoDocsQuery(), query);
 
-        DocumentMapper mapper = Mockito.mock(DocumentMapper.class);
-        Mockito.when(mapper.type()).thenReturn("my_type");
-        Mockito.when(mapperService.documentMapper()).thenReturn(mapper);
-        query = ft.termQuery("my_type", context);
+        Mockito.when(mapper.type()).thenReturn("other_type");
+        query = ft.termQuery("other_type", context);
         assertEquals(new MatchAllDocsQuery(), query);
 
         Mockito.when(mapperService.hasNested()).thenReturn(true);
-        query = ft.termQuery("my_type", context);
+        query = ft.termQuery("other_type", context);
         assertEquals(Queries.newNonNestedFilter(context.indexVersionCreated()), query);
-
-        mapper = Mockito.mock(DocumentMapper.class);
-        Mockito.when(mapper.type()).thenReturn("other_type");
-        Mockito.when(mapperService.documentMapper()).thenReturn(mapper);
-        query = ft.termQuery("my_type", context);
-        assertEquals(new MatchNoDocsQuery(), query);
     }
 }
---- RangeQueryBuilderTests.java ----
@@ -560,13 +560,6 @@ public void testParseRelation() {
         assertEquals(ShapeRelation.INTERSECTS, builder.relation());
     }
 
-    public void testTypeField() throws IOException {
-        RangeQueryBuilder builder = QueryBuilders.rangeQuery("_type")
-            .from("value1");
-        builder.doToQuery(createShardContext());
-        assertWarnings(QueryShardContext.TYPES_DEPRECATION_MESSAGE);
-    }
-
     /**
      * Range queries should generally be cacheable, at least the ones we create randomly.
      * This test makes sure we also test the non-cacheable cases regularly.
---- WildcardQueryBuilderTests.java ----
@@ -27,6 +27,7 @@

 import java.io.IOException;
 import java.util.HashMap;
+import java.util.Locale;
 import java.util.Map;
 
 import static org.hamcrest.Matchers.equalTo;
@@ -75,7 +76,9 @@ protected void doAssertLuceneQuery(WildcardQueryBuilder queryBuilder, Query query, QueryShardContext context) throws IOException {

             assertThat(wildcardQuery.getField(), equalTo(expectedFieldName));
             assertThat(wildcardQuery.getTerm().field(), equalTo(expectedFieldName));
-            assertThat(wildcardQuery.getTerm().text(), equalTo(queryBuilder.value()));
+            // wildcard queries get normalized
+            String expectedText = queryBuilder.value().toLowerCase(Locale.ROOT);
+            assertThat(wildcardQuery.getTerm().text(), equalTo(expectedText));
         } else {
             Query expected = new MatchNoDocsQuery("unknown field [" + expectedFieldName + "]");
             assertEquals(expected, query);
@@ -138,14 +141,14 @@ public void testTypeField() throws IOException {
         builder.doToQuery(createShardContext());
         assertWarnings(QueryShardContext.TYPES_DEPRECATION_MESSAGE);
     }
 
     public void testRewriteIndexQueryToMatchNone() throws IOException {
         WildcardQueryBuilder query = new WildcardQueryBuilder("_index", "does_not_exist");
         QueryShardContext queryShardContext = createShardContext();
         QueryBuilder rewritten = query.rewrite(queryShardContext);
         assertThat(rewritten, instanceOf(MatchNoneQueryBuilder.class));
     }
 
     public void testRewriteIndexQueryNotMatchNone() throws IOException {
         String fullIndexName = getIndex().getName();
         String firstHalfOfIndexName = fullIndexName.substring(0, fullIndexName.length() / 2);