Skip to content

Commit

Permalink
Add a limit for graph phrase query expansion (#34061)
Browse files Browse the repository at this point in the history
Backport of #34031 for 5.6
  • Loading branch information
jimczi authored Oct 19, 2018
1 parent 06e8b46 commit 42b2268
Show file tree
Hide file tree
Showing 7 changed files with 309 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,15 @@
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.automaton.RegExp;
import org.apache.lucene.util.graph.GraphTokenStreamFiniteStrings;
import org.apache.lucene.util.QueryBuilder;
import org.elasticsearch.common.Booleans;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.index.mapper.AllFieldMapper;
Expand All @@ -58,9 +64,11 @@
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Collections;

import static java.util.Collections.unmodifiableMap;
import static org.elasticsearch.common.lucene.search.Queries.fixNegativeQueryIfNeeded;

Expand All @@ -72,6 +80,7 @@
* as well as the query on the name.
*/
public class MapperQueryParser extends AnalyzingQueryParser {
private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(MapperQueryParser.class));

public static final Map<String, FieldQueryExtension> FIELD_QUERY_EXTENSIONS;

Expand Down Expand Up @@ -828,6 +837,7 @@ public Query parse(String query) throws ParseException {
* Checks if graph analysis should be enabled for the field depending
* on the provided {@link Analyzer}
*/
@Override
protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator, String field,
String queryText, boolean quoted, int phraseSlop) {
assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST;
Expand All @@ -849,4 +859,131 @@ protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator
throw new RuntimeException("Error analyzing query text", e);
}
}

/**
 * Builds the span query for a graph phrase by delegating to
 * {@link MapperQueryParser#analyzeGraphPhraseWithLimit}, using this parser's
 * <code>createSpanQuery</code> to build the per-path queries and
 * {@link MapperQueryParser#shouldApplyGraphPhraseLimit()} to decide whether the
 * limit is enforced as a hard limit.
 */
@Override
protected SpanQuery analyzeGraphPhrase(TokenStream source, String field, int phraseSlop) throws IOException {
    final boolean hardLimit = shouldApplyGraphPhraseLimit();
    final CheckedBiFunction<TokenStream, String, SpanQuery> spanQueryBuilder = this::createSpanQuery;
    return analyzeGraphPhraseWithLimit(source, field, phraseSlop, spanQueryBuilder, hardLimit);
}

/**
* A two-argument function, shaped like {@link java.util.function.BiFunction}, whose
* {@link #apply} method is allowed to throw an {@link IOException}.
*
* @param <T> the type of the first argument
* @param <U> the type of the second argument
* @param <R> the type of the result
*/
@FunctionalInterface
public interface CheckedBiFunction<T, U, R> {

/**
* Applies this function to the given arguments.
*
* @param t the first function argument
* @param u the second function argument
* @return the function result
* @throws IOException if the implementation fails with an I/O error
*/
R apply(T t, U u) throws IOException;
}

/**
 * Checks the value of the JVM option <code>es.query.apply_graph_phrase_limit</code> to determine
 * if the analysis of graph phrase should be limited to {@link BooleanQuery#getMaxClauseCount()}.
 * The JVM option can only be set to <code>true</code>; when it is not set the limit is not applied
 * (<code>false</code> is the default), and any other value will throw an {@link IllegalArgumentException}.
 *
 * @return <code>true</code> if the option is set to <code>true</code>, <code>false</code> if it is not set
 * @throws IllegalArgumentException if the option is set to anything other than <code>true</code>
 */
public static boolean shouldApplyGraphPhraseLimit() {
    // Declared once so the lookup and the error message can never name different options.
    final String optionName = "es.query.apply_graph_phrase_limit";
    final String value = System.getProperty(optionName);
    if (value == null) {
        return false;
    }
    if ("true".equals(value) == false) {
        throw new IllegalArgumentException("[" + value + "] is not a valid value for the JVM option:" +
            "[" + optionName + "]. Set it to [true] to activate the limit.");
    }
    return true;
}

/**
 * Overrides {@link QueryBuilder#analyzeGraphPhrase(TokenStream, String, int)} to add
 * a limit (see {@link BooleanQuery#getMaxClauseCount()}) to the number of {@link SpanQuery}
 * that this method can create.
 *
 * The token graph is processed segment by segment, the segments being delimited by the
 * articulation points reported by {@link GraphTokenStreamFiniteStrings}. Each segment becomes
 * one span clause: an OR over the span queries built from every finite string of the segment when
 * the segment has side paths, or a term / OR-of-terms clause otherwise.
 *
 * @param source        the token stream to analyze
 * @param field         the field the phrase query targets
 * @param phraseSlop    the slop of the resulting {@link SpanNearQuery}
 * @param spanQueryFunc builds the span query for one finite string of a segment; a <code>null</code>
 *                      result is skipped
 * @param isHardLimit   if <code>true</code> reaching the limit throws {@link BooleanQuery.TooManyClauses},
 *                      otherwise a deprecation warning is emitted and the expansion continues
 * @return <code>null</code> if no clause was produced, the single clause if there is only one,
 *         otherwise an in-order {@link SpanNearQuery} over all clauses
 * @throws IOException if the analysis of the token stream fails
 */
public static SpanQuery analyzeGraphPhraseWithLimit(TokenStream source, String field, int phraseSlop,
                                                    CheckedBiFunction<TokenStream, String, SpanQuery> spanQueryFunc,
                                                    boolean isHardLimit) throws IOException {
    GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(source);
    List<SpanQuery> clauses = new ArrayList<>();
    int[] articulationPoints = graph.articulationPoints();
    int lastState = 0;
    int maxBooleanClause = BooleanQuery.getMaxClauseCount();
    // One iteration per articulation point plus a final one for the trailing segment.
    for (int i = 0; i <= articulationPoints.length; i++) {
        int start = lastState;
        // The trailing segment has no closing articulation point and is requested with -1.
        int end = -1;
        if (i < articulationPoints.length) {
            end = articulationPoints[i];
        }
        lastState = end;
        final SpanQuery queryPos;
        if (graph.hasSidePath(start)) {
            List<SpanQuery> queries = new ArrayList<>();
            Iterator<TokenStream> it = graph.getFiniteStrings(start, end);
            while (it.hasNext()) {
                TokenStream ts = it.next();
                SpanQuery q = spanQueryFunc.apply(ts, field);
                if (q != null) {
                    // The list grows one element at a time, so this is true exactly once per segment:
                    // the hard limit fails here, the soft limit warns a single time instead of once
                    // per additional path.
                    if (queries.size() == maxBooleanClause) {
                        reportClauseLimitReached(field, maxBooleanClause, isHardLimit);
                    }
                    queries.add(q);
                }
            }
            if (queries.size() > 0) {
                queryPos = new SpanOrQuery(queries.toArray(new SpanQuery[0]));
            } else {
                queryPos = null;
            }
        } else {
            Term[] terms = graph.getTerms(field, start);
            assert terms.length > 0;
            if (terms.length >= maxBooleanClause) {
                reportClauseLimitReached(field, maxBooleanClause, isHardLimit);
            }
            if (terms.length == 1) {
                queryPos = new SpanTermQuery(terms[0]);
            } else {
                SpanTermQuery[] orClauses = new SpanTermQuery[terms.length];
                for (int idx = 0; idx < terms.length; idx++) {
                    orClauses[idx] = new SpanTermQuery(terms[idx]);
                }
                queryPos = new SpanOrQuery(orClauses);
            }
        }
        if (queryPos != null) {
            if (clauses.size() >= maxBooleanClause) {
                reportClauseLimitReached(field, maxBooleanClause, isHardLimit);
            }
            clauses.add(queryPos);
        }
    }
    if (clauses.isEmpty()) {
        return null;
    } else if (clauses.size() == 1) {
        return clauses.get(0);
    } else {
        return new SpanNearQuery(clauses.toArray(new SpanQuery[0]), phraseSlop, true);
    }
}

/**
 * Reacts to the max boolean clause limit being reached while expanding a graph phrase query:
 * throws when the limit is hard, emits a deprecation warning otherwise.
 *
 * @throws BooleanQuery.TooManyClauses if <code>isHardLimit</code> is <code>true</code>
 */
private static void reportClauseLimitReached(String field, int maxBooleanClause, boolean isHardLimit) {
    if (isHardLimit) {
        throw new BooleanQuery.TooManyClauses();
    }
    DEPRECATION_LOGGER.deprecated("Phrase query on field:[" + field + "] reached the max boolean" +
        " clause limit [" + maxBooleanClause + "] after expansion. This query will throw an error in" +
        " the next major version.");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,15 @@
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MapperQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
import org.elasticsearch.index.mapper.MappedFieldType;
Expand All @@ -41,6 +43,9 @@
import java.util.List;
import java.util.ArrayList;

import static org.apache.lucene.queryparser.classic.MapperQueryParser.analyzeGraphPhraseWithLimit;
import static org.apache.lucene.queryparser.classic.MapperQueryParser.shouldApplyGraphPhraseLimit;

/**
* Wrapper class for Lucene's SimpleQueryParser that allows us to redefine
* different types of queries.
Expand Down Expand Up @@ -173,6 +178,7 @@ public Query newPrefixQuery(String text) {
* Checks if graph analysis should be enabled for the field depending
* on the provided {@link Analyzer}
*/
@Override
protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator, String field,
String queryText, boolean quoted, int phraseSlop) {
assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST;
Expand All @@ -195,6 +201,14 @@ protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator
}
}

/**
 * Applies the graph phrase expansion limit to this parser by handing the analysis
 * over to {@link MapperQueryParser#analyzeGraphPhraseWithLimit}. Span queries are
 * built with this parser's <code>createSpanQuery</code>, and the limit is hard only
 * when {@link MapperQueryParser#shouldApplyGraphPhraseLimit()} returns <code>true</code>.
 */
@Override
protected SpanQuery analyzeGraphPhrase(TokenStream source, String field, int phraseSlop) throws IOException {
    final boolean enforceLimit = shouldApplyGraphPhraseLimit();
    return analyzeGraphPhraseWithLimit(
        source,
        field,
        phraseSlop,
        this::createSpanQuery,
        enforceLimit);
}

private static Query wrapWithBoost(Query query, float boost) {
if (boost != AbstractQueryBuilder.DEFAULT_BOOST) {
return new BoostQuery(query, boost);
Expand Down
12 changes: 12 additions & 0 deletions core/src/main/java/org/elasticsearch/index/search/MatchQuery.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.ExtendedCommonTermsQuery;
import org.apache.lucene.queryparser.classic.MapperQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
Expand Down Expand Up @@ -58,6 +59,9 @@

import java.io.IOException;

import static org.apache.lucene.queryparser.classic.MapperQueryParser.analyzeGraphPhraseWithLimit;
import static org.apache.lucene.queryparser.classic.MapperQueryParser.shouldApplyGraphPhraseLimit;

public class MatchQuery {

public enum Type implements Writeable {
Expand Down Expand Up @@ -349,6 +353,14 @@ protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator
}
}

/**
 * Routes graph phrase analysis through
 * {@link MapperQueryParser#analyzeGraphPhraseWithLimit} so that the number of
 * generated span queries is checked. The per-path span queries come from this
 * builder's <code>createSpanQuery</code>; whether reaching the limit is an error is
 * decided by {@link MapperQueryParser#shouldApplyGraphPhraseLimit()}.
 */
@Override
protected SpanQuery analyzeGraphPhrase(TokenStream source, String field, int phraseSlop) throws IOException {
    final boolean limitIsHard = shouldApplyGraphPhraseLimit();
    final SpanQuery spanQuery = analyzeGraphPhraseWithLimit(source, field, phraseSlop, this::createSpanQuery, limitIsHard);
    return spanQuery;
}

public Query createPhrasePrefixQuery(String field, String queryText, int phraseSlop, int maxExpansions) {
final Query query = createFieldQuery(getAnalyzer(), Occur.MUST, field, queryText, true, phraseSlop);
return toMultiPhrasePrefix(query, phraseSlop, maxExpansions);
Expand Down
4 changes: 4 additions & 0 deletions core/src/main/java/org/elasticsearch/search/SearchModule.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

package org.elasticsearch.search;

import org.apache.lucene.queryparser.classic.MapperQueryParser;
import org.apache.lucene.search.BooleanQuery;
import org.elasticsearch.common.NamedRegistry;
import org.elasticsearch.common.geo.ShapesAvailability;
Expand Down Expand Up @@ -258,6 +259,7 @@

import static java.util.Collections.unmodifiableMap;
import static java.util.Objects.requireNonNull;
import static org.apache.lucene.queryparser.classic.MapperQueryParser.shouldApplyGraphPhraseLimit;

/**
* Sets up things that can be done at search time like queries, aggregations, and suggesters.
Expand All @@ -282,6 +284,8 @@ public class SearchModule {
public SearchModule(Settings settings, boolean transportClient, List<SearchPlugin> plugins) {
this.settings = settings;
this.transportClient = transportClient;
// checks if the system property es.query.apply_graph_phrase_limit is set to a valid value
shouldApplyGraphPhraseLimit();
registerSuggesters(plugins);
highlighters = setupHighlighters(settings, plugins);
registerScoreFunctions(plugins);
Expand Down
Loading

0 comments on commit 42b2268

Please sign in to comment.