Skip to content

Commit

Permalink
Distinguish between simple matches with and without the terms index (#…
Browse files Browse the repository at this point in the history
…63945)

We currently use TextSearchInfo to let query parsers know when a field
will support match queries. Some field types (numeric, constant, range)
can produce simple match queries that don't use the terms index, and
it is useful to distinguish between these fields on the one hand and
keyword/text-type fields on the other. In particular, the
SignificantTextAggregation only works on fields that have indexed terms,
but there is currently no efficient way to see this at search time and so
the factory falls back on checking to see if an index analyzer has been
defined, with the result that some nonsensical field types are permitted.

This commit adds a new static TextSearchInfo implementation called
SIMPLE_MATCH_WITHOUT_TERMS that can be returned by field
types with no corresponding terms index. It changes significant text
to check for this rather than for the presence of an index analyzer.

This is a breaking change, in that the significant text agg will now throw
an error up-front if you try and apply it to a numeric field, whereas before
you would get an empty result.
  • Loading branch information
romseygeek committed Oct 27, 2020
1 parent c28206f commit 261c8af
Show file tree
Hide file tree
Showing 15 changed files with 44 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ public static final class ScaledFloatFieldType extends SimpleMappedFieldType {

public ScaledFloatFieldType(String name, boolean indexed, boolean stored, boolean hasDocValues,
Map<String, String> meta, double scalingFactor, Double nullValue) {
super(name, indexed, stored, hasDocValues, TextSearchInfo.SIMPLE_MATCH_ONLY, meta);
super(name, indexed, stored, hasDocValues, TextSearchInfo.SIMPLE_MATCH_WITHOUT_TERMS, meta);
this.scalingFactor = scalingFactor;
this.nullValue = nullValue;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -541,11 +541,11 @@ public void testReduceFromSeveralShards() throws IOException, ExecutionException
* Ensure requests using nondeterministic scripts do not get cached.
*/
public void testScriptCaching() throws Exception {
assertAcked(prepareCreate("cache_test_idx").addMapping("type", "d", "type=long")
assertAcked(prepareCreate("cache_test_idx").addMapping("type", "s", "type=long", "t", "type=text")
.setSettings(Settings.builder().put("requests.cache.enable", true).put("number_of_shards", 1).put("number_of_replicas", 1))
.get());
indexRandom(true, client().prepareIndex("cache_test_idx", "type", "1").setSource("s", 1),
client().prepareIndex("cache_test_idx", "type", "2").setSource("s", 2));
indexRandom(true, client().prepareIndex("cache_test_idx", "type", "1").setSource("s", 1, "t", "foo"),
client().prepareIndex("cache_test_idx", "type", "2").setSource("s", 2, "t", "bar"));

// Make sure we are starting with a clear cache
assertThat(client().admin().indices().prepareStats("cache_test_idx").setRequestCache(true).get().getTotal().getRequestCache()
Expand All @@ -561,7 +561,7 @@ public void testScriptCaching() throws Exception {
SearchResponse r;
if (useSigText) {
r = client().prepareSearch("cache_test_idx").setSize(0)
.addAggregation(significantText("foo", "s").significanceHeuristic(scriptHeuristic)).get();
.addAggregation(significantText("foo", "t").significanceHeuristic(scriptHeuristic)).get();
} else {
r = client().prepareSearch("cache_test_idx").setSize(0)
.addAggregation(significantTerms("foo").field("s").significanceHeuristic(scriptHeuristic)).get();
Expand All @@ -578,7 +578,7 @@ public void testScriptCaching() throws Exception {
useSigText = randomBoolean();
if (useSigText) {
r = client().prepareSearch("cache_test_idx").setSize(0)
.addAggregation(significantText("foo", "s").significanceHeuristic(scriptHeuristic)).get();
.addAggregation(significantText("foo", "t").significanceHeuristic(scriptHeuristic)).get();
} else {
r = client().prepareSearch("cache_test_idx").setSize(0)
.addAggregation(significantTerms("foo").field("s").significanceHeuristic(scriptHeuristic)).get();
Expand All @@ -592,7 +592,7 @@ public void testScriptCaching() throws Exception {

// Ensure that non-scripted requests are cached as normal
if (useSigText) {
r = client().prepareSearch("cache_test_idx").setSize(0).addAggregation(significantText("foo", "s")).get();
r = client().prepareSearch("cache_test_idx").setSize(0).addAggregation(significantText("foo", "t")).get();
} else {
r = client().prepareSearch("cache_test_idx").setSize(0).addAggregation(significantTerms("foo").field("s")).get();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
public abstract class ConstantFieldType extends MappedFieldType {

public ConstantFieldType(String name, Map<String, String> meta) {
super(name, true, false, true, TextSearchInfo.SIMPLE_MATCH_ONLY, meta);
super(name, true, false, true, TextSearchInfo.SIMPLE_MATCH_WITHOUT_TERMS, meta);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ public static final class DateFieldType extends MappedFieldType {
public DateFieldType(String name, boolean isSearchable, boolean isStored, boolean hasDocValues,
DateFormatter dateTimeFormatter, Resolution resolution, String nullValue,
Map<String, String> meta) {
super(name, isSearchable, isStored, hasDocValues, TextSearchInfo.SIMPLE_MATCH_ONLY, meta);
super(name, isSearchable, isStored, hasDocValues, TextSearchInfo.SIMPLE_MATCH_WITHOUT_TERMS, meta);
this.dateTimeFormatter = dateTimeFormatter;
this.dateMathParser = dateTimeFormatter.toDateMathParser();
this.resolution = resolution;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ public static final class IpFieldType extends SimpleMappedFieldType {

public IpFieldType(String name, boolean indexed, boolean stored, boolean hasDocValues,
InetAddress nullValue, Map<String, String> meta) {
super(name, indexed, stored, hasDocValues, TextSearchInfo.SIMPLE_MATCH_ONLY, meta);
super(name, indexed, stored, hasDocValues, TextSearchInfo.SIMPLE_MATCH_WITHOUT_TERMS, meta);
this.nullValue = nullValue;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -417,9 +417,11 @@ public Map<String, String> meta() {
* Returns information on how any text in this field is indexed
*
* Fields that do not support any text-based queries should return
* {@link TextSearchInfo#NONE}. Some fields (eg numeric) may support
* {@link TextSearchInfo#NONE}. Some fields (eg keyword) may support
* only simple match queries, and can return
* {@link TextSearchInfo#SIMPLE_MATCH_ONLY}
* {@link TextSearchInfo#SIMPLE_MATCH_ONLY}; other fields may support
* simple match queries without using the terms index, and can return
* {@link TextSearchInfo#SIMPLE_MATCH_WITHOUT_TERMS}
*/
public TextSearchInfo getTextSearchInfo() {
return textSearchInfo;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -897,7 +897,7 @@ public static class NumberFieldType extends SimpleMappedFieldType {

public NumberFieldType(String name, NumberType type, boolean isSearchable, boolean isStored,
boolean hasDocValues, boolean coerce, Number nullValue, Map<String, String> meta) {
super(name, isSearchable, isStored, hasDocValues, TextSearchInfo.SIMPLE_MATCH_ONLY, meta);
super(name, isSearchable, isStored, hasDocValues, TextSearchInfo.SIMPLE_MATCH_WITHOUT_TERMS, meta);
this.type = Objects.requireNonNull(type);
this.coerce = coerce;
this.nullValue = nullValue;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ public static final class RangeFieldType extends MappedFieldType {

public RangeFieldType(String name, RangeType type, boolean indexed, boolean stored,
boolean hasDocValues, boolean coerce, Map<String, String> meta) {
super(name, indexed, stored, hasDocValues, TextSearchInfo.SIMPLE_MATCH_ONLY, meta);
super(name, indexed, stored, hasDocValues, TextSearchInfo.SIMPLE_MATCH_WITHOUT_TERMS, meta);
assert type != RangeType.DATE;
this.rangeType = Objects.requireNonNull(type);
dateTimeFormatter = null;
Expand All @@ -178,7 +178,7 @@ public RangeFieldType(String name, RangeType type) {

public RangeFieldType(String name, boolean indexed, boolean stored, boolean hasDocValues, DateFormatter formatter,
boolean coerce, Map<String, String> meta) {
super(name, indexed, stored, hasDocValues, TextSearchInfo.SIMPLE_MATCH_ONLY, meta);
super(name, indexed, stored, hasDocValues, TextSearchInfo.SIMPLE_MATCH_WITHOUT_TERMS, meta);
this.rangeType = RangeType.DATE;
this.dateTimeFormatter = Objects.requireNonNull(formatter);
this.dateMathParser = dateTimeFormatter.toDateMathParser();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ static final class SeqNoFieldType extends SimpleMappedFieldType {
private static final SeqNoFieldType INSTANCE = new SeqNoFieldType();

private SeqNoFieldType() {
super(NAME, true, false, true, TextSearchInfo.SIMPLE_MATCH_ONLY, Collections.emptyMap());
super(NAME, true, false, true, TextSearchInfo.SIMPLE_MATCH_WITHOUT_TERMS, Collections.emptyMap());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,13 @@ public class TextSearchInfo {
public static final TextSearchInfo WHITESPACE_MATCH_ONLY
= new TextSearchInfo(SIMPLE_MATCH_ONLY_FIELD_TYPE, null, Lucene.WHITESPACE_ANALYZER, Lucene.WHITESPACE_ANALYZER);

/**
* Defines indexing information for fields that support simple match text queries
* without using the terms index
*/
public static final TextSearchInfo SIMPLE_MATCH_WITHOUT_TERMS
= new TextSearchInfo(SIMPLE_MATCH_ONLY_FIELD_TYPE, null, Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER);

private static final NamedAnalyzer FORBIDDEN_ANALYZER = new NamedAnalyzer("", AnalyzerScope.GLOBAL,
new Analyzer() {
@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ static final class VersionFieldType extends MappedFieldType {
public static final VersionFieldType INSTANCE = new VersionFieldType();

private VersionFieldType() {
super(NAME, false, false, true, TextSearchInfo.SIMPLE_MATCH_ONLY, Collections.emptyMap());
super(NAME, false, false, true, TextSearchInfo.NONE, Collections.emptyMap());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.elasticsearch.common.util.BytesRefHash;
import org.elasticsearch.common.util.ObjectArray;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.TextSearchInfo;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.Aggregator;
Expand Down Expand Up @@ -59,7 +60,6 @@ public class SignificantTextAggregatorFactory extends AggregatorFactory {
private static final int MEMORY_GROWTH_REPORTING_INTERVAL_BYTES = 5000;

private final IncludeExclude includeExclude;
private final String indexedFieldName;
private final MappedFieldType fieldType;
private final String[] sourceFieldNames;
private final QueryBuilder backgroundFilter;
Expand All @@ -82,16 +82,16 @@ public SignificantTextAggregatorFactory(String name,
super(name, context, parent, subFactoriesBuilder, metadata);

this.fieldType = context.getFieldType(fieldName);
if (fieldType == null ) {
if (fieldType == null) {
throw new IllegalArgumentException("Field [" + fieldName + "] does not exist, SignificantText " +
"requires an analyzed field");
}
if (fieldType != null && fieldType.indexAnalyzer() == null) {
}
if (supportsAgg(fieldType) == false) {
throw new IllegalArgumentException("Field [" + fieldType.name() + "] has no analyzer, but SignificantText " +
"requires an analyzed field");
}
this.indexedFieldName = fieldType != null ? fieldType.name() : fieldName;
this.sourceFieldNames = sourceFieldNames == null ? new String[] { indexedFieldName } : sourceFieldNames;
String indexedFieldName = fieldType.name();
this.sourceFieldNames = sourceFieldNames == null ? new String[] {indexedFieldName} : sourceFieldNames;

this.includeExclude = includeExclude;
this.backgroundFilter = backgroundFilter;
Expand All @@ -100,6 +100,11 @@ public SignificantTextAggregatorFactory(String name,
this.significanceHeuristic = significanceHeuristic;
}

private static boolean supportsAgg(MappedFieldType ft) {
return ft.getTextSearchInfo() != TextSearchInfo.NONE
&& ft.getTextSearchInfo() != TextSearchInfo.SIMPLE_MATCH_WITHOUT_TERMS;
}

@Override
protected Aggregator createInternal(SearchContext searchContext, Aggregator parent, CardinalityUpperBound cardinality,
Map<String, Object> metadata) throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
Expand Down Expand Up @@ -79,11 +80,7 @@ protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldTy

@Override
protected List<ValuesSourceType> getSupportedValuesSourceTypes() {
// TODO it is likely accidental that SigText supports anything other than Bytes, and then only text fields
return Arrays.asList(CoreValuesSourceType.NUMERIC,
CoreValuesSourceType.BYTES,
CoreValuesSourceType.RANGE,
CoreValuesSourceType.GEOPOINT);
return Collections.singletonList(CoreValuesSourceType.BYTES);
}

@Override
Expand Down Expand Up @@ -145,7 +142,7 @@ public void testSignificance() throws IOException {
}
}


public void testMissingField() throws IOException {
TextFieldType textFieldType = new TextFieldType("text");
textFieldType.setIndexAnalyzer(new NamedAnalyzer("my_analyzer", AnalyzerScope.GLOBAL, new StandardAnalyzer()));
Expand All @@ -167,15 +164,15 @@ public void testMissingField() throws IOException {
try (IndexReader reader = DirectoryReader.open(w)) {
IndexSearcher searcher = new IndexSearcher(reader);


IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
() -> searchAndReduce(searcher, new TermQuery(new Term("text", "odd")), aggBuilder, textFieldType));
assertThat(e.getMessage(), equalTo("Field [this_field_does_not_exist] does not exist, SignificantText "
+ "requires an analyzed field"));
}
}
}

public void testFieldAlias() throws IOException {
TextFieldType textFieldType = new TextFieldType("text");
textFieldType.setIndexAnalyzer(new NamedAnalyzer("my_analyzer", AnalyzerScope.GLOBAL, new StandardAnalyzer()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ public UnsignedLongFieldType(
Number nullValueFormatted,
Map<String, String> meta
) {
super(name, indexed, isStored, hasDocValues, TextSearchInfo.SIMPLE_MATCH_ONLY, meta);
super(name, indexed, isStored, hasDocValues, TextSearchInfo.SIMPLE_MATCH_WITHOUT_TERMS, meta);
this.nullValueFormatted = nullValueFormatted;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ abstract class AbstractScriptFieldType<LeafFactory> extends MappedFieldType {
TriFunction<String, Map<String, Object>, SearchLookup, LeafFactory> factory,
Map<String, String> meta
) {
super(name, false, false, false, TextSearchInfo.SIMPLE_MATCH_ONLY, meta);
super(name, false, false, false, TextSearchInfo.SIMPLE_MATCH_WITHOUT_TERMS, meta);
this.script = script;
this.factory = factory;
}
Expand Down

0 comments on commit 261c8af

Please sign in to comment.