Skip to content

Commit

Permalink
Prevent SigTerms/SigText from running on fields they do not support (e…
Browse files Browse the repository at this point in the history
…lastic#52851)

* Prevent SigTerms/SigText from running on fields they do not support

SigTerms cannot run on fields that are not searchable, and SigText
cannot run on fields that do not have analyzers.  Both of these
situations fail today with an esoteric exception, so this just formalizes
the constraint by throwing an IllegalArgumentException up front.

In practice, the only affected field seems to be the `binary` field,
which is neither searchable or has a default analyzer (e.g. even numeric
and keyword fields have a default analyzer despite not being tokenized)

Adds supported-type tests, and makes some changes to the test itself
to allow testing sigtext (indexing _source).

Also a few tweaks to the test to avoid bad randomization (negative
numbers, etc).

* Tweak exception text
  • Loading branch information
polyfractal authored Mar 23, 2020
1 parent da9273a commit ec4c699
Show file tree
Hide file tree
Showing 6 changed files with 152 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
package org.elasticsearch.index.mapper;

import com.fasterxml.jackson.core.JsonParseException;

import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatPoint;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,12 @@ public SignificantTermsAggregatorFactory(String name,
Map<String, Object> metaData) throws IOException {
super(name, config, queryShardContext, parent, subFactoriesBuilder, metaData);

if (!config.unmapped()) {
if (config.unmapped() == false) {
if (config.fieldContext().fieldType().isSearchable() == false) {
throw new IllegalArgumentException("SignificantText aggregation requires fields to be searchable, but ["
+ config.fieldContext().fieldType().name() + "] is not");
}

this.fieldType = config.fieldContext().fieldType();
this.indexedFieldName = fieldType.name();
}
Expand Down Expand Up @@ -129,6 +134,10 @@ private FilterableTermsEnum getTermsEnum(String field) throws IOException {
}

private long getBackgroundFrequency(String value) throws IOException {
// fieldType can be null if the field is unmapped, but theoretically this method should only be called
// when constructing buckets. Assert to ensure this is the case
// TODO this is a bad setup and it should be refactored
assert fieldType != null;
Query query = fieldType.termQuery(value, queryShardContext);
if (query instanceof TermQuery) {
// for types that use the inverted index, we prefer using a caching terms
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,10 @@ public SignificantTextAggregatorFactory(String name,
// Note that if the field is unmapped (its field type is null), we don't fail,
// and just use the given field name as a placeholder.
this.fieldType = queryShardContext.fieldMapper(fieldName);
if (fieldType != null && fieldType.indexAnalyzer() == null) {
throw new IllegalArgumentException("Field [" + fieldType.name() + "] has no analyzer, but SignificantText " +
"requires an analyzed field");
}
this.indexedFieldName = fieldType != null ? fieldType.name() : fieldName;
this.sourceFieldNames = sourceFieldNames == null
? new String[] { indexedFieldName }
Expand Down Expand Up @@ -124,6 +128,10 @@ private FilterableTermsEnum getTermsEnum(String field) throws IOException {
}

private long getBackgroundFrequency(String value) throws IOException {
// fieldType can be null if the field is unmapped, but theoretically this method should only be called
// when constructing buckets. Assert to ensure this is the case
// TODO this is a bad setup and it should be refactored
assert fieldType != null;
Query query = fieldType.termQuery(value, queryShardContext);
if (query instanceof TermQuery) {
// for types that use the inverted index, we prefer using a caching terms
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.index.analysis.AnalyzerScope;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.mapper.BinaryFieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.NumberFieldMapper;
Expand All @@ -46,10 +47,14 @@
import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregationExecutionException;
import org.elasticsearch.search.aggregations.AggregatorTestCase;
import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregatorFactory.ExecutionMode;
import org.elasticsearch.search.aggregations.bucket.terms.IncludeExclude;
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
import org.elasticsearch.search.aggregations.support.ValueType;
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
import org.junit.Before;

import java.io.IOException;
Expand All @@ -75,6 +80,27 @@ public void setUpTest() throws Exception {
fieldType.setName("field");
}

@Override
protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldType, String fieldName) {
return new SignificantTermsAggregationBuilder("foo", ValueType.STRING).field(fieldName);
}

@Override
protected List<ValuesSourceType> getSupportedValuesSourceTypes() {
return List.of(CoreValuesSourceType.NUMERIC,
CoreValuesSourceType.BYTES);
}

@Override
protected List<String> unsupportedMappedFieldTypes() {
return List.of(
NumberFieldMapper.NumberType.DOUBLE.typeName(), // floating points are not supported at all
NumberFieldMapper.NumberType.FLOAT.typeName(),
NumberFieldMapper.NumberType.HALF_FLOAT.typeName(),
BinaryFieldMapper.CONTENT_TYPE // binary fields are not supported because they cannot be searched
);
}

/**
* For each provided field type, we also register an alias with name {@code <field>-alias}.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,17 @@
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.index.analysis.AnalyzerScope;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.mapper.BinaryFieldMapper;
import org.elasticsearch.index.mapper.GeoPointFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregatorTestCase;
import org.elasticsearch.search.aggregations.bucket.sampler.InternalSampler;
import org.elasticsearch.search.aggregations.bucket.sampler.SamplerAggregationBuilder;
import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper;
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
import org.elasticsearch.search.aggregations.support.ValuesSourceType;

import java.io.IOException;
import java.util.Arrays;
Expand All @@ -63,6 +68,28 @@ protected Map<String, MappedFieldType> getFieldAliases(MappedFieldType... fieldT
Function.identity()));
}

@Override
protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldType, String fieldName) {
return new SignificantTextAggregationBuilder("foo", fieldName);
}

@Override
protected List<ValuesSourceType> getSupportedValuesSourceTypes() {
// TODO it is likely accidental that SigText supports anything other than Bytes, and then only text fields
return List.of(CoreValuesSourceType.NUMERIC,
CoreValuesSourceType.BYTES,
CoreValuesSourceType.RANGE,
CoreValuesSourceType.GEOPOINT);
}

@Override
protected List<String> unsupportedMappedFieldTypes() {
return List.of(
BinaryFieldMapper.CONTENT_TYPE, // binary fields are not supported because they do not have analyzers
GeoPointFieldMapper.CONTENT_TYPE // geopoint fields cannot use term queries
);
}

/**
* Uses the significant text aggregation to find the keywords in text fields
*/
Expand Down
Loading

0 comments on commit ec4c699

Please sign in to comment.