diff --git a/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java index fa7055cadd2a9..ace6e6d6f28ec 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java @@ -20,7 +20,6 @@ package org.elasticsearch.index.mapper; import com.fasterxml.jackson.core.JsonParseException; - import org.apache.lucene.document.DoublePoint; import org.apache.lucene.document.Field; import org.apache.lucene.document.FloatPoint; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java index 0687c81a64802..3de703099bcb2 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorFactory.java @@ -88,7 +88,12 @@ public SignificantTermsAggregatorFactory(String name, Map metaData) throws IOException { super(name, config, queryShardContext, parent, subFactoriesBuilder, metaData); - if (!config.unmapped()) { + if (config.unmapped() == false) { + if (config.fieldContext().fieldType().isSearchable() == false) { + throw new IllegalArgumentException("SignificantText aggregation requires fields to be searchable, but [" + + config.fieldContext().fieldType().name() + "] is not"); + } + this.fieldType = config.fieldContext().fieldType(); this.indexedFieldName = fieldType.name(); } @@ -129,6 +134,10 @@ private FilterableTermsEnum getTermsEnum(String field) throws IOException { } private long getBackgroundFrequency(String value) throws IOException { + // fieldType can be null if the field is unmapped, but theoretically this method should only be called + // when constructing buckets. Assert to ensure this is the case + // TODO this is a bad setup and it should be refactored + assert fieldType != null; Query query = fieldType.termQuery(value, queryShardContext); if (query instanceof TermQuery) { // for types that use the inverted index, we prefer using a caching terms diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTextAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTextAggregatorFactory.java index 4930c213443b8..950bde076908d 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTextAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTextAggregatorFactory.java @@ -83,6 +83,10 @@ public SignificantTextAggregatorFactory(String name, // Note that if the field is unmapped (its field type is null), we don't fail, // and just use the given field name as a placeholder. this.fieldType = queryShardContext.fieldMapper(fieldName); + if (fieldType != null && fieldType.indexAnalyzer() == null) { + throw new IllegalArgumentException("Field [" + fieldType.name() + "] has no analyzer, but SignificantText " + + "requires an analyzed field"); + } this.indexedFieldName = fieldType != null ? fieldType.name() : fieldName; this.sourceFieldNames = sourceFieldNames == null ? new String[] { indexedFieldName } @@ -124,6 +128,10 @@ private FilterableTermsEnum getTermsEnum(String field) throws IOException { } private long getBackgroundFrequency(String value) throws IOException { + // fieldType can be null if the field is unmapped, but theoretically this method should only be called + // when constructing buckets. Assert to ensure this is the case + // TODO this is a bad setup and it should be refactored + assert fieldType != null; Query query = fieldType.termQuery(value, queryShardContext); if (query instanceof TermQuery) { // for types that use the inverted index, we prefer using a caching terms diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorTests.java index 6cb4826ead2c4..aa35aab052b2c 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTermsAggregatorTests.java @@ -36,6 +36,7 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.index.analysis.AnalyzerScope; import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.mapper.BinaryFieldMapper; import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.NumberFieldMapper; @@ -46,10 +47,14 @@ import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.search.aggregations.AggregationBuilder; import org.elasticsearch.search.aggregations.AggregationExecutionException; import org.elasticsearch.search.aggregations.AggregatorTestCase; import org.elasticsearch.search.aggregations.bucket.significant.SignificantTermsAggregatorFactory.ExecutionMode; import org.elasticsearch.search.aggregations.bucket.terms.IncludeExclude; +import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; +import org.elasticsearch.search.aggregations.support.ValueType; +import org.elasticsearch.search.aggregations.support.ValuesSourceType; import org.junit.Before; import java.io.IOException; @@ -75,6 +80,27 @@ public void setUpTest() throws Exception { fieldType.setName("field"); } + @Override + protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldType, String fieldName) { + return new SignificantTermsAggregationBuilder("foo", ValueType.STRING).field(fieldName); + } + + @Override + protected List getSupportedValuesSourceTypes() { + return List.of(CoreValuesSourceType.NUMERIC, + CoreValuesSourceType.BYTES); + } + + @Override + protected List unsupportedMappedFieldTypes() { + return List.of( + NumberFieldMapper.NumberType.DOUBLE.typeName(), // floating points are not supported at all + NumberFieldMapper.NumberType.FLOAT.typeName(), + NumberFieldMapper.NumberType.HALF_FLOAT.typeName(), + BinaryFieldMapper.CONTENT_TYPE // binary fields are not supported because they cannot be searched + ); + } + /** * For each provided field type, we also register an alias with name {@code -alias}. */ diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTextAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTextAggregatorTests.java index 123bb0f60427c..6a354b0a6a496 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTextAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantTextAggregatorTests.java @@ -34,12 +34,17 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.index.analysis.AnalyzerScope; import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.mapper.BinaryFieldMapper; +import org.elasticsearch.index.mapper.GeoPointFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType; +import org.elasticsearch.search.aggregations.AggregationBuilder; import org.elasticsearch.search.aggregations.AggregatorTestCase; import org.elasticsearch.search.aggregations.bucket.sampler.InternalSampler; import org.elasticsearch.search.aggregations.bucket.sampler.SamplerAggregationBuilder; import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper; +import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; +import org.elasticsearch.search.aggregations.support.ValuesSourceType; import java.io.IOException; import java.util.Arrays; @@ -63,6 +68,28 @@ protected Map getFieldAliases(MappedFieldType... fieldT Function.identity())); } + @Override + protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldType, String fieldName) { + return new SignificantTextAggregationBuilder("foo", fieldName); + } + + @Override + protected List getSupportedValuesSourceTypes() { + // TODO it is likely accidental that SigText supports anything other than Bytes, and then only text fields + return List.of(CoreValuesSourceType.NUMERIC, + CoreValuesSourceType.BYTES, + CoreValuesSourceType.RANGE, + CoreValuesSourceType.GEOPOINT); + } + + @Override + protected List unsupportedMappedFieldTypes() { + return List.of( + BinaryFieldMapper.CONTENT_TYPE, // binary fields are not supported because they do not have analyzers + GeoPointFieldMapper.CONTENT_TYPE // geopoint fields cannot use term queries + ); + } + /** * Uses the significant text aggregation to find the keywords in text fields */ diff --git a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java index 287935b035011..6033df8d9fa4e 100644 --- a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java @@ -18,12 +18,15 @@ */ package org.elasticsearch.search.aggregations; +import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.BinaryDocValuesField; +import org.apache.lucene.document.Document; import org.apache.lucene.document.HalfFloatPoint; import org.apache.lucene.document.InetAddressPoint; import org.apache.lucene.document.LatLonDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.document.StoredField; import org.apache.lucene.index.AssertingDirectoryReader; import org.apache.lucene.index.CompositeReaderContext; import org.apache.lucene.index.DirectoryReader; @@ -49,6 +52,7 @@ import org.elasticsearch.common.lease.Releasable; import org.elasticsearch.common.lease.Releasables; import org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader; +import org.elasticsearch.common.network.NetworkAddress; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.text.Text; import org.elasticsearch.common.util.BigArrays; @@ -56,6 +60,10 @@ import org.elasticsearch.common.util.MockPageCacheRecycler; import org.elasticsearch.index.Index; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AnalysisRegistry; +import org.elasticsearch.index.analysis.AnalyzerScope; +import org.elasticsearch.index.analysis.IndexAnalyzers; +import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.cache.bitset.BitsetFilterCache; import org.elasticsearch.index.cache.bitset.BitsetFilterCache.Listener; import org.elasticsearch.index.cache.query.DisabledQueryCache; @@ -108,6 +116,7 @@ import org.junit.After; import java.io.IOException; +import java.net.InetAddress; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -119,7 +128,6 @@ import java.util.function.Function; import java.util.stream.Collectors; -import static java.util.Collections.singleton; import static org.elasticsearch.test.InternalAggregationTestCase.DEFAULT_MAX_BUCKETS; import static org.mockito.Matchers.anyObject; import static org.mockito.Matchers.anyString; @@ -142,7 +150,6 @@ public abstract class AggregatorTestCase extends ESTestCase { ObjectMapper.CONTENT_TYPE, // Cannot aggregate objects GeoShapeFieldMapper.CONTENT_TYPE, // Cannot aggregate geoshapes (yet) - TextFieldMapper.CONTENT_TYPE, // TODO Does not support doc values, but does support FD, needs a lot of mocking ObjectMapper.NESTED_CONTENT_TYPE, // TODO support for nested CompletionFieldMapper.CONTENT_TYPE, // TODO support completion FieldAliasMapper.CONTENT_TYPE // TODO support alias @@ -598,6 +605,18 @@ protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldTy "createAggBuilderForTypeTest() must be implemented as well."); } + /** + * A method that allows implementors to specifically blacklist particular field types (based on their content_name). + * This is needed in some areas where the ValuesSourceType is not granular enough, for example integer values + * vs floating points, or `keyword` bytes vs `binary` bytes (which are not searchable) + * + * This is a blacklist instead of a whitelist because there are vastly more field types than ValuesSourceTypes, + * and it's expected that these unsupported cases are exceptional rather than common + */ + protected List unsupportedMappedFieldTypes() { + return Collections.emptyList(); + } + /** * This test will validate that an aggregator succeeds or fails to run against all the field types * that are registered in {@link IndicesModule} (e.g. all the core field types). An aggregator @@ -612,6 +631,7 @@ public final void testSupportedFieldTypes() throws IOException { Settings settings = Settings.builder().put("index.version.created", Version.CURRENT.id).build(); String fieldName = "typeTestFieldName"; List supportedVSTypes = getSupportedValuesSourceTypes(); + List unsupportedMappedFieldTypes = unsupportedMappedFieldTypes(); if (supportedVSTypes.isEmpty()) { // If the test says it doesn't support any VStypes, it has not been converted yet so skip @@ -630,7 +650,11 @@ public final void testSupportedFieldTypes() throws IOException { Map source = new HashMap<>(); source.put("type", mappedType.getKey()); - source.put("doc_values", "true"); + + // Text is the only field that doesn't support DVs, instead FD + if (mappedType.getKey().equals(TextFieldMapper.CONTENT_TYPE) == false) { + source.put("doc_values", "true"); + } Mapper.Builder builder = mappedType.getValue().parse(fieldName, source, new MockParserContext()); FieldMapper mapper = (FieldMapper) builder.build(new BuilderContext(settings, new ContentPath())); @@ -651,15 +675,16 @@ public final void testSupportedFieldTypes() throws IOException { IndexSearcher indexSearcher = newIndexSearcher(indexReader); AggregationBuilder aggregationBuilder = createAggBuilderForTypeTest(fieldType, fieldName); + ValuesSourceType vst = fieldType.getValuesSourceType(); // TODO in the future we can make this more explicit with expectThrows(), when the exceptions are standardized try { searchAndReduce(indexSearcher, new MatchAllDocsQuery(), aggregationBuilder, fieldType); - if (supportedVSTypes.contains(fieldType.getValuesSourceType()) == false) { + if (supportedVSTypes.contains(vst) == false || unsupportedMappedFieldTypes.contains(fieldType.typeName())) { fail("Aggregator [" + aggregationBuilder.getType() + "] should not support field type [" - + fieldType.typeName() + "] but executing against the field did not throw an excetion"); + + fieldType.typeName() + "] but executing against the field did not throw an exception"); } } catch (Exception e) { - if (supportedVSTypes.contains(fieldType.getValuesSourceType())) { + if (supportedVSTypes.contains(vst) && unsupportedMappedFieldTypes.contains(fieldType.typeName()) == false) { fail("Aggregator [" + aggregationBuilder.getType() + "] supports field type [" + fieldType.typeName() + "] but executing against the field threw an exception: [" + e.getMessage() + "]"); } @@ -679,34 +704,50 @@ private void writeTestDoc(MappedFieldType fieldType, String fieldName, RandomInd String typeName = fieldType.typeName(); ValuesSourceType vst = fieldType.getValuesSourceType(); + Document doc = new Document(); + String json; if (vst.equals(CoreValuesSourceType.NUMERIC)) { // TODO note: once VS refactor adds DATE/BOOLEAN, this conditional will go away + long v; if (typeName.equals(DateFieldMapper.CONTENT_TYPE) || typeName.equals(DateFieldMapper.DATE_NANOS_CONTENT_TYPE)) { - iw.addDocument(singleton(new SortedNumericDocValuesField(fieldName, randomNonNegativeLong()))); + // positive integer because date_nanos gets unhappy with large longs + v = Math.abs(randomInt()); + json = "{ \"" + fieldName + "\" : \"" + v + "\" }"; } else if (typeName.equals(BooleanFieldMapper.CONTENT_TYPE)) { - iw.addDocument(singleton(new SortedNumericDocValuesField(fieldName, randomBoolean() ? 0 : 1))); + v = randomBoolean() ? 0 : 1; + json = "{ \"" + fieldName + "\" : \"" + (v == 0 ? "false" : "true") + "\" }"; } else if (typeName.equals(NumberFieldMapper.NumberType.DOUBLE.typeName())) { - long encoded = NumericUtils.doubleToSortableLong(Math.abs(randomDouble())); - iw.addDocument(singleton(new SortedNumericDocValuesField(fieldName, encoded))); + double d = Math.abs(randomDouble()); + v = NumericUtils.doubleToSortableLong(d); + json = "{ \"" + fieldName + "\" : \"" + d + "\" }"; } else if (typeName.equals(NumberFieldMapper.NumberType.FLOAT.typeName())) { - long encoded = NumericUtils.floatToSortableInt(Math.abs(randomFloat())); - iw.addDocument(singleton(new SortedNumericDocValuesField(fieldName, encoded))); + float f = Math.abs(randomFloat()); + v = NumericUtils.floatToSortableInt(f); + json = "{ \"" + fieldName + "\" : \"" + f + "\" }"; } else if (typeName.equals(NumberFieldMapper.NumberType.HALF_FLOAT.typeName())) { - long encoded = HalfFloatPoint.halfFloatToSortableShort(Math.abs(randomFloat())); - iw.addDocument(singleton(new SortedNumericDocValuesField(fieldName, encoded))); + float f = Math.abs(randomFloat()); + v = HalfFloatPoint.halfFloatToSortableShort(f); + json = "{ \"" + fieldName + "\" : \"" + f + "\" }"; } else { - iw.addDocument(singleton(new SortedNumericDocValuesField(fieldName, randomNonNegativeLong()))); + // smallest numeric is a byte so we select the smallest + v = Math.abs(randomByte()); + json = "{ \"" + fieldName + "\" : \"" + v + "\" }"; } + doc.add(new SortedNumericDocValuesField(fieldName, v)); + } else if (vst.equals(CoreValuesSourceType.BYTES)) { if (typeName.equals(BinaryFieldMapper.CONTENT_TYPE)) { - iw.addDocument(singleton(new BinaryFieldMapper.CustomBinaryDocValuesField(fieldName, new BytesRef("a").bytes))); + doc.add(new BinaryFieldMapper.CustomBinaryDocValuesField(fieldName, new BytesRef("a").bytes)); + json = "{ \"" + fieldName + "\" : \"a\" }"; } else if (typeName.equals(IpFieldMapper.CONTENT_TYPE)) { // TODO note: once VS refactor adds IP, this conditional will go away - boolean v4 = randomBoolean(); - iw.addDocument(singleton(new SortedSetDocValuesField(fieldName, new BytesRef(InetAddressPoint.encode(randomIp(v4)))))); + InetAddress ip = randomIp(randomBoolean()); + json = "{ \"" + fieldName + "\" : \"" + NetworkAddress.format(ip) + "\" }"; + doc.add(new SortedSetDocValuesField(fieldName, new BytesRef(InetAddressPoint.encode(ip)))); } else { - iw.addDocument(singleton(new SortedSetDocValuesField(fieldName, new BytesRef("a")))); + doc.add(new SortedSetDocValuesField(fieldName, new BytesRef("a"))); + json = "{ \"" + fieldName + "\" : \"a\" }"; } } else if (vst.equals(CoreValuesSourceType.RANGE)) { Object start; @@ -743,19 +784,37 @@ private void writeTestDoc(MappedFieldType fieldType, String fieldName, RandomInd } final RangeFieldMapper.Range range = new RangeFieldMapper.Range(rangeType, start, end, true, true); - iw.addDocument(singleton(new BinaryDocValuesField(fieldName, rangeType.encodeRanges(Collections.singleton(range))))); - + doc.add(new BinaryDocValuesField(fieldName, rangeType.encodeRanges(Collections.singleton(range)))); + json = "{ \"" + fieldName + "\" : { \n" + + " \"gte\" : \"" + start + "\",\n" + + " \"lte\" : \"" + end + "\"\n" + + " }}"; } else if (vst.equals(CoreValuesSourceType.GEOPOINT)) { - iw.addDocument(singleton(new LatLonDocValuesField(fieldName, randomDouble(), randomDouble()))); + double lat = randomDouble(); + double lon = randomDouble(); + doc.add(new LatLonDocValuesField(fieldName, lat, lon)); + json = "{ \"" + fieldName + "\" : \"[" + lon + "," + lat + "]\" }"; } else { throw new IllegalStateException("Unknown field type [" + typeName + "]"); } + + doc.add(new StoredField("_source", new BytesRef(json))); + iw.addDocument(doc); } private class MockParserContext extends Mapper.TypeParser.ParserContext { MockParserContext() { super(null, null, null, null, null); } + + @Override + public IndexAnalyzers getIndexAnalyzers() { + NamedAnalyzer defaultAnalyzer = new NamedAnalyzer(AnalysisRegistry.DEFAULT_ANALYZER_NAME, + AnalyzerScope.GLOBAL, new StandardAnalyzer()); + return new IndexAnalyzers(Map.of(AnalysisRegistry.DEFAULT_ANALYZER_NAME, defaultAnalyzer), Map.of(), Map.of()); + } + + } @After