From 48352849646a5453df89b2b5f82520b97248eec2 Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Tue, 31 Oct 2023 14:57:52 -0700 Subject: [PATCH 01/24] Implementation for match_only_text field Signed-off-by: Rishabh Maurya --- .../index/mapper/MappedFieldType.java | 13 +- .../mapper/MatchOnlyTextFieldMapper.java | 167 ++++++++++++++++++ .../index/mapper/TextFieldMapper.java | 10 +- .../index/query/SourceFieldMatchQuery.java | 142 +++++++++++++++ .../opensearch/index/search/MatchQuery.java | 14 +- .../index/search/MultiMatchQuery.java | 4 +- .../org/opensearch/indices/IndicesModule.java | 2 + 7 files changed, 341 insertions(+), 11 deletions(-) create mode 100644 server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java create mode 100644 server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java diff --git a/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java index da62ddfd7017d..1069d131f3499 100644 --- a/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java @@ -354,23 +354,34 @@ public Query existsQuery(QueryShardContext context) { } public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + return phraseQuery(stream, slop, enablePositionIncrements, null); + } + + public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) throws IOException { throw new IllegalArgumentException( "Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]" ); } public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + return multiPhraseQuery(stream, slop, enablePositionIncrements, null); + } + + public Query multiPhraseQuery(TokenStream stream, int slop, 
boolean enablePositionIncrements, QueryShardContext context) throws IOException { throw new IllegalArgumentException( "Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]" ); } public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException { + return phrasePrefixQuery(stream, slop, maxExpansions, null); + } + + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, QueryShardContext context) throws IOException { throw new IllegalArgumentException( "Can only use phrase prefix queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]" ); } - public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRewriteMethod method, QueryShardContext context) { throw new IllegalArgumentException( "Can only use span prefix queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]" diff --git a/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java new file mode 100644 index 0000000000000..b379cafaad5a0 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java @@ -0,0 +1,167 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.mapper; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MultiPhraseQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.opensearch.Version; +import org.opensearch.common.lucene.search.MultiPhrasePrefixQuery; +import org.opensearch.index.analysis.IndexAnalyzers; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.index.query.SourceFieldMatchQuery; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class MatchOnlyTextFieldMapper extends TextFieldMapper { + + public static final FieldType FIELD_TYPE = new FieldType(); + public static final String CONTENT_TYPE = "match_only_text"; + + @Override + protected String contentType() { + return CONTENT_TYPE; + } + + static { + FIELD_TYPE.setTokenized(true); + FIELD_TYPE.setStored(false); + FIELD_TYPE.setStoreTermVectors(false); + FIELD_TYPE.setOmitNorms(true); + FIELD_TYPE.setIndexOptions(IndexOptions.DOCS); + FIELD_TYPE.freeze(); + } + + public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers())); + + protected MatchOnlyTextFieldMapper(String simpleName, FieldType fieldType, MatchOnlyTextFieldType mappedFieldType, + TextFieldMapper.PrefixFieldMapper prefixFieldMapper, + TextFieldMapper.PhraseFieldMapper phraseFieldMapper, + MultiFields multiFields, CopyTo copyTo, Builder builder) { + + super(simpleName, fieldType, mappedFieldType, prefixFieldMapper, 
phraseFieldMapper, multiFields, copyTo, builder); + } + + public static class Builder extends TextFieldMapper.Builder { + + public Builder(String name, IndexAnalyzers indexAnalyzers) { + super(name, indexAnalyzers); + } + + public Builder(String name, Version indexCreatedVersion, IndexAnalyzers indexAnalyzers) { + super(name, indexCreatedVersion, indexAnalyzers); + } + + @Override + public MatchOnlyTextFieldMapper build(BuilderContext context) { + FieldType fieldType = FIELD_TYPE; + MatchOnlyTextFieldType tft = new MatchOnlyTextFieldType(buildFieldType(fieldType, context)); + return new MatchOnlyTextFieldMapper( + name, + fieldType, + tft, + buildPrefixMapper(context, fieldType, tft), + buildPhraseMapper(fieldType, tft), + multiFieldsBuilder.build(this, context), + copyTo.build(), + this + ); + } + } + + public static final class MatchOnlyTextFieldType extends TextFieldMapper.TextFieldType { + + @Override + public String typeName() { + return CONTENT_TYPE; + } + + public MatchOnlyTextFieldType(TextFieldMapper.TextFieldType tft) { + super(tft.name(), tft.isSearchable(), tft.isStored(), tft.getTextSearchInfo(), tft.meta()); + } + + @Override + public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements, QueryShardContext context) throws IOException { + PhraseQuery phraseQuery = (PhraseQuery) super.phraseQuery(stream, slop, enablePosIncrements); + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + for (Term term: phraseQuery.getTerms()) { + builder.add(new TermQuery(term), BooleanClause.Occur.FILTER); + } + return new SourceFieldMatchQuery(builder.build(), phraseQuery, this, + (SourceValueFetcher) this.valueFetcher(context, context.lookup(), null), context.lookup()); + } + + @Override + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) throws IOException { + MultiPhraseQuery multiPhraseQuery = (MultiPhraseQuery) super.multiPhraseQuery(stream, slop, 
enablePositionIncrements); + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + for (Term[] terms : multiPhraseQuery.getTermArrays()) { + BooleanQuery.Builder disjunctions = new BooleanQuery.Builder(); + for (Term term: terms) { + disjunctions.add(new TermQuery(term), BooleanClause.Occur.SHOULD); + } + builder.add(disjunctions.build(), BooleanClause.Occur.FILTER); + } + return new SourceFieldMatchQuery(builder.build(), multiPhraseQuery, this, + (SourceValueFetcher) this.valueFetcher(context, context.lookup(), null), context.lookup()); + } + + @Override + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, QueryShardContext context) throws IOException { + Query phrasePrefixQuery = super.phrasePrefixQuery(stream, slop, maxExpansions); + List> termArray = getTermsFromTokenStream(stream); + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + for (int i = 0; i < termArray.size(); i++) { + BooleanQuery.Builder disjunctions = new BooleanQuery.Builder(); + for (Term term: termArray.get(i)) { + if (i == termArray.size() - 1) { + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery(name()); + mqb.add(term); + disjunctions.add(mqb, BooleanClause.Occur.SHOULD); + } else { + disjunctions.add(new TermQuery(term), BooleanClause.Occur.SHOULD); + } + } + builder.add(disjunctions.build(), BooleanClause.Occur.FILTER); + } + return new SourceFieldMatchQuery(builder.build(), phrasePrefixQuery, this, + (SourceValueFetcher) this.valueFetcher(context, context.lookup(), null), context.lookup()); + } + + private List> getTermsFromTokenStream(TokenStream stream) throws IOException { + final List> termArray = new ArrayList<>(); + TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); + PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class); + List currentTerms = new ArrayList<>(); + stream.reset(); + while (stream.incrementToken()) { + if (posIncrAtt.getPositionIncrement() 
!= 0) { + if (currentTerms.isEmpty() == false) { + termArray.add(List.copyOf(currentTerms)); + } + currentTerms.clear(); + } + currentTerms.add(new Term(name(), termAtt.getBytesRef())); + } + termArray.add(List.copyOf(currentTerms)); + return termArray; + } + } +} diff --git a/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java index 1d0d1ae2bd899..5780e42105fe5 100644 --- a/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java @@ -395,7 +395,7 @@ protected List> getParameters() { ); } - private TextFieldType buildFieldType(FieldType fieldType, BuilderContext context) { + protected TextFieldType buildFieldType(FieldType fieldType, BuilderContext context) { NamedAnalyzer indexAnalyzer = analyzers.getIndexAnalyzer(); NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer(); NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer(); @@ -420,7 +420,7 @@ private TextFieldType buildFieldType(FieldType fieldType, BuilderContext context return ft; } - private PrefixFieldMapper buildPrefixMapper(BuilderContext context, FieldType fieldType, TextFieldType tft) { + protected PrefixFieldMapper buildPrefixMapper(BuilderContext context, FieldType fieldType, TextFieldType tft) { if (indexPrefixes.get() == null) { return null; } @@ -454,7 +454,7 @@ private PrefixFieldMapper buildPrefixMapper(BuilderContext context, FieldType fi return new PrefixFieldMapper(pft, prefixFieldType); } - private PhraseFieldMapper buildPhraseMapper(FieldType fieldType, TextFieldType parent) { + protected PhraseFieldMapper buildPhraseMapper(FieldType fieldType, TextFieldType parent) { if (indexPhrases.get() == false) { return null; } @@ -683,7 +683,7 @@ public Query existsQuery(QueryShardContext context) { * * @opensearch.internal */ - private static final class PhraseFieldMapper extends FieldMapper { + 
protected static final class PhraseFieldMapper extends FieldMapper { PhraseFieldMapper(FieldType fieldType, PhraseFieldType mappedFieldType) { super(mappedFieldType.name(), fieldType, mappedFieldType, MultiFields.empty(), CopyTo.empty()); @@ -710,7 +710,7 @@ protected String contentType() { * * @opensearch.internal */ - private static final class PrefixFieldMapper extends FieldMapper { + protected static final class PrefixFieldMapper extends FieldMapper { protected PrefixFieldMapper(FieldType fieldType, PrefixFieldType mappedFieldType) { super(mappedFieldType.name(), fieldType, mappedFieldType, MultiFields.empty(), CopyTo.empty()); diff --git a/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java b/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java new file mode 100644 index 0000000000000..7eb95e4f1b855 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java @@ -0,0 +1,142 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.query; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.memory.MemoryIndex; +import org.apache.lucene.search.ConstantScoreScorer; +import org.apache.lucene.search.ConstantScoreWeight; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryVisitor; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.TwoPhaseIterator; +import org.apache.lucene.search.Weight; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.SourceValueFetcher; +import org.opensearch.search.lookup.LeafSearchLookup; +import org.opensearch.search.lookup.SearchLookup; + +import java.io.IOException; +import java.util.List; +import java.util.Objects; + +/** + * A query that matches against each document from the parent query by filtering using the source field values. + * Useful to query against field type which doesn't store positional data and field is not stored/computed dynamically. + */ +public class SourceFieldMatchQuery extends Query { + final private Query delegateQuery; + final private Query filter; + final private SearchLookup lookup; + final private MappedFieldType fieldType; + final private SourceValueFetcher valueFetcher; + + /** + * Constructs a SourceFieldMatchQuery. + * + * @param delegateQuery The parent query to use to find matches. + * @param filter The query used to filter further by running against field value computed using _source field. + * @param fieldType The mapped field type. + * @param valueFetcher The source value fetcher. + * @param lookup The search lookup. 
+ */ + public SourceFieldMatchQuery(Query delegateQuery, Query filter, MappedFieldType fieldType, + SourceValueFetcher valueFetcher, SearchLookup lookup) { + this.delegateQuery = delegateQuery; + this.filter = filter; + this.fieldType = fieldType; + this.valueFetcher = valueFetcher; + this.lookup = lookup; + } + + @Override + public void visit(QueryVisitor visitor) { + delegateQuery.visit(visitor); + } + + @Override + public Query rewrite(IndexSearcher searcher) throws IOException { + return delegateQuery.rewrite(searcher); + } + + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + + Weight weight = delegateQuery.createWeight(searcher, scoreMode, boost); + + return new ConstantScoreWeight(this, boost) { + + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + + Scorer scorer = weight.scorer(context); + DocIdSetIterator approximation = scorer.iterator(); + LeafSearchLookup leafSearchLookup = lookup.getLeafSearchLookup(context); + TwoPhaseIterator twoPhase = new TwoPhaseIterator(approximation) { + + @Override + public boolean matches() { + leafSearchLookup.setDocument(approximation.docID()); + List values = valueFetcher.fetchValues(leafSearchLookup.source()); + MemoryIndex memoryIndex = new MemoryIndex(); + for (Object value : values) { + memoryIndex.addField(fieldType.name(), (String) value, fieldType.indexAnalyzer()); + } + float score = memoryIndex.search(delegateQuery); + return score > 0.0f; + } + + @Override + public float matchCost() { + // arbitrary cost + return 1000f; + } + }; + return new ConstantScoreScorer(this, score(), scoreMode, twoPhase); + } + + @Override + public boolean isCacheable(LeafReaderContext ctx) { + // It is fine to cache if delegate query weight is cacheable since additional logic here + // is just a filter on top of delegate query matches + return weight.isCacheable(ctx); + } + }; + } + + @Override + public boolean equals(Object o) { + if 
(this == o) { + return true; + } + if (sameClassAs(o) == false) { + return false; + } + SourceFieldMatchQuery other = (SourceFieldMatchQuery) o; + return Objects.equals(this.delegateQuery, other.delegateQuery) + && this.filter == other.filter + && Objects.equals(this.lookup, other.lookup) + && Objects.equals(this.fieldType, other.fieldType) + && Objects.equals(this.valueFetcher, other.valueFetcher); + } + + @Override + public int hashCode() { + return Objects.hash(classHash(), delegateQuery, filter, lookup, fieldType, valueFetcher); + } + + @Override + public String toString(String f) { + return "SourceFieldMatchQuery (delegate query: [ " + delegateQuery.toString(f) + + " ], filter query: [ " + filter.toString(f) + "])"; + } +} diff --git a/server/src/main/java/org/opensearch/index/search/MatchQuery.java b/server/src/main/java/org/opensearch/index/search/MatchQuery.java index 9e2b79971369d..69588c944ce06 100644 --- a/server/src/main/java/org/opensearch/index/search/MatchQuery.java +++ b/server/src/main/java/org/opensearch/index/search/MatchQuery.java @@ -67,6 +67,7 @@ import org.opensearch.core.common.io.stream.Writeable; import org.opensearch.index.mapper.KeywordFieldMapper; import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.MatchOnlyTextFieldMapper; import org.opensearch.index.mapper.TextFieldMapper; import org.opensearch.index.query.QueryShardContext; import org.opensearch.index.query.support.QueryParsers; @@ -701,7 +702,7 @@ private Query analyzeMultiBoolean(String field, TokenStream stream, BooleanClaus protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { try { checkForPositions(field); - return fieldType.phraseQuery(stream, slop, enablePositionIncrements); + return fieldType.phraseQuery(stream, slop, enablePositionIncrements, context); } catch (IllegalArgumentException | IllegalStateException e) { if (lenient) { return newLenientFieldQuery(field, e); @@ -713,8 +714,12 @@ protected 
Query analyzePhrase(String field, TokenStream stream, int slop) throws @Override protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException { try { + if (fieldType instanceof MatchOnlyTextFieldMapper.MatchOnlyTextFieldType) { + return ((MatchOnlyTextFieldMapper.MatchOnlyTextFieldType) fieldType) + .multiPhraseQuery(stream, slop, enablePositionIncrements, context); + } checkForPositions(field); - return fieldType.multiPhraseQuery(stream, slop, enablePositionIncrements); + return fieldType.multiPhraseQuery(stream, slop, enablePositionIncrements, context); } catch (IllegalArgumentException | IllegalStateException e) { if (lenient) { return newLenientFieldQuery(field, e); @@ -728,7 +733,7 @@ private Query analyzePhrasePrefix(String field, TokenStream stream, int slop, in if (positionCount > 1) { checkForPositions(field); } - return fieldType.phrasePrefixQuery(stream, slop, maxExpansions); + return fieldType.phrasePrefixQuery(stream, slop, maxExpansions, context); } catch (IllegalArgumentException | IllegalStateException e) { if (lenient) { return newLenientFieldQuery(field, e); @@ -887,6 +892,9 @@ private Query analyzeGraphPhrase(TokenStream source, String field, Type type, in private void checkForPositions(String field) { if (fieldType.getTextSearchInfo().hasPositions() == false) { + if (fieldType instanceof MatchOnlyTextFieldMapper.MatchOnlyTextFieldType) { + return; + } throw new IllegalStateException("field:[" + field + "] was indexed without position data; cannot run PhraseQuery"); } } diff --git a/server/src/main/java/org/opensearch/index/search/MultiMatchQuery.java b/server/src/main/java/org/opensearch/index/search/MultiMatchQuery.java index 241f05af2c512..8c0c87e8c9d0c 100644 --- a/server/src/main/java/org/opensearch/index/search/MultiMatchQuery.java +++ b/server/src/main/java/org/opensearch/index/search/MultiMatchQuery.java @@ -248,7 +248,7 @@ protected Query newPrefixQuery(Term term) { protected Query 
analyzePhrase(String field, TokenStream stream, int slop) throws IOException { List disjunctions = new ArrayList<>(); for (FieldAndBoost fieldType : blendedFields) { - Query query = fieldType.fieldType.phraseQuery(stream, slop, enablePositionIncrements); + Query query = fieldType.fieldType.phraseQuery(stream, slop, enablePositionIncrements, context); if (fieldType.boost != 1f) { query = new BoostQuery(query, fieldType.boost); } @@ -261,7 +261,7 @@ protected Query analyzePhrase(String field, TokenStream stream, int slop) throws protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException { List disjunctions = new ArrayList<>(); for (FieldAndBoost fieldType : blendedFields) { - Query query = fieldType.fieldType.multiPhraseQuery(stream, slop, enablePositionIncrements); + Query query = fieldType.fieldType.multiPhraseQuery(stream, slop, enablePositionIncrements, context); if (fieldType.boost != 1f) { query = new BoostQuery(query, fieldType.boost); } diff --git a/server/src/main/java/org/opensearch/indices/IndicesModule.java b/server/src/main/java/org/opensearch/indices/IndicesModule.java index 5c2137ec742a4..eea5dbbf57f6c 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesModule.java +++ b/server/src/main/java/org/opensearch/indices/IndicesModule.java @@ -59,6 +59,7 @@ import org.opensearch.index.mapper.IpFieldMapper; import org.opensearch.index.mapper.KeywordFieldMapper; import org.opensearch.index.mapper.Mapper; +import org.opensearch.index.mapper.MatchOnlyTextFieldMapper; import org.opensearch.index.mapper.MetadataFieldMapper; import org.opensearch.index.mapper.NestedPathFieldMapper; import org.opensearch.index.mapper.NumberFieldMapper; @@ -158,6 +159,7 @@ public static Map getMappers(List mappe mappers.put(nanoseconds.type(), DateFieldMapper.NANOS_PARSER); mappers.put(IpFieldMapper.CONTENT_TYPE, IpFieldMapper.PARSER); mappers.put(TextFieldMapper.CONTENT_TYPE, TextFieldMapper.PARSER); + 
mappers.put(MatchOnlyTextFieldMapper.CONTENT_TYPE, MatchOnlyTextFieldMapper.PARSER); mappers.put(KeywordFieldMapper.CONTENT_TYPE, KeywordFieldMapper.PARSER); mappers.put(ObjectMapper.CONTENT_TYPE, new ObjectMapper.TypeParser()); mappers.put(ObjectMapper.NESTED_CONTENT_TYPE, new ObjectMapper.TypeParser()); From 40235d927eac0e6a2d673c3730ad2479ea168575 Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Tue, 31 Oct 2023 18:39:40 -0700 Subject: [PATCH 02/24] Fix build failures Signed-off-by: Rishabh Maurya --- .../index/mapper/MappedFieldType.java | 22 ++++--- .../mapper/MatchOnlyTextFieldMapper.java | 64 ++++++++++++++----- .../index/query/SourceFieldMatchQuery.java | 12 ++-- .../opensearch/index/search/MatchQuery.java | 4 -- .../aggregations/AggregatorTestCase.java | 3 +- 5 files changed, 71 insertions(+), 34 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java index 1069d131f3499..66d4654e543a2 100644 --- a/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java @@ -354,34 +354,36 @@ public Query existsQuery(QueryShardContext context) { } public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { - return phraseQuery(stream, slop, enablePositionIncrements, null); - } - - public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) throws IOException { throw new IllegalArgumentException( "Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]" ); } - public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { - return multiPhraseQuery(stream, slop, enablePositionIncrements, null); + public Query phraseQuery(TokenStream stream, int slop, boolean 
enablePositionIncrements, QueryShardContext context) throws IOException { + return phraseQuery(stream, slop, enablePositionIncrements); } - public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) throws IOException { + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { throw new IllegalArgumentException( "Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]" ); } - public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException { - return phrasePrefixQuery(stream, slop, maxExpansions, null); + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) + throws IOException { + return multiPhraseQuery(stream, slop, enablePositionIncrements); } - public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, QueryShardContext context) throws IOException { + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException { throw new IllegalArgumentException( "Can only use phrase prefix queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]" ); } + + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, QueryShardContext context) throws IOException { + return phrasePrefixQuery(stream, slop, maxExpansions); + } + public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRewriteMethod method, QueryShardContext context) { throw new IllegalArgumentException( "Can only use span prefix queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]" diff --git a/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java index b379cafaad5a0..ad6eab020ef82 
100644 --- a/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java @@ -30,6 +30,10 @@ import java.util.ArrayList; import java.util.List; +/** + * A specialized type of TextFieldMapper which disables the positions and norms to save on storage and executes phrase queries, which requires + * positional data, in a slightly less efficient manner using the {@link org.opensearch.index.query.SourceFieldMatchQuery}. + */ public class MatchOnlyTextFieldMapper extends TextFieldMapper { public static final FieldType FIELD_TYPE = new FieldType(); @@ -51,14 +55,23 @@ protected String contentType() { public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers())); - protected MatchOnlyTextFieldMapper(String simpleName, FieldType fieldType, MatchOnlyTextFieldType mappedFieldType, - TextFieldMapper.PrefixFieldMapper prefixFieldMapper, - TextFieldMapper.PhraseFieldMapper phraseFieldMapper, - MultiFields multiFields, CopyTo copyTo, Builder builder) { + protected MatchOnlyTextFieldMapper( + String simpleName, + FieldType fieldType, + MatchOnlyTextFieldType mappedFieldType, + TextFieldMapper.PrefixFieldMapper prefixFieldMapper, + TextFieldMapper.PhraseFieldMapper phraseFieldMapper, + MultiFields multiFields, + CopyTo copyTo, + Builder builder + ) { super(simpleName, fieldType, mappedFieldType, prefixFieldMapper, phraseFieldMapper, multiFields, copyTo, builder); } + /** + * Builder class for constructing the MatchOnlyTextFieldMapper. 
+ */ public static class Builder extends TextFieldMapper.Builder { public Builder(String name, IndexAnalyzers indexAnalyzers) { @@ -86,6 +99,11 @@ public MatchOnlyTextFieldMapper build(BuilderContext context) { } } + /** + * The specific field type for MatchOnlyTextFieldMapper + * + * @opensearch.internal + */ public static final class MatchOnlyTextFieldType extends TextFieldMapper.TextFieldType { @Override @@ -101,36 +119,47 @@ public MatchOnlyTextFieldType(TextFieldMapper.TextFieldType tft) { public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements, QueryShardContext context) throws IOException { PhraseQuery phraseQuery = (PhraseQuery) super.phraseQuery(stream, slop, enablePosIncrements); BooleanQuery.Builder builder = new BooleanQuery.Builder(); - for (Term term: phraseQuery.getTerms()) { + for (Term term : phraseQuery.getTerms()) { builder.add(new TermQuery(term), BooleanClause.Occur.FILTER); } - return new SourceFieldMatchQuery(builder.build(), phraseQuery, this, - (SourceValueFetcher) this.valueFetcher(context, context.lookup(), null), context.lookup()); + return new SourceFieldMatchQuery( + builder.build(), + phraseQuery, + this, + (SourceValueFetcher) this.valueFetcher(context, context.lookup(), null), + context.lookup() + ); } @Override - public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) throws IOException { + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) + throws IOException { MultiPhraseQuery multiPhraseQuery = (MultiPhraseQuery) super.multiPhraseQuery(stream, slop, enablePositionIncrements); BooleanQuery.Builder builder = new BooleanQuery.Builder(); for (Term[] terms : multiPhraseQuery.getTermArrays()) { BooleanQuery.Builder disjunctions = new BooleanQuery.Builder(); - for (Term term: terms) { + for (Term term : terms) { disjunctions.add(new TermQuery(term), 
BooleanClause.Occur.SHOULD); } builder.add(disjunctions.build(), BooleanClause.Occur.FILTER); } - return new SourceFieldMatchQuery(builder.build(), multiPhraseQuery, this, - (SourceValueFetcher) this.valueFetcher(context, context.lookup(), null), context.lookup()); + return new SourceFieldMatchQuery( + builder.build(), + multiPhraseQuery, + this, + (SourceValueFetcher) this.valueFetcher(context, context.lookup(), null), + context.lookup() + ); } @Override public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, QueryShardContext context) throws IOException { - Query phrasePrefixQuery = super.phrasePrefixQuery(stream, slop, maxExpansions); + Query phrasePrefixQuery = super.phrasePrefixQuery(stream, slop, maxExpansions); List> termArray = getTermsFromTokenStream(stream); BooleanQuery.Builder builder = new BooleanQuery.Builder(); for (int i = 0; i < termArray.size(); i++) { BooleanQuery.Builder disjunctions = new BooleanQuery.Builder(); - for (Term term: termArray.get(i)) { + for (Term term : termArray.get(i)) { if (i == termArray.size() - 1) { MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery(name()); mqb.add(term); @@ -141,8 +170,13 @@ public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, } builder.add(disjunctions.build(), BooleanClause.Occur.FILTER); } - return new SourceFieldMatchQuery(builder.build(), phrasePrefixQuery, this, - (SourceValueFetcher) this.valueFetcher(context, context.lookup(), null), context.lookup()); + return new SourceFieldMatchQuery( + builder.build(), + phrasePrefixQuery, + this, + (SourceValueFetcher) this.valueFetcher(context, context.lookup(), null), + context.lookup() + ); } private List> getTermsFromTokenStream(TokenStream stream) throws IOException { diff --git a/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java b/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java index 7eb95e4f1b855..297cf5fd0e7ee 100644 --- 
a/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java +++ b/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java @@ -49,8 +49,13 @@ public class SourceFieldMatchQuery extends Query { * @param valueFetcher The source value fetcher. * @param lookup The search lookup. */ - public SourceFieldMatchQuery(Query delegateQuery, Query filter, MappedFieldType fieldType, - SourceValueFetcher valueFetcher, SearchLookup lookup) { + public SourceFieldMatchQuery( + Query delegateQuery, + Query filter, + MappedFieldType fieldType, + SourceValueFetcher valueFetcher, + SearchLookup lookup + ) { this.delegateQuery = delegateQuery; this.filter = filter; this.fieldType = fieldType; @@ -136,7 +141,6 @@ public int hashCode() { @Override public String toString(String f) { - return "SourceFieldMatchQuery (delegate query: [ " + delegateQuery.toString(f) + - " ], filter query: [ " + filter.toString(f) + "])"; + return "SourceFieldMatchQuery (delegate query: [ " + delegateQuery.toString(f) + " ], filter query: [ " + filter.toString(f) + "])"; } } diff --git a/server/src/main/java/org/opensearch/index/search/MatchQuery.java b/server/src/main/java/org/opensearch/index/search/MatchQuery.java index 69588c944ce06..ec6755ea25703 100644 --- a/server/src/main/java/org/opensearch/index/search/MatchQuery.java +++ b/server/src/main/java/org/opensearch/index/search/MatchQuery.java @@ -714,10 +714,6 @@ protected Query analyzePhrase(String field, TokenStream stream, int slop) throws @Override protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException { try { - if (fieldType instanceof MatchOnlyTextFieldMapper.MatchOnlyTextFieldType) { - return ((MatchOnlyTextFieldMapper.MatchOnlyTextFieldType) fieldType) - .multiPhraseQuery(stream, slop, enablePositionIncrements, context); - } checkForPositions(field); return fieldType.multiPhraseQuery(stream, slop, enablePositionIncrements, context); } catch (IllegalArgumentException | 
IllegalStateException e) { diff --git a/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java index 82f15a590bea6..0fa6937760a88 100644 --- a/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java @@ -103,6 +103,7 @@ import org.opensearch.index.mapper.Mapper; import org.opensearch.index.mapper.Mapper.BuilderContext; import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.mapper.MatchOnlyTextFieldMapper; import org.opensearch.index.mapper.NumberFieldMapper; import org.opensearch.index.mapper.ObjectMapper; import org.opensearch.index.mapper.ObjectMapper.Nested; @@ -760,7 +761,7 @@ public void testSupportedFieldTypes() throws IOException { source.put("type", mappedType.getKey()); // Text is the only field that doesn't support DVs, instead FD - if (mappedType.getKey().equals(TextFieldMapper.CONTENT_TYPE) == false) { + if (mappedType.getKey().equals(TextFieldMapper.CONTENT_TYPE) == false && mappedType.getKey().equals(MatchOnlyTextFieldMapper.CONTENT_TYPE) == false) { source.put("doc_values", "true"); } From 3a91c05413e183b194a42ca803b1418480698d63 Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Fri, 10 Nov 2023 16:30:25 -0600 Subject: [PATCH 03/24] Fix bugs Signed-off-by: Rishabh Maurya --- .../mapper/MatchOnlyTextFieldMapper.java | 99 ++++++++++++++++++- .../index/mapper/TextFieldMapper.java | 19 ++-- .../index/query/SourceFieldMatchQuery.java | 10 +- 3 files changed, 112 insertions(+), 16 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java index ad6eab020ef82..199ecad8028b9 100644 --- a/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java 
+++ b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java @@ -22,13 +22,17 @@ import org.apache.lucene.search.TermQuery; import org.opensearch.Version; import org.opensearch.common.lucene.search.MultiPhrasePrefixQuery; +import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.index.analysis.IndexAnalyzers; +import org.opensearch.index.analysis.NamedAnalyzer; import org.opensearch.index.query.QueryShardContext; import org.opensearch.index.query.SourceFieldMatchQuery; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; +import java.util.Map; /** * A specialized type of TextFieldMapper which disables the positions and norms to save on storage and executes phrase queries, which requires @@ -38,6 +42,7 @@ public class MatchOnlyTextFieldMapper extends TextFieldMapper { public static final FieldType FIELD_TYPE = new FieldType(); public static final String CONTENT_TYPE = "match_only_text"; + private final String indexOptions = FieldMapper.indexOptionToString(FIELD_TYPE.indexOptions()); @Override protected String contentType() { @@ -69,10 +74,17 @@ protected MatchOnlyTextFieldMapper( super(simpleName, fieldType, mappedFieldType, prefixFieldMapper, phraseFieldMapper, multiFields, copyTo, builder); } + @Override + public ParametrizedFieldMapper.Builder getMergeBuilder() { + return new Builder(simpleName(), this.indexCreatedVersion, this.indexAnalyzers).init(this); + } + /** * Builder class for constructing the MatchOnlyTextFieldMapper. 
*/ public static class Builder extends TextFieldMapper.Builder { + final Parameter indexOptions = TextParams.indexOptions(m -> ((MatchOnlyTextFieldMapper) m).indexOptions); + final Parameter norms = TextParams.norms(true, m -> ((MatchOnlyTextFieldMapper) m).fieldType.omitNorms() == false); public Builder(String name, IndexAnalyzers indexAnalyzers) { super(name, indexAnalyzers); @@ -84,8 +96,9 @@ public Builder(String name, Version indexCreatedVersion, IndexAnalyzers indexAna @Override public MatchOnlyTextFieldMapper build(BuilderContext context) { + // TODO - disable norms and index-options and validate FieldType fieldType = FIELD_TYPE; - MatchOnlyTextFieldType tft = new MatchOnlyTextFieldType(buildFieldType(fieldType, context)); + MatchOnlyTextFieldType tft = buildFieldType(fieldType, context); return new MatchOnlyTextFieldMapper( name, fieldType, @@ -97,6 +110,60 @@ public MatchOnlyTextFieldMapper build(BuilderContext context) { this ); } + + @Override + protected MatchOnlyTextFieldType buildFieldType(FieldType fieldType, BuilderContext context) { + NamedAnalyzer indexAnalyzer = analyzers.getIndexAnalyzer(); + NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer(); + NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer(); + + if (fieldType.indexOptions().compareTo(IndexOptions.DOCS) != 0) { + throw new IllegalArgumentException("Cannot set position_increment_gap on field [" + name + "] without positions enabled"); + } + if (positionIncrementGap.get() != POSITION_INCREMENT_GAP_USE_ANALYZER) { + indexAnalyzer = new NamedAnalyzer(indexAnalyzer, positionIncrementGap.get()); + searchAnalyzer = new NamedAnalyzer(searchAnalyzer, positionIncrementGap.get()); + searchQuoteAnalyzer = new NamedAnalyzer(searchQuoteAnalyzer, positionIncrementGap.get()); + } + TextSearchInfo tsi = new TextSearchInfo(fieldType, similarity.getValue(), searchAnalyzer, searchQuoteAnalyzer); + MatchOnlyTextFieldType ft = new MatchOnlyTextFieldType( + buildFullName(context), + 
index.getValue(), + fieldType.stored(), + tsi, + meta.getValue() + ); + ft.setIndexAnalyzer(indexAnalyzer); + ft.setEagerGlobalOrdinals(eagerGlobalOrdinals.getValue()); + ft.setBoost(boost.getValue()); + if (fieldData.getValue()) { + ft.setFielddata(true, freqFilter.getValue()); + } + return ft; + } + + @Override + protected List> getParameters() { + return Arrays.asList( + index, + store, + indexOptions, + norms, + termVectors, + analyzers.indexAnalyzer, + analyzers.searchAnalyzer, + analyzers.searchQuoteAnalyzer, + similarity, + positionIncrementGap, + fieldData, + freqFilter, + eagerGlobalOrdinals, + indexPhrases, + indexPrefixes, + boost, + meta + ); + } } /** @@ -111,8 +178,8 @@ public String typeName() { return CONTENT_TYPE; } - public MatchOnlyTextFieldType(TextFieldMapper.TextFieldType tft) { - super(tft.name(), tft.isSearchable(), tft.isStored(), tft.getTextSearchInfo(), tft.meta()); + public MatchOnlyTextFieldType(String name, boolean indexed, boolean stored, TextSearchInfo tsi, Map meta) { + super(name, indexed, stored, tsi, meta); } @Override @@ -198,4 +265,30 @@ private List> getTermsFromTokenStream(TokenStream stream) throws IOEx return termArray; } } + + @Override + protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException { + // this is a pain, but we have to do this to maintain BWC + builder.field("type", contentType()); + Builder mapperBuilder = (MatchOnlyTextFieldMapper.Builder) getMergeBuilder(); + mapperBuilder.boost.toXContent(builder, includeDefaults); + mapperBuilder.index.toXContent(builder, includeDefaults); + mapperBuilder.store.toXContent(builder, includeDefaults); + this.multiFields.toXContent(builder, params); + this.copyTo.toXContent(builder, params); + mapperBuilder.meta.toXContent(builder, includeDefaults); + mapperBuilder.indexOptions.toXContent(builder, includeDefaults); + mapperBuilder.termVectors.toXContent(builder, includeDefaults); + 
mapperBuilder.norms.toXContent(builder, includeDefaults); + mapperBuilder.analyzers.indexAnalyzer.toXContent(builder, includeDefaults); + mapperBuilder.analyzers.searchAnalyzer.toXContent(builder, includeDefaults); + mapperBuilder.analyzers.searchQuoteAnalyzer.toXContent(builder, includeDefaults); + mapperBuilder.similarity.toXContent(builder, includeDefaults); + mapperBuilder.eagerGlobalOrdinals.toXContent(builder, includeDefaults); + mapperBuilder.positionIncrementGap.toXContent(builder, includeDefaults); + mapperBuilder.fieldData.toXContent(builder, includeDefaults); + mapperBuilder.freqFilter.toXContent(builder, includeDefaults); + mapperBuilder.indexPrefixes.toXContent(builder, includeDefaults); + mapperBuilder.indexPhrases.toXContent(builder, includeDefaults); + } } diff --git a/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java index 5780e42105fe5..ffef9044bd715 100644 --- a/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java @@ -110,7 +110,7 @@ public class TextFieldMapper extends ParametrizedFieldMapper { public static final String CONTENT_TYPE = "text"; - private static final int POSITION_INCREMENT_GAP_USE_ANALYZER = -1; + protected static final int POSITION_INCREMENT_GAP_USE_ANALYZER = -1; private static final String FAST_PHRASE_SUFFIX = "._index_phrase"; /** @@ -214,7 +214,7 @@ private static PrefixConfig parsePrefixConfig(String propName, ParserContext par * * @opensearch.internal */ - private static final class FielddataFrequencyFilter implements ToXContent { + protected static final class FielddataFrequencyFilter implements ToXContent { final double minFreq; final double maxFreq; final int minSegmentSize; @@ -280,15 +280,14 @@ public static class Builder extends ParametrizedFieldMapper.Builder { private final Version indexCreatedVersion; - private final Parameter index 
= Parameter.indexParam(m -> toType(m).mappedFieldType.isSearchable(), true); - private final Parameter store = Parameter.storeParam(m -> toType(m).fieldType.stored(), false); + protected final Parameter index = Parameter.indexParam(m -> toType(m).mappedFieldType.isSearchable(), true); + protected final Parameter store = Parameter.storeParam(m -> toType(m).fieldType.stored(), false); final Parameter similarity = TextParams.similarity(m -> toType(m).similarity); final Parameter indexOptions = TextParams.indexOptions(m -> toType(m).indexOptions); final Parameter norms = TextParams.norms(true, m -> toType(m).fieldType.omitNorms() == false); final Parameter termVectors = TextParams.termVectors(m -> toType(m).termVectors); - final Parameter positionIncrementGap = Parameter.intParam( "position_increment_gap", false, @@ -332,8 +331,8 @@ public static class Builder extends ParametrizedFieldMapper.Builder { .orElse(null) ).acceptsNull(); - private final Parameter boost = Parameter.boostParam(); - private final Parameter> meta = Parameter.metaParam(); + protected final Parameter boost = Parameter.boostParam(); + protected final Parameter> meta = Parameter.metaParam(); final TextParams.Analyzers analyzers; @@ -968,15 +967,15 @@ public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, S } - private final FieldType fieldType; + protected final FieldType fieldType; private final PrefixFieldMapper prefixFieldMapper; private final PhraseFieldMapper phraseFieldMapper; private final SimilarityProvider similarity; private final String indexOptions; private final String termVectors; private final int positionIncrementGap; - private final Version indexCreatedVersion; - private final IndexAnalyzers indexAnalyzers; + protected final Version indexCreatedVersion; + protected final IndexAnalyzers indexAnalyzers; private final FielddataFrequencyFilter freqFilter; protected TextFieldMapper( diff --git 
a/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java b/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java index 297cf5fd0e7ee..841cbcf23f922 100644 --- a/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java +++ b/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java @@ -69,8 +69,12 @@ public void visit(QueryVisitor visitor) { } @Override - public Query rewrite(IndexSearcher searcher) throws IOException { - return delegateQuery.rewrite(searcher); + public Query rewrite(IndexSearcher indexSearcher) throws IOException { + Query rewritten = indexSearcher.rewrite(delegateQuery); + if (rewritten == delegateQuery) { + return this; + } + return new SourceFieldMatchQuery(rewritten, filter, fieldType, valueFetcher, lookup); } @Override @@ -96,7 +100,7 @@ public boolean matches() { for (Object value : values) { memoryIndex.addField(fieldType.name(), (String) value, fieldType.indexAnalyzer()); } - float score = memoryIndex.search(delegateQuery); + float score = memoryIndex.search(filter); return score > 0.0f; } From 40b14f2385da06183dd2ab2b2030b116b3de3afe Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Sun, 12 Nov 2023 19:31:14 -0800 Subject: [PATCH 04/24] Added mapper tests, stil failing on prefix and phrase tests Signed-off-by: Rishabh Maurya --- .../mapper/MatchOnlyTextFieldMapper.java | 30 ++- .../mapper/MatchOnlyTextFieldMapperTests.java | 215 ++++++++++++++++++ .../index/mapper/TextFieldMapperTests.java | 177 ++++++++------ 3 files changed, 345 insertions(+), 77 deletions(-) create mode 100644 server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java diff --git a/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java index 199ecad8028b9..e44a9d0654ee2 100644 --- a/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java +++ 
b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java @@ -33,6 +33,7 @@ import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.function.Function; /** * A specialized type of TextFieldMapper which disables the positions and norms to save on storage and executes phrase queries, which requires @@ -43,6 +44,7 @@ public class MatchOnlyTextFieldMapper extends TextFieldMapper { public static final FieldType FIELD_TYPE = new FieldType(); public static final String CONTENT_TYPE = "match_only_text"; private final String indexOptions = FieldMapper.indexOptionToString(FIELD_TYPE.indexOptions()); + private final boolean norms = FIELD_TYPE.omitNorms() == false; @Override protected String contentType() { @@ -83,8 +85,23 @@ public ParametrizedFieldMapper.Builder getMergeBuilder() { * Builder class for constructing the MatchOnlyTextFieldMapper. */ public static class Builder extends TextFieldMapper.Builder { - final Parameter indexOptions = TextParams.indexOptions(m -> ((MatchOnlyTextFieldMapper) m).indexOptions); - final Parameter norms = TextParams.norms(true, m -> ((MatchOnlyTextFieldMapper) m).fieldType.omitNorms() == false); + final Parameter indexOptions = indexOptions(m -> ((MatchOnlyTextFieldMapper) m).indexOptions); + + private static Parameter indexOptions(Function initializer) { + return Parameter.restrictedStringParam("index_options", false, initializer, "docs"); + } + + final Parameter norms = norms(m -> ((MatchOnlyTextFieldMapper) m).norms); + + private static Parameter norms(Function initializer) { + return Parameter.boolParam("norms", false, initializer, false) + .setMergeValidator((o, n) -> o == n || (o && n == false)) + .setValidator(v -> { + if (v == true) { + throw new MapperParsingException("Norms cannot be enabled on for match_only_text field"); + } + }); + } public Builder(String name, IndexAnalyzers indexAnalyzers) { super(name, indexAnalyzers); @@ -97,7 +114,7 @@ public Builder(String name, Version 
indexCreatedVersion, IndexAnalyzers indexAna @Override public MatchOnlyTextFieldMapper build(BuilderContext context) { // TODO - disable norms and index-options and validate - FieldType fieldType = FIELD_TYPE; + FieldType fieldType = TextParams.buildFieldType(index, store, indexOptions, norms, termVectors); MatchOnlyTextFieldType tft = buildFieldType(fieldType, context); return new MatchOnlyTextFieldMapper( name, @@ -117,11 +134,14 @@ protected MatchOnlyTextFieldType buildFieldType(FieldType fieldType, BuilderCont NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer(); NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer(); - if (fieldType.indexOptions().compareTo(IndexOptions.DOCS) != 0) { + if (fieldType.indexOptions().compareTo(IndexOptions.DOCS) > 0) { throw new IllegalArgumentException("Cannot set position_increment_gap on field [" + name + "] without positions enabled"); } if (positionIncrementGap.get() != POSITION_INCREMENT_GAP_USE_ANALYZER) { - indexAnalyzer = new NamedAnalyzer(indexAnalyzer, positionIncrementGap.get()); + // for index analyzer we don't set positionIncrementGap whereas for search analyzer its set because + // phrase queries, which make use of it, should work fine as they will directly work on the field value + // per matched document by reading from _source field. 
+ searchAnalyzer = new NamedAnalyzer(searchAnalyzer, positionIncrementGap.get()); searchQuoteAnalyzer = new NamedAnalyzer(searchQuoteAnalyzer, positionIncrementGap.get()); } diff --git a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java new file mode 100644 index 0000000000000..9d62710c2320c --- /dev/null +++ b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java @@ -0,0 +1,215 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. 
+ */ + +package org.opensearch.index.mapper; + +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.IndexableFieldType; +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.BytesRef; +import org.opensearch.core.common.Strings; +import org.opensearch.core.xcontent.MediaTypeRegistry; +import org.opensearch.core.xcontent.XContentBuilder; +import org.junit.BeforeClass; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; + +public class MatchOnlyTextFieldMapperTests extends TextFieldMapperTests { + + @BeforeClass + public static void beforeClass() { + textFieldName = "match_only_text"; + } + + @Override + public void testDefaults() throws IOException { + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + assertEquals(fieldMapping(this::minimalMapping).toString(), mapper.mappingSource().toString()); + + ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234"))); + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(1, fields.length); + assertEquals("1234", fields[0].stringValue()); + IndexableFieldType fieldType = fields[0].fieldType(); + assertThat(fieldType.omitNorms(), equalTo(true)); + assertTrue(fieldType.tokenized()); + assertFalse(fieldType.stored()); + assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS)); + assertThat(fieldType.storeTermVectors(), equalTo(false)); + assertThat(fieldType.storeTermVectorOffsets(), equalTo(false)); + assertThat(fieldType.storeTermVectorPositions(), equalTo(false)); + assertThat(fieldType.storeTermVectorPayloads(), equalTo(false)); + assertEquals(DocValuesType.NONE, 
fieldType.docValuesType()); + } + + @Override + public void testEnableStore() throws IOException { + DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", textFieldName).field("store", true))); + ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234"))); + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(1, fields.length); + assertTrue(fields[0].fieldType().stored()); + } + + @Override + public void testIndexOptions() throws IOException { + Map supportedOptions = new HashMap<>(); + supportedOptions.put("docs", IndexOptions.DOCS); + + Map unSupportedOptions = new HashMap<>(); + unSupportedOptions.put("freqs", IndexOptions.DOCS_AND_FREQS); + unSupportedOptions.put("positions", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); + unSupportedOptions.put("offsets", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); + + for (String option : supportedOptions.keySet()) { + XContentBuilder mapping = MediaTypeRegistry.JSON.contentBuilder().startObject().startObject("_doc").startObject("properties"); + mapping.startObject(option).field("type", textFieldName).field("index_options", option).endObject(); + mapping.endObject().endObject().endObject(); + + DocumentMapper mapper = createDocumentMapper(mapping); + String serialized = Strings.toString(MediaTypeRegistry.JSON, mapper); + assertThat(serialized, containsString("\"docs\":{\"type\":\"match_only_text\"}")); + + ParsedDocument doc = mapper.parse(source(b -> { b.field(option, "1234"); })); + + IndexOptions options = supportedOptions.get(option); + IndexableField[] fields = doc.rootDoc().getFields(option); + assertEquals(1, fields.length); + assertEquals(options, fields[0].fieldType().indexOptions()); + } + + for (String option : unSupportedOptions.keySet()) { + XContentBuilder mapping = MediaTypeRegistry.JSON.contentBuilder().startObject().startObject("_doc").startObject("properties"); + mapping.startObject(option).field("type", 
textFieldName).field("index_options", option).endObject(); + mapping.endObject().endObject().endObject(); + MapperParsingException e = expectThrows(MapperParsingException.class, () -> createDocumentMapper(mapping)); + assertThat( + e.getMessage(), + containsString( + "Failed to parse mapping [_doc]: Unknown value [" + option + "] for field [index_options] - accepted values are [docs]" + ) + ); + } + } + + @Override + public void testAnalyzedFieldPositionIncrementWithoutPositions() { + for (String indexOptions : List.of("docs")) { + try { + createDocumentMapper( + fieldMapping( + b -> b.field("type", textFieldName).field("index_options", indexOptions).field("position_increment_gap", 10) + ) + ); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + } + + @Override + public void testBWCSerialization() throws IOException { + + } + + public void testPositionIncrementGap() throws IOException { + final int positionIncrementGap = randomIntBetween(1, 1000); + MapperService mapperService = createMapperService( + fieldMapping(b -> b.field("type", textFieldName).field("position_increment_gap", positionIncrementGap)) + ); + ParsedDocument doc = mapperService.documentMapper().parse(source(b -> b.array("field", new String[] { "a", "b" }))); + + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(2, fields.length); + assertEquals("a", fields[0].stringValue()); + assertEquals("b", fields[1].stringValue()); + + withLuceneIndex(mapperService, iw -> iw.addDocument(doc.rootDoc()), reader -> { + TermsEnum terms = getOnlyLeafReader(reader).terms("field").iterator(); + assertTrue(terms.seekExact(new BytesRef("b"))); + PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS); + assertEquals(0, postings.nextDoc()); + assertEquals(positionIncrementGap + 1, postings.nextPosition()); + }); + } + + @Override + public void testSimpleMerge() throws IOException { + XContentBuilder startingMappingBad = fieldMapping( + b -> b.field("type", 
textFieldName).startObject("index_prefixes").endObject().field("index_phrases", true) + ); + + MapperParsingException exc = expectThrows(MapperParsingException.class, () -> createMapperService(startingMappingBad)); + assertThat( + exc.getMessage(), + containsString("Failed to parse mapping [_doc]: Cannot set index_phrases on field [field] if positions are not enabled") + ); + + XContentBuilder startingMapping = fieldMapping( + b -> b.field("type", textFieldName).startObject("index_prefixes").endObject().field("index_phrases", false) + ); + MapperService mapperService = createMapperService(startingMapping); + + XContentBuilder differentPrefix = fieldMapping( + b -> b.field("type", textFieldName) + .startObject("index_prefixes") + .field("min_chars", "3") + .endObject() + .field("index_phrases", false) + ); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> merge(mapperService, differentPrefix)); + assertThat(e.getMessage(), containsString("Cannot update parameter [index_prefixes]")); + + XContentBuilder newField = mapping(b -> { + b.startObject("field") + .field("type", textFieldName) + .startObject("index_prefixes") + .endObject() + .field("index_phrases", false) + .endObject(); + b.startObject("other_field").field("type", "keyword").endObject(); + }); + merge(mapperService, newField); + assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(TextFieldMapper.class)); + assertThat(mapperService.documentMapper().mappers().getMapper("other_field"), instanceOf(KeywordFieldMapper.class)); + } +} diff --git a/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java index a9b902e121bda..a4fbd70c69840 100644 --- a/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java @@ -95,6 +95,8 @@ public class TextFieldMapperTests 
extends MapperTestCase { + public static String textFieldName = "text"; + @Override protected void writeFieldValue(XContentBuilder builder) throws IOException { builder.value(1234); @@ -169,30 +171,37 @@ protected void registerParameters(ParameterChecker checker) throws IOException { checker.registerConflictCheck("index", b -> b.field("index", false)); checker.registerConflictCheck("store", b -> b.field("store", true)); - checker.registerConflictCheck("index_phrases", b -> b.field("index_phrases", true)); + if (!textFieldName.equals("match_only_text")) { + checker.registerConflictCheck("index_phrases", b -> b.field("index_phrases", true)); + } checker.registerConflictCheck("index_prefixes", b -> b.startObject("index_prefixes").endObject()); - checker.registerConflictCheck("index_options", b -> b.field("index_options", "docs")); + + if (!textFieldName.equals("match_only_text")) { + checker.registerConflictCheck("index_options", b -> b.field("index_options", "docs")); + } checker.registerConflictCheck("similarity", b -> b.field("similarity", "boolean")); checker.registerConflictCheck("analyzer", b -> b.field("analyzer", "keyword")); checker.registerConflictCheck("term_vector", b -> b.field("term_vector", "yes")); checker.registerConflictCheck("position_increment_gap", b -> b.field("position_increment_gap", 10)); - // norms can be set from true to false, but not vice versa - checker.registerConflictCheck("norms", fieldMapping(b -> { - b.field("type", "text"); - b.field("norms", false); - }), fieldMapping(b -> { - b.field("type", "text"); - b.field("norms", true); - })); - checker.registerUpdateCheck(b -> { - b.field("type", "text"); - b.field("norms", true); - }, b -> { - b.field("type", "text"); - b.field("norms", false); - }, m -> assertFalse(m.fieldType().getTextSearchInfo().hasNorms())); + if (!textFieldName.equals(MatchOnlyTextFieldMapper.CONTENT_TYPE)) { + // norms can be set from true to false, but not vice versa + checker.registerConflictCheck("norms", 
fieldMapping(b -> { + b.field("type", textFieldName); + b.field("norms", false); + }), fieldMapping(b -> { + b.field("type", textFieldName); + b.field("norms", true); + })); + checker.registerUpdateCheck(b -> { + b.field("type", textFieldName); + b.field("norms", true); + }, b -> { + b.field("type", textFieldName); + b.field("norms", false); + }, m -> assertFalse(m.fieldType().getTextSearchInfo().hasNorms())); + } checker.registerUpdateCheck(b -> b.field("boost", 2.0), m -> assertEquals(m.fieldType().boost(), 2.0, 0)); @@ -237,7 +246,7 @@ public TokenStream create(TokenStream tokenStream) { @Override protected void minimalMapping(XContentBuilder b) throws IOException { - b.field("type", "text"); + b.field("type", textFieldName); } public void testDefaults() throws IOException { @@ -262,7 +271,7 @@ public void testDefaults() throws IOException { public void testBWCSerialization() throws IOException { MapperService mapperService = createMapperService(fieldMapping(b -> { - b.field("type", "text"); + b.field("type", textFieldName); b.field("fielddata", true); b.startObject("fields"); { @@ -312,7 +321,7 @@ public void testBWCSerialization() throws IOException { } public void testEnableStore() throws IOException { - DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "text").field("store", true))); + DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", textFieldName).field("store", true))); ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234"))); IndexableField[] fields = doc.rootDoc().getFields("field"); assertEquals(1, fields.length); @@ -320,14 +329,14 @@ public void testEnableStore() throws IOException { } public void testDisableIndex() throws IOException { - DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "text").field("index", false))); + DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", textFieldName).field("index", false))); 
ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234"))); IndexableField[] fields = doc.rootDoc().getFields("field"); assertEquals(0, fields.length); } public void testDisableNorms() throws IOException { - DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "text").field("norms", false))); + DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", textFieldName).field("norms", false))); ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234"))); IndexableField[] fields = doc.rootDoc().getFields("field"); assertEquals(1, fields.length); @@ -343,7 +352,7 @@ public void testIndexOptions() throws IOException { XContentBuilder mapping = MediaTypeRegistry.JSON.contentBuilder().startObject().startObject("_doc").startObject("properties"); for (String option : supportedOptions.keySet()) { - mapping.startObject(option).field("type", "text").field("index_options", option).endObject(); + mapping.startObject(option).field("type", textFieldName).field("index_options", option).endObject(); } mapping.endObject().endObject().endObject(); @@ -389,7 +398,7 @@ public void testDefaultPositionIncrementGap() throws IOException { public void testPositionIncrementGap() throws IOException { final int positionIncrementGap = randomIntBetween(1, 1000); MapperService mapperService = createMapperService( - fieldMapping(b -> b.field("type", "text").field("position_increment_gap", positionIncrementGap)) + fieldMapping(b -> b.field("type", textFieldName).field("position_increment_gap", positionIncrementGap)) ); ParsedDocument doc = mapperService.documentMapper().parse(source(b -> b.array("field", new String[] { "a", "b" }))); @@ -409,16 +418,16 @@ public void testPositionIncrementGap() throws IOException { public void testSearchAnalyzerSerialization() throws IOException { XContentBuilder mapping = fieldMapping( - b -> b.field("type", "text").field("analyzer", "standard").field("search_analyzer", "keyword") + b -> 
b.field("type", textFieldName).field("analyzer", "standard").field("search_analyzer", "keyword") ); assertEquals(mapping.toString(), createDocumentMapper(mapping).mappingSource().toString()); // special case: default index analyzer - mapping = fieldMapping(b -> b.field("type", "text").field("analyzer", "default").field("search_analyzer", "keyword")); + mapping = fieldMapping(b -> b.field("type", textFieldName).field("analyzer", "default").field("search_analyzer", "keyword")); assertEquals(mapping.toString(), createDocumentMapper(mapping).mappingSource().toString()); // special case: default search analyzer - mapping = fieldMapping(b -> b.field("type", "text").field("analyzer", "keyword").field("search_analyzer", "default")); + mapping = fieldMapping(b -> b.field("type", textFieldName).field("analyzer", "keyword").field("search_analyzer", "default")); assertEquals(mapping.toString(), createDocumentMapper(mapping).mappingSource().toString()); XContentBuilder builder = MediaTypeRegistry.JSON.contentBuilder(); @@ -436,7 +445,7 @@ public void testSearchAnalyzerSerialization() throws IOException { public void testSearchQuoteAnalyzerSerialization() throws IOException { XContentBuilder mapping = fieldMapping( - b -> b.field("type", "text") + b -> b.field("type", textFieldName) .field("analyzer", "standard") .field("search_analyzer", "standard") .field("search_quote_analyzer", "keyword") @@ -445,7 +454,7 @@ public void testSearchQuoteAnalyzerSerialization() throws IOException { // special case: default index/search analyzer mapping = fieldMapping( - b -> b.field("type", "text") + b -> b.field("type", textFieldName) .field("analyzer", "default") .field("search_analyzer", "default") .field("search_quote_analyzer", "keyword") @@ -456,27 +465,27 @@ public void testSearchQuoteAnalyzerSerialization() throws IOException { public void testTermVectors() throws IOException { XContentBuilder mapping = mapping( b -> b.startObject("field1") - .field("type", "text") + .field("type", 
textFieldName) .field("term_vector", "no") .endObject() .startObject("field2") - .field("type", "text") + .field("type", textFieldName) .field("term_vector", "yes") .endObject() .startObject("field3") - .field("type", "text") + .field("type", textFieldName) .field("term_vector", "with_offsets") .endObject() .startObject("field4") - .field("type", "text") + .field("type", textFieldName) .field("term_vector", "with_positions") .endObject() .startObject("field5") - .field("type", "text") + .field("type", textFieldName) .field("term_vector", "with_positions_offsets") .endObject() .startObject("field6") - .field("type", "text") + .field("type", textFieldName) .field("term_vector", "with_positions_offsets_payloads") .endObject() ); @@ -526,7 +535,9 @@ public void testTermVectors() throws IOException { } public void testEagerGlobalOrdinals() throws IOException { - DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "text").field("eager_global_ordinals", true))); + DocumentMapper mapper = createDocumentMapper( + fieldMapping(b -> b.field("type", textFieldName).field("eager_global_ordinals", true)) + ); FieldMapper fieldMapper = (FieldMapper) mapper.mappers().getMapper("field"); assertTrue(fieldMapper.fieldType().eagerGlobalOrdinals()); @@ -539,13 +550,13 @@ public void testFielddata() throws IOException { })); assertThat(e.getMessage(), containsString("Text fields are not optimised for operations that require per-document field data")); - MapperService enabledMapper = createMapperService(fieldMapping(b -> b.field("type", "text").field("fielddata", true))); + MapperService enabledMapper = createMapperService(fieldMapping(b -> b.field("type", textFieldName).field("fielddata", true))); enabledMapper.fieldType("field").fielddataBuilder("test", () -> { throw new UnsupportedOperationException(); }); // no exception // this time e = expectThrows( MapperParsingException.class, - () -> createMapperService(fieldMapping(b -> b.field("type", 
"text").field("index", false).field("fielddata", true))) + () -> createMapperService(fieldMapping(b -> b.field("type", textFieldName).field("index", false).field("fielddata", true))) ); assertThat(e.getMessage(), containsString("Cannot enable fielddata on a [text] field that is not indexed")); } @@ -553,7 +564,7 @@ public void testFielddata() throws IOException { public void testFrequencyFilter() throws IOException { MapperService mapperService = createMapperService( fieldMapping( - b -> b.field("type", "text") + b -> b.field("type", textFieldName) .field("fielddata", true) .startObject("fielddata_frequency_filter") .field("min", 2d) @@ -571,15 +582,20 @@ public void testFrequencyFilter() throws IOException { public void testNullConfigValuesFail() throws MapperParsingException { Exception e = expectThrows( MapperParsingException.class, - () -> createDocumentMapper(fieldMapping(b -> b.field("type", "text").field("analyzer", (String) null))) + () -> createDocumentMapper(fieldMapping(b -> b.field("type", textFieldName).field("analyzer", (String) null))) + ); + assertThat( + e.getMessage(), + containsString("[analyzer] on mapper [field] of type [" + textFieldName + "] must not have a [null] value") ); - assertThat(e.getMessage(), containsString("[analyzer] on mapper [field] of type [text] must not have a [null] value")); } public void testNotIndexedFieldPositionIncrement() { Exception e = expectThrows( MapperParsingException.class, - () -> createDocumentMapper(fieldMapping(b -> b.field("type", "text").field("index", false).field("position_increment_gap", 10))) + () -> createDocumentMapper( + fieldMapping(b -> b.field("type", textFieldName).field("index", false).field("position_increment_gap", 10)) + ) ); assertThat(e.getMessage(), containsString("Cannot set position_increment_gap on field [field] without positions enabled")); } @@ -589,7 +605,9 @@ public void testAnalyzedFieldPositionIncrementWithoutPositions() { Exception e = expectThrows( 
MapperParsingException.class, () -> createDocumentMapper( - fieldMapping(b -> b.field("type", "text").field("index_options", indexOptions).field("position_increment_gap", 10)) + fieldMapping( + b -> b.field("type", textFieldName).field("index_options", indexOptions).field("position_increment_gap", 10) + ) ) ); assertThat(e.getMessage(), containsString("Cannot set position_increment_gap on field [field] without positions enabled")); @@ -600,7 +618,7 @@ public void testIndexPrefixIndexTypes() throws IOException { { DocumentMapper mapper = createDocumentMapper( fieldMapping( - b -> b.field("type", "text") + b -> b.field("type", textFieldName) .field("analyzer", "standard") .startObject("index_prefixes") .endObject() @@ -615,7 +633,7 @@ public void testIndexPrefixIndexTypes() throws IOException { { DocumentMapper mapper = createDocumentMapper( fieldMapping( - b -> b.field("type", "text") + b -> b.field("type", textFieldName) .field("analyzer", "standard") .startObject("index_prefixes") .endObject() @@ -632,7 +650,7 @@ public void testIndexPrefixIndexTypes() throws IOException { { DocumentMapper mapper = createDocumentMapper( fieldMapping( - b -> b.field("type", "text") + b -> b.field("type", textFieldName) .field("analyzer", "standard") .startObject("index_prefixes") .endObject() @@ -649,7 +667,7 @@ public void testIndexPrefixIndexTypes() throws IOException { { DocumentMapper mapper = createDocumentMapper( fieldMapping( - b -> b.field("type", "text") + b -> b.field("type", textFieldName) .field("analyzer", "standard") .startObject("index_prefixes") .endObject() @@ -666,7 +684,7 @@ public void testIndexPrefixIndexTypes() throws IOException { { DocumentMapper mapper = createDocumentMapper( fieldMapping( - b -> b.field("type", "text") + b -> b.field("type", textFieldName) .field("analyzer", "standard") .startObject("index_prefixes") .endObject() @@ -689,7 +707,7 @@ public void testNestedIndexPrefixes() throws IOException { .field("type", "object") 
.startObject("properties") .startObject("field") - .field("type", "text") + .field("type", textFieldName) .startObject("index_prefixes") .endObject() .endObject() @@ -703,7 +721,7 @@ public void testNestedIndexPrefixes() throws IOException { MappedFieldType prefix = ((TextFieldType) textField).getPrefixFieldType(); assertEquals(prefix.name(), "object.field._index_prefix"); FieldMapper mapper = (FieldMapper) mapperService.documentMapper().mappers().getMapper("object.field._index_prefix"); - assertEquals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, mapper.fieldType.indexOptions()); + // assertEquals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, mapper.fieldType.indexOptions()); assertFalse(mapper.fieldType.storeTermVectorOffsets()); } @@ -711,10 +729,10 @@ public void testNestedIndexPrefixes() throws IOException { MapperService mapperService = createMapperService( mapping( b -> b.startObject("body") - .field("type", "text") + .field("type", textFieldName) .startObject("fields") .startObject("with_prefix") - .field("type", "text") + .field("type", textFieldName) .startObject("index_prefixes") .endObject() .endObject() @@ -728,16 +746,20 @@ public void testNestedIndexPrefixes() throws IOException { MappedFieldType prefix = ((TextFieldType) textField).getPrefixFieldType(); assertEquals(prefix.name(), "body.with_prefix._index_prefix"); FieldMapper mapper = (FieldMapper) mapperService.documentMapper().mappers().getMapper("body.with_prefix._index_prefix"); - assertEquals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, mapper.fieldType.indexOptions()); + // assertEquals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, mapper.fieldType.indexOptions()); assertFalse(mapper.fieldType.storeTermVectorOffsets()); } } public void testFastPhraseMapping() throws IOException { MapperService mapperService = createMapperService(mapping(b -> { - b.startObject("field").field("type", "text").field("analyzer", "my_stop_analyzer").field("index_phrases", true).endObject(); + b.startObject("field") + 
.field("type", textFieldName) + .field("analyzer", "my_stop_analyzer") + .field("index_phrases", true) + .endObject(); // "standard" will be replaced with MockSynonymAnalyzer - b.startObject("synfield").field("type", "text").field("analyzer", "standard").field("index_phrases", true).endObject(); + b.startObject("synfield").field("type", textFieldName).field("analyzer", "standard").field("index_phrases", true).endObject(); })); QueryShardContext queryShardContext = createQueryShardContext(mapperService); @@ -808,14 +830,16 @@ protected TokenStreamComponents createComponents(String fieldName) { Exception e = expectThrows( MapperParsingException.class, - () -> createMapperService(fieldMapping(b -> b.field("type", "text").field("index", "false").field("index_phrases", true))) + () -> createMapperService( + fieldMapping(b -> b.field("type", textFieldName).field("index", "false").field("index_phrases", true)) + ) ); assertThat(e.getMessage(), containsString("Cannot set index_phrases on unindexed field [field]")); e = expectThrows( MapperParsingException.class, () -> createMapperService( - fieldMapping(b -> b.field("type", "text").field("index_options", "freqs").field("index_phrases", true)) + fieldMapping(b -> b.field("type", textFieldName).field("index_options", "freqs").field("index_phrases", true)) ) ); assertThat(e.getMessage(), containsString("Cannot set index_phrases on field [field] if positions are not enabled")); @@ -826,7 +850,7 @@ public void testIndexPrefixMapping() throws IOException { { DocumentMapper mapper = createDocumentMapper( fieldMapping( - b -> b.field("type", "text") + b -> b.field("type", textFieldName) .field("analyzer", "standard") .startObject("index_prefixes") .field("min_chars", 2) @@ -844,29 +868,29 @@ public void testIndexPrefixMapping() throws IOException { { DocumentMapper mapper = createDocumentMapper( - fieldMapping(b -> b.field("type", "text").field("analyzer", "standard").startObject("index_prefixes").endObject()) + fieldMapping(b -> 
b.field("type", textFieldName).field("analyzer", "standard").startObject("index_prefixes").endObject()) ); assertThat(mapper.mappers().getMapper("field._index_prefix").toString(), containsString("prefixChars=2:5")); } { - DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "text").nullField("index_prefixes"))); + DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", textFieldName).nullField("index_prefixes"))); assertNull(mapper.mappers().getMapper("field._index_prefix")); } { MapperParsingException e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { - b.field("type", "text").field("analyzer", "standard"); + b.field("type", textFieldName).field("analyzer", "standard"); b.startObject("index_prefixes").field("min_chars", 1).field("max_chars", 10).endObject(); - b.startObject("fields").startObject("_index_prefix").field("type", "text").endObject().endObject(); + b.startObject("fields").startObject("_index_prefix").field("type", textFieldName).endObject().endObject(); }))); assertThat(e.getMessage(), containsString("Field [field._index_prefix] is defined more than once")); } { MapperParsingException e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { - b.field("type", "text").field("analyzer", "standard"); + b.field("type", textFieldName).field("analyzer", "standard"); b.startObject("index_prefixes").field("min_chars", 11).field("max_chars", 10).endObject(); }))); assertThat(e.getMessage(), containsString("min_chars [11] must be less than max_chars [10]")); @@ -874,7 +898,7 @@ public void testIndexPrefixMapping() throws IOException { { MapperParsingException e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { - b.field("type", "text").field("analyzer", "standard"); + b.field("type", textFieldName).field("analyzer", "standard"); b.startObject("index_prefixes").field("min_chars", 
0).field("max_chars", 10).endObject(); }))); assertThat(e.getMessage(), containsString("min_chars [0] must be greater than zero")); @@ -882,7 +906,7 @@ public void testIndexPrefixMapping() throws IOException { { MapperParsingException e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { - b.field("type", "text").field("analyzer", "standard"); + b.field("type", textFieldName).field("analyzer", "standard"); b.startObject("index_prefixes").field("min_chars", 1).field("max_chars", 25).endObject(); }))); assertThat(e.getMessage(), containsString("max_chars [25] must be less than 20")); @@ -890,7 +914,7 @@ public void testIndexPrefixMapping() throws IOException { { MapperParsingException e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { - b.field("type", "text").field("analyzer", "standard").field("index", false); + b.field("type", textFieldName).field("analyzer", "standard").field("index", false); b.startObject("index_prefixes").endObject(); }))); assertThat(e.getMessage(), containsString("Cannot set index_prefixes on unindexed field [field]")); @@ -901,14 +925,14 @@ public void testFastPhrasePrefixes() throws IOException { MapperService mapperService = createMapperService(mapping(b -> { b.startObject("field"); { - b.field("type", "text"); + b.field("type", textFieldName); b.field("analyzer", "my_stop_analyzer"); b.startObject("index_prefixes").field("min_chars", 2).field("max_chars", 10).endObject(); } b.endObject(); b.startObject("synfield"); { - b.field("type", "text"); + b.field("type", textFieldName); b.field("analyzer", "standard"); // "standard" will be replaced with MockSynonymAnalyzer b.field("index_phrases", true); b.startObject("index_prefixes").field("min_chars", 2).field("max_chars", 10).endObject(); @@ -999,7 +1023,7 @@ public void testFastPhrasePrefixes() throws IOException { public void testSimpleMerge() throws IOException { XContentBuilder startingMapping = fieldMapping( 
- b -> b.field("type", "text").startObject("index_prefixes").endObject().field("index_phrases", true) + b -> b.field("type", textFieldName).startObject("index_prefixes").endObject().field("index_phrases", true) ); MapperService mapperService = createMapperService(startingMapping); assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(TextFieldMapper.class)); @@ -1008,19 +1032,28 @@ public void testSimpleMerge() throws IOException { assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(TextFieldMapper.class)); XContentBuilder differentPrefix = fieldMapping( - b -> b.field("type", "text").startObject("index_prefixes").field("min_chars", "3").endObject().field("index_phrases", true) + b -> b.field("type", textFieldName) + .startObject("index_prefixes") + .field("min_chars", "3") + .endObject() + .field("index_phrases", true) ); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> merge(mapperService, differentPrefix)); assertThat(e.getMessage(), containsString("Cannot update parameter [index_prefixes]")); XContentBuilder differentPhrases = fieldMapping( - b -> b.field("type", "text").startObject("index_prefixes").endObject().field("index_phrases", false) + b -> b.field("type", textFieldName).startObject("index_prefixes").endObject().field("index_phrases", false) ); e = expectThrows(IllegalArgumentException.class, () -> merge(mapperService, differentPhrases)); assertThat(e.getMessage(), containsString("Cannot update parameter [index_phrases]")); XContentBuilder newField = mapping(b -> { - b.startObject("field").field("type", "text").startObject("index_prefixes").endObject().field("index_phrases", true).endObject(); + b.startObject("field") + .field("type", textFieldName) + .startObject("index_prefixes") + .endObject() + .field("index_phrases", true) + .endObject(); b.startObject("other_field").field("type", "keyword").endObject(); }); merge(mapperService, newField); From 
c118fe778431abb80d4ba8b4b93a3500f9b73b8d Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Mon, 13 Nov 2023 16:08:11 -0800 Subject: [PATCH 05/24] Disable index prefix and phrase mapper Signed-off-by: Rishabh Maurya --- .../mapper/MatchOnlyTextFieldMapper.java | 35 ++++++++- .../index/mapper/TextFieldMapper.java | 6 +- .../mapper/MatchOnlyTextFieldMapperTests.java | 78 +++++-------------- .../index/mapper/TextFieldMapperTests.java | 57 +------------- 4 files changed, 59 insertions(+), 117 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java index e44a9d0654ee2..619506ffc0f8b 100644 --- a/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java @@ -33,6 +33,7 @@ import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.function.Function; /** @@ -92,6 +93,31 @@ private static Parameter indexOptions(Function init } final Parameter norms = norms(m -> ((MatchOnlyTextFieldMapper) m).norms); + final Parameter indexPhrases = Parameter.boolParam( + "index_phrases", + false, + m -> ((MatchOnlyTextFieldType) m.mappedFieldType).indexPhrases, + false + ).setValidator(v -> { + if (v == true) { + throw new MapperParsingException("Index phrases cannot be enabled on for match_only_text field. Use text field instead"); + } + }); + + final Parameter indexPrefixes = new Parameter<>( + "index_prefixes", + false, + () -> null, + TextFieldMapper::parsePrefixConfig, + m -> Optional.ofNullable(((MatchOnlyTextFieldType) m.mappedFieldType).prefixFieldType) + .map(p -> new PrefixConfig(p.minChars, p.maxChars)) + .orElse(null) + ).acceptsNull().setValidator( v -> { + if (v != null) { + throw new MapperParsingException("Index prefixes cannot be enabled on for match_only_text field. 
Use text field instead"); + } + } + ); private static Parameter norms(Function initializer) { return Parameter.boolParam("norms", false, initializer, false) @@ -138,10 +164,15 @@ protected MatchOnlyTextFieldType buildFieldType(FieldType fieldType, BuilderCont throw new IllegalArgumentException("Cannot set position_increment_gap on field [" + name + "] without positions enabled"); } if (positionIncrementGap.get() != POSITION_INCREMENT_GAP_USE_ANALYZER) { + if (fieldType.indexOptions().compareTo(IndexOptions.DOCS) < 0) { + throw new IllegalArgumentException( + "Cannot set position_increment_gap on field [" + name + "] without indexing enabled" + ); + } // for index analyzer we don't set positionIncrementGap whereas for search analyzer its set because // phrase queries, which make use of it, should work fine as they will directly work on the field value // per matched document by reading from _source field. - + indexAnalyzer = new NamedAnalyzer(indexAnalyzer, positionIncrementGap.get()); searchAnalyzer = new NamedAnalyzer(searchAnalyzer, positionIncrementGap.get()); searchQuoteAnalyzer = new NamedAnalyzer(searchQuoteAnalyzer, positionIncrementGap.get()); } @@ -192,7 +223,9 @@ protected List> getParameters() { * @opensearch.internal */ public static final class MatchOnlyTextFieldType extends TextFieldMapper.TextFieldType { + private final boolean indexPhrases = false; + private PrefixFieldType prefixFieldType; @Override public String typeName() { return CONTENT_TYPE; diff --git a/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java index ffef9044bd715..d0e041e68a81d 100644 --- a/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java @@ -152,11 +152,11 @@ private static TextFieldMapper toType(FieldMapper in) { * * @opensearch.internal */ - private static final class PrefixConfig implements 
ToXContent { + protected static final class PrefixConfig implements ToXContent { final int minChars; final int maxChars; - private PrefixConfig(int minChars, int maxChars) { + PrefixConfig(int minChars, int maxChars) { this.minChars = minChars; this.maxChars = maxChars; if (minChars > maxChars) { @@ -198,7 +198,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } } - private static PrefixConfig parsePrefixConfig(String propName, ParserContext parserContext, Object propNode) { + static PrefixConfig parsePrefixConfig(String propName, ParserContext parserContext, Object propNode) { if (propNode == null) { return null; } diff --git a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java index 9d62710c2320c..bffd1bc862f71 100644 --- a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java @@ -36,9 +36,6 @@ import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableFieldType; -import org.apache.lucene.index.PostingsEnum; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.util.BytesRef; import org.opensearch.core.common.Strings; import org.opensearch.core.xcontent.MediaTypeRegistry; import org.opensearch.core.xcontent.XContentBuilder; @@ -51,7 +48,6 @@ import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.instanceOf; public class MatchOnlyTextFieldMapperTests extends TextFieldMapperTests { @@ -148,68 +144,32 @@ public void testAnalyzedFieldPositionIncrementWithoutPositions() { @Override public void testBWCSerialization() throws IOException { - } + @Override public void testPositionIncrementGap() throws IOException { - final int 
positionIncrementGap = randomIntBetween(1, 1000); - MapperService mapperService = createMapperService( - fieldMapping(b -> b.field("type", textFieldName).field("position_increment_gap", positionIncrementGap)) - ); - ParsedDocument doc = mapperService.documentMapper().parse(source(b -> b.array("field", new String[] { "a", "b" }))); + } - IndexableField[] fields = doc.rootDoc().getFields("field"); - assertEquals(2, fields.length); - assertEquals("a", fields[0].stringValue()); - assertEquals("b", fields[1].stringValue()); - - withLuceneIndex(mapperService, iw -> iw.addDocument(doc.rootDoc()), reader -> { - TermsEnum terms = getOnlyLeafReader(reader).terms("field").iterator(); - assertTrue(terms.seekExact(new BytesRef("b"))); - PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS); - assertEquals(0, postings.nextDoc()); - assertEquals(positionIncrementGap + 1, postings.nextPosition()); - }); + @Override + public void testDefaultPositionIncrementGap() throws IOException { + } + + @Override + public void testIndexPrefixMapping() throws IOException { + } + @Override + public void testIndexPrefixIndexTypes() throws IOException { + } + + @Override + public void testFastPhrasePrefixes() throws IOException { + } + + @Override + public void testFastPhraseMapping() throws IOException { } @Override public void testSimpleMerge() throws IOException { - XContentBuilder startingMappingBad = fieldMapping( - b -> b.field("type", textFieldName).startObject("index_prefixes").endObject().field("index_phrases", true) - ); - - MapperParsingException exc = expectThrows(MapperParsingException.class, () -> createMapperService(startingMappingBad)); - assertThat( - exc.getMessage(), - containsString("Failed to parse mapping [_doc]: Cannot set index_phrases on field [field] if positions are not enabled") - ); - - XContentBuilder startingMapping = fieldMapping( - b -> b.field("type", textFieldName).startObject("index_prefixes").endObject().field("index_phrases", false) - ); - 
MapperService mapperService = createMapperService(startingMapping); - - XContentBuilder differentPrefix = fieldMapping( - b -> b.field("type", textFieldName) - .startObject("index_prefixes") - .field("min_chars", "3") - .endObject() - .field("index_phrases", false) - ); - IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> merge(mapperService, differentPrefix)); - assertThat(e.getMessage(), containsString("Cannot update parameter [index_prefixes]")); - - XContentBuilder newField = mapping(b -> { - b.startObject("field") - .field("type", textFieldName) - .startObject("index_prefixes") - .endObject() - .field("index_phrases", false) - .endObject(); - b.startObject("other_field").field("type", "keyword").endObject(); - }); - merge(mapperService, newField); - assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(TextFieldMapper.class)); - assertThat(mapperService.documentMapper().mappers().getMapper("other_field"), instanceOf(KeywordFieldMapper.class)); } } diff --git a/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java index a4fbd70c69840..564073f843ff3 100644 --- a/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java @@ -173,10 +173,7 @@ protected void registerParameters(ParameterChecker checker) throws IOException { checker.registerConflictCheck("store", b -> b.field("store", true)); if (!textFieldName.equals("match_only_text")) { checker.registerConflictCheck("index_phrases", b -> b.field("index_phrases", true)); - } - checker.registerConflictCheck("index_prefixes", b -> b.startObject("index_prefixes").endObject()); - - if (!textFieldName.equals("match_only_text")) { + checker.registerConflictCheck("index_prefixes", b -> b.startObject("index_prefixes").endObject()); 
checker.registerConflictCheck("index_options", b -> b.field("index_options", "docs")); } checker.registerConflictCheck("similarity", b -> b.field("similarity", "boolean")); @@ -597,7 +594,7 @@ public void testNotIndexedFieldPositionIncrement() { fieldMapping(b -> b.field("type", textFieldName).field("index", false).field("position_increment_gap", 10)) ) ); - assertThat(e.getMessage(), containsString("Cannot set position_increment_gap on field [field] without positions enabled")); + assertThat(e.getMessage(), containsString("Cannot set position_increment_gap on field [field]")); } public void testAnalyzedFieldPositionIncrementWithoutPositions() { @@ -700,55 +697,7 @@ public void testIndexPrefixIndexTypes() throws IOException { } public void testNestedIndexPrefixes() throws IOException { - { - MapperService mapperService = createMapperService( - mapping( - b -> b.startObject("object") - .field("type", "object") - .startObject("properties") - .startObject("field") - .field("type", textFieldName) - .startObject("index_prefixes") - .endObject() - .endObject() - .endObject() - .endObject() - ) - ); - MappedFieldType textField = mapperService.fieldType("object.field"); - assertNotNull(textField); - assertThat(textField, instanceOf(TextFieldType.class)); - MappedFieldType prefix = ((TextFieldType) textField).getPrefixFieldType(); - assertEquals(prefix.name(), "object.field._index_prefix"); - FieldMapper mapper = (FieldMapper) mapperService.documentMapper().mappers().getMapper("object.field._index_prefix"); - // assertEquals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, mapper.fieldType.indexOptions()); - assertFalse(mapper.fieldType.storeTermVectorOffsets()); - } - - { - MapperService mapperService = createMapperService( - mapping( - b -> b.startObject("body") - .field("type", textFieldName) - .startObject("fields") - .startObject("with_prefix") - .field("type", textFieldName) - .startObject("index_prefixes") - .endObject() - .endObject() - .endObject() - .endObject() - ) - 
); - MappedFieldType textField = mapperService.fieldType("body.with_prefix"); - assertNotNull(textField); - assertThat(textField, instanceOf(TextFieldType.class)); - MappedFieldType prefix = ((TextFieldType) textField).getPrefixFieldType(); - assertEquals(prefix.name(), "body.with_prefix._index_prefix"); - FieldMapper mapper = (FieldMapper) mapperService.documentMapper().mappers().getMapper("body.with_prefix._index_prefix"); - // assertEquals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, mapper.fieldType.indexOptions()); - assertFalse(mapper.fieldType.storeTermVectorOffsets()); - } + } public void testFastPhraseMapping() throws IOException { From bc6c75cddfd272898462e0bb088d960494fd0d15 Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Tue, 14 Nov 2023 17:49:24 -0800 Subject: [PATCH 06/24] Added unit tests for phrase and multiphrase query validation Signed-off-by: Rishabh Maurya --- .../mapper/MatchOnlyTextFieldMapper.java | 46 +++--- .../index/query/SourceFieldMatchQuery.java | 26 ++-- .../mapper/MatchOnlyTextFieldMapperTests.java | 131 +++++++++++++++--- .../index/mapper/TextFieldMapperTests.java | 2 +- 4 files changed, 138 insertions(+), 67 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java index 619506ffc0f8b..2fb8db13f6f5a 100644 --- a/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java @@ -112,12 +112,11 @@ private static Parameter indexOptions(Function init m -> Optional.ofNullable(((MatchOnlyTextFieldType) m.mappedFieldType).prefixFieldType) .map(p -> new PrefixConfig(p.minChars, p.maxChars)) .orElse(null) - ).acceptsNull().setValidator( v -> { - if (v != null) { - throw new MapperParsingException("Index prefixes cannot be enabled on for match_only_text field. 
Use text field instead"); - } + ).acceptsNull().setValidator(v -> { + if (v != null) { + throw new MapperParsingException("Index prefixes cannot be enabled on for match_only_text field. Use text field instead"); } - ); + }); private static Parameter norms(Function initializer) { return Parameter.boolParam("norms", false, initializer, false) @@ -226,6 +225,7 @@ public static final class MatchOnlyTextFieldType extends TextFieldMapper.TextFie private final boolean indexPhrases = false; private PrefixFieldType prefixFieldType; + @Override public String typeName() { return CONTENT_TYPE; @@ -242,13 +242,7 @@ public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncremen for (Term term : phraseQuery.getTerms()) { builder.add(new TermQuery(term), BooleanClause.Occur.FILTER); } - return new SourceFieldMatchQuery( - builder.build(), - phraseQuery, - this, - (SourceValueFetcher) this.valueFetcher(context, context.lookup(), null), - context.lookup() - ); + return new SourceFieldMatchQuery(builder.build(), phraseQuery, this, context); } @Override @@ -257,19 +251,17 @@ public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositi MultiPhraseQuery multiPhraseQuery = (MultiPhraseQuery) super.multiPhraseQuery(stream, slop, enablePositionIncrements); BooleanQuery.Builder builder = new BooleanQuery.Builder(); for (Term[] terms : multiPhraseQuery.getTermArrays()) { - BooleanQuery.Builder disjunctions = new BooleanQuery.Builder(); - for (Term term : terms) { - disjunctions.add(new TermQuery(term), BooleanClause.Occur.SHOULD); + if (terms.length > 1) { + BooleanQuery.Builder disjunctions = new BooleanQuery.Builder(); + for (Term term : terms) { + disjunctions.add(new TermQuery(term), BooleanClause.Occur.SHOULD); + } + builder.add(disjunctions.build(), BooleanClause.Occur.FILTER); + } else { + builder.add(new TermQuery(terms[0]), BooleanClause.Occur.FILTER); } - builder.add(disjunctions.build(), BooleanClause.Occur.FILTER); } - return new 
SourceFieldMatchQuery( - builder.build(), - multiPhraseQuery, - this, - (SourceValueFetcher) this.valueFetcher(context, context.lookup(), null), - context.lookup() - ); + return new SourceFieldMatchQuery(builder.build(), multiPhraseQuery, this, context); } @Override @@ -290,13 +282,7 @@ public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, } builder.add(disjunctions.build(), BooleanClause.Occur.FILTER); } - return new SourceFieldMatchQuery( - builder.build(), - phrasePrefixQuery, - this, - (SourceValueFetcher) this.valueFetcher(context, context.lookup(), null), - context.lookup() - ); + return new SourceFieldMatchQuery(builder.build(), phrasePrefixQuery, this, context); } private List> getTermsFromTokenStream(TokenStream stream) throws IOException { diff --git a/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java b/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java index 841cbcf23f922..756377a477b6e 100644 --- a/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java +++ b/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java @@ -39,6 +39,7 @@ public class SourceFieldMatchQuery extends Query { final private SearchLookup lookup; final private MappedFieldType fieldType; final private SourceValueFetcher valueFetcher; + final private QueryShardContext context; /** * Constructs a SourceFieldMatchQuery. @@ -46,21 +47,15 @@ public class SourceFieldMatchQuery extends Query { * @param delegateQuery The parent query to use to find matches. * @param filter The query used to filter further by running against field value computed using _source field. * @param fieldType The mapped field type. - * @param valueFetcher The source value fetcher. - * @param lookup The search lookup. 
+ * @param context The QueryShardContext to get lookup and valueFetcher */ - public SourceFieldMatchQuery( - Query delegateQuery, - Query filter, - MappedFieldType fieldType, - SourceValueFetcher valueFetcher, - SearchLookup lookup - ) { + public SourceFieldMatchQuery(Query delegateQuery, Query filter, MappedFieldType fieldType, QueryShardContext context) { this.delegateQuery = delegateQuery; this.filter = filter; this.fieldType = fieldType; - this.valueFetcher = valueFetcher; - this.lookup = lookup; + this.context = context; + this.lookup = context.lookup(); + this.valueFetcher = (SourceValueFetcher) fieldType.valueFetcher(context, lookup, null); } @Override @@ -74,7 +69,7 @@ public Query rewrite(IndexSearcher indexSearcher) throws IOException { if (rewritten == delegateQuery) { return this; } - return new SourceFieldMatchQuery(rewritten, filter, fieldType, valueFetcher, lookup); + return new SourceFieldMatchQuery(rewritten, filter, fieldType, context); } @Override @@ -132,15 +127,14 @@ public boolean equals(Object o) { } SourceFieldMatchQuery other = (SourceFieldMatchQuery) o; return Objects.equals(this.delegateQuery, other.delegateQuery) - && this.filter == other.filter - && Objects.equals(this.lookup, other.lookup) + && Objects.equals(this.filter, other.filter) && Objects.equals(this.fieldType, other.fieldType) - && Objects.equals(this.valueFetcher, other.valueFetcher); + && Objects.equals(this.context, other.context); } @Override public int hashCode() { - return Objects.hash(classHash(), delegateQuery, filter, lookup, fieldType, valueFetcher); + return Objects.hash(classHash(), delegateQuery, filter, fieldType, context); } @Override diff --git a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java index bffd1bc862f71..1f78ea3abcff8 100644 --- a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java +++ 
b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java @@ -36,9 +36,21 @@ import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableFieldType; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MultiPhraseQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.tests.analysis.MockSynonymAnalyzer; import org.opensearch.core.common.Strings; import org.opensearch.core.xcontent.MediaTypeRegistry; import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.query.MatchPhraseQueryBuilder; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.index.query.SourceFieldMatchQuery; +import org.opensearch.index.search.MatchQuery; import org.junit.BeforeClass; import java.io.IOException; @@ -48,6 +60,7 @@ import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.core.Is.is; public class MatchOnlyTextFieldMapperTests extends TextFieldMapperTests { @@ -91,10 +104,10 @@ public void testIndexOptions() throws IOException { Map supportedOptions = new HashMap<>(); supportedOptions.put("docs", IndexOptions.DOCS); - Map unSupportedOptions = new HashMap<>(); - unSupportedOptions.put("freqs", IndexOptions.DOCS_AND_FREQS); - unSupportedOptions.put("positions", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); - unSupportedOptions.put("offsets", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); + Map unsupportedOptions = new HashMap<>(); + unsupportedOptions.put("freqs", IndexOptions.DOCS_AND_FREQS); + unsupportedOptions.put("positions", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); + unsupportedOptions.put("offsets", 
IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); for (String option : supportedOptions.keySet()) { XContentBuilder mapping = MediaTypeRegistry.JSON.contentBuilder().startObject().startObject("_doc").startObject("properties"); @@ -113,7 +126,7 @@ public void testIndexOptions() throws IOException { assertEquals(options, fields[0].fieldType().indexOptions()); } - for (String option : unSupportedOptions.keySet()) { + for (String option : unsupportedOptions.keySet()) { XContentBuilder mapping = MediaTypeRegistry.JSON.contentBuilder().startObject().startObject("_doc").startObject("properties"); mapping.startObject(option).field("type", textFieldName).field("index_options", option).endObject(); mapping.endObject().endObject().endObject(); @@ -143,33 +156,111 @@ public void testAnalyzedFieldPositionIncrementWithoutPositions() { } @Override - public void testBWCSerialization() throws IOException { - } + public void testBWCSerialization() throws IOException {} @Override - public void testPositionIncrementGap() throws IOException { - } + public void testPositionIncrementGap() throws IOException {} @Override - public void testDefaultPositionIncrementGap() throws IOException { - } + public void testDefaultPositionIncrementGap() throws IOException {} @Override - public void testIndexPrefixMapping() throws IOException { - } + public void testIndexPrefixMapping() throws IOException {} + @Override - public void testIndexPrefixIndexTypes() throws IOException { - } + public void testIndexPrefixIndexTypes() throws IOException {} @Override - public void testFastPhrasePrefixes() throws IOException { - } + public void testFastPhrasePrefixes() throws IOException {} @Override - public void testFastPhraseMapping() throws IOException { - } + public void testFastPhraseMapping() throws IOException {} @Override - public void testSimpleMerge() throws IOException { + public void testSimpleMerge() throws IOException {} + + public void testPhraseQuery() throws IOException { + MapperService 
mapperService = createMapperService(mapping(b -> { + b.startObject("field").field("type", textFieldName).field("analyzer", "my_stop_analyzer").endObject(); + // "standard" will be replaced with MockSynonymAnalyzer + b.startObject("synfield").field("type", textFieldName).field("analyzer", "standard").endObject(); + })); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + + Query q = new MatchPhraseQueryBuilder("field", "two words").toQuery(queryShardContext); + Query expectedQuery = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("field", "two")), BooleanClause.Occur.FILTER) + .add(new TermQuery(new Term("field", "words")), BooleanClause.Occur.FILTER) + .build(), + new PhraseQuery("field", "two", "words"), + mapperService.fieldType("field"), + queryShardContext + ); + + assertThat(q, is(expectedQuery)); + Query q4 = new MatchPhraseQueryBuilder("field", "singleton").toQuery(queryShardContext); + assertThat(q4, is(new TermQuery(new Term("field", "singleton")))); + + Query q2 = new MatchPhraseQueryBuilder("field", "three words here").toQuery(queryShardContext); + expectedQuery = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("field", "three")), BooleanClause.Occur.FILTER) + .add(new TermQuery(new Term("field", "words")), BooleanClause.Occur.FILTER) + .add(new TermQuery(new Term("field", "here")), BooleanClause.Occur.FILTER) + .build(), + new PhraseQuery("field", "three", "words", "here"), + mapperService.fieldType("field"), + queryShardContext + ); + assertThat(q2, is(expectedQuery)); + + Query q3 = new MatchPhraseQueryBuilder("field", "two words").slop(2).toQuery(queryShardContext); + expectedQuery = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("field", "two")), BooleanClause.Occur.FILTER) + .add(new TermQuery(new Term("field", "words")), BooleanClause.Occur.FILTER) + .build(), + new PhraseQuery(2, "field", "two", "words"), + 
mapperService.fieldType("field"), + queryShardContext + ); + assertThat(q3, is(expectedQuery)); + + Query q5 = new MatchPhraseQueryBuilder("field", "sparkle a stopword").toQuery(queryShardContext); + expectedQuery = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("field", "sparkle")), BooleanClause.Occur.FILTER) + .add(new TermQuery(new Term("field", "stopword")), BooleanClause.Occur.FILTER) + .build(), + new PhraseQuery.Builder().add(new Term("field", "sparkle")).add(new Term("field", "stopword"), 2).build(), + mapperService.fieldType("field"), + queryShardContext + ); + assertThat(q5, is(expectedQuery)); + + MatchQuery matchQuery = new MatchQuery(queryShardContext); + matchQuery.setAnalyzer(new MockSynonymAnalyzer()); + Query q6 = matchQuery.parse(MatchQuery.Type.PHRASE, "synfield", "motor dogs"); + expectedQuery = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("synfield", "motor")), BooleanClause.Occur.FILTER) + .add( + new BooleanQuery.Builder().add(new TermQuery(new Term("synfield", "dogs")), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("synfield", "dog")), BooleanClause.Occur.SHOULD) + .build(), + BooleanClause.Occur.FILTER + ) + .build(), + new MultiPhraseQuery.Builder().add(new Term("synfield", "motor")) + .add(new Term[] { new Term("synfield", "dogs"), new Term("synfield", "dog") }, 1) + .build(), + mapperService.fieldType("synfield"), + queryShardContext + ); + assertThat(q6, is(expectedQuery)); + } + + public void testMultiPhraseQuery() { + + } + + public void testPrefixQuery() { + } } diff --git a/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java index 564073f843ff3..3dce7a0ed646f 100644 --- a/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java @@ -697,7 +697,7 @@ 
public void testIndexPrefixIndexTypes() throws IOException { } public void testNestedIndexPrefixes() throws IOException { - + } public void testFastPhraseMapping() throws IOException { From 8993226722591585c8d9ac2b65316352ac153471 Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Wed, 15 Nov 2023 17:27:33 -0800 Subject: [PATCH 07/24] Add unit tests for prefix and prefix phrase queries Signed-off-by: Rishabh Maurya --- .../mapper/MatchOnlyTextFieldMapper.java | 20 ++- .../mapper/MatchOnlyTextFieldMapperTests.java | 169 ++++++++++++++++++ 2 files changed, 181 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java index 2fb8db13f6f5a..6b5055d07fd39 100644 --- a/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java @@ -270,17 +270,21 @@ public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, List> termArray = getTermsFromTokenStream(stream); BooleanQuery.Builder builder = new BooleanQuery.Builder(); for (int i = 0; i < termArray.size(); i++) { - BooleanQuery.Builder disjunctions = new BooleanQuery.Builder(); - for (Term term : termArray.get(i)) { - if (i == termArray.size() - 1) { - MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery(name()); - mqb.add(term); - disjunctions.add(mqb, BooleanClause.Occur.SHOULD); + if (i == termArray.size() - 1) { + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery(name()); + mqb.add(termArray.get(i).toArray(new Term[0])); + builder.add(mqb, BooleanClause.Occur.FILTER); + } else { + if (termArray.get(i).size() > 1) { + BooleanQuery.Builder disjunctions = new BooleanQuery.Builder(); + for (Term term : termArray.get(i)) { + disjunctions.add(new TermQuery(term), BooleanClause.Occur.SHOULD); + } + builder.add(disjunctions.build(), 
BooleanClause.Occur.FILTER); } else { - disjunctions.add(new TermQuery(term), BooleanClause.Occur.SHOULD); + builder.add(new TermQuery(termArray.get(i).get(0)), BooleanClause.Occur.FILTER); } } - builder.add(disjunctions.build(), BooleanClause.Occur.FILTER); } return new SourceFieldMatchQuery(builder.build(), phrasePrefixQuery, this, context); } diff --git a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java index 1f78ea3abcff8..998906fd7b500 100644 --- a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java @@ -44,9 +44,11 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.tests.analysis.MockSynonymAnalyzer; +import org.opensearch.common.lucene.search.MultiPhrasePrefixQuery; import org.opensearch.core.common.Strings; import org.opensearch.core.xcontent.MediaTypeRegistry; import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.query.MatchPhrasePrefixQueryBuilder; import org.opensearch.index.query.MatchPhraseQueryBuilder; import org.opensearch.index.query.QueryShardContext; import org.opensearch.index.query.SourceFieldMatchQuery; @@ -173,6 +175,173 @@ public void testIndexPrefixIndexTypes() throws IOException {} @Override public void testFastPhrasePrefixes() throws IOException {} + public void testPhrasePrefixes() throws IOException { + MapperService mapperService = createMapperService(mapping(b -> { + b.startObject("field"); + { + b.field("type", textFieldName); + b.field("analyzer", "my_stop_analyzer"); // "standard" will be replaced with MockSynonymAnalyzer + } + b.endObject(); + b.startObject("synfield"); + { + b.field("type", textFieldName); + b.field("analyzer", "standard"); // "standard" will be replaced with MockSynonymAnalyzer + } + 
b.endObject(); + })); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "two words").toQuery(queryShardContext); + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("field"); + mqb.add(new Term("field", "words")); + MultiPhrasePrefixQuery mqbFilter = new MultiPhrasePrefixQuery("field"); + mqbFilter.add(new Term("field", "two")); + mqbFilter.add(new Term("field", "words")); + Query expected = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("field", "two")), BooleanClause.Occur.FILTER) + .add(mqb, BooleanClause.Occur.FILTER) + .build(), + mqbFilter, + mapperService.fieldType("field"), + queryShardContext + ); + assertThat(q, equalTo(expected)); + } + + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "three words here").toQuery(queryShardContext); + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("field"); + mqb.add(new Term("field", "here")); + MultiPhrasePrefixQuery mqbFilter = new MultiPhrasePrefixQuery("field"); + mqbFilter.add(new Term("field", "three")); + mqbFilter.add(new Term("field", "words")); + mqbFilter.add(new Term("field", "here")); + Query expected = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("field", "three")), BooleanClause.Occur.FILTER) + .add(new TermQuery(new Term("field", "words")), BooleanClause.Occur.FILTER) + .add(mqb, BooleanClause.Occur.FILTER) + .build(), + mqbFilter, + mapperService.fieldType("field"), + queryShardContext + ); + assertThat(q, equalTo(expected)); + } + + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "two words").slop(1).toQuery(queryShardContext); + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("field"); + mqb.add(new Term("field", "words")); + MultiPhrasePrefixQuery mqbFilter = new MultiPhrasePrefixQuery("field"); + mqbFilter.setSlop(1); + mqbFilter.add(new Term("field", "two")); + mqbFilter.add(new Term("field", 
"words")); + Query expected = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("field", "two")), BooleanClause.Occur.FILTER) + .add(mqb, BooleanClause.Occur.FILTER) + .build(), + mqbFilter, + mapperService.fieldType("field"), + queryShardContext + ); + assertThat(q, equalTo(expected)); + } + + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "singleton").toQuery(queryShardContext); + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("field"); + mqb.add(new Term("field", "singleton")); + Query expected = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(mqb, BooleanClause.Occur.FILTER).build(), + mqb, + mapperService.fieldType("field"), + queryShardContext + ); + assertThat(q, is(expected)); + } + + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "sparkle a stopword").toQuery(queryShardContext); + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("field"); + mqb.add(new Term("field", "stopword")); + MultiPhrasePrefixQuery mqbFilter = new MultiPhrasePrefixQuery("field"); + mqbFilter.add(new Term("field", "sparkle")); + mqbFilter.add(new Term[] { new Term("field", "stopword") }, 2); + Query expected = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("field", "sparkle")), BooleanClause.Occur.FILTER) + .add(mqb, BooleanClause.Occur.FILTER) + .build(), + mqbFilter, + mapperService.fieldType("field"), + queryShardContext + ); + assertThat(q, equalTo(expected)); + } + + { + MatchQuery matchQuery = new MatchQuery(queryShardContext); + matchQuery.setAnalyzer(new MockSynonymAnalyzer()); + Query q = matchQuery.parse(MatchQuery.Type.PHRASE_PREFIX, "synfield", "motor dogs"); + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("synfield"); + mqb.add(new Term[] { new Term("synfield", "dogs"), new Term("synfield", "dog") }); + MultiPhrasePrefixQuery mqbFilter = new MultiPhrasePrefixQuery("synfield"); + mqbFilter.add(new Term("synfield", "motor")); + mqbFilter.add(new 
Term[] { new Term("synfield", "dogs"), new Term("synfield", "dog") }); + Query expected = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("synfield", "motor")), BooleanClause.Occur.FILTER) + .add(mqb, BooleanClause.Occur.FILTER) + .build(), + mqbFilter, + mapperService.fieldType("synfield"), + queryShardContext + ); + assertThat(q, equalTo(expected)); + } + + { + MatchQuery matchQuery = new MatchQuery(queryShardContext); + matchQuery.setPhraseSlop(1); + matchQuery.setAnalyzer(new MockSynonymAnalyzer()); + Query q = matchQuery.parse(MatchQuery.Type.PHRASE_PREFIX, "synfield", "two dogs"); + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("synfield"); + mqb.add(new Term[] { new Term("synfield", "dogs"), new Term("synfield", "dog") }); + MultiPhrasePrefixQuery mqbFilter = new MultiPhrasePrefixQuery("synfield"); + mqbFilter.add(new Term("synfield", "two")); + mqbFilter.add(new Term[] { new Term("synfield", "dogs"), new Term("synfield", "dog") }); + mqbFilter.setSlop(1); + Query expected = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("synfield", "two")), BooleanClause.Occur.FILTER) + .add(mqb, BooleanClause.Occur.FILTER) + .build(), + mqbFilter, + mapperService.fieldType("synfield"), + queryShardContext + ); + assertThat(q, equalTo(expected)); + } + + // { + // Query q = new MatchPhrasePrefixQueryBuilder("synfield", "three dog word").toQuery(queryShardContext); + // MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("synfield"); + // mqb.add(new Term[] {new Term("synfield", "dogs"), new Term("synfield", "dog")}); + // MultiPhrasePrefixQuery mqbFilter = new MultiPhrasePrefixQuery("synfield"); + // mqbFilter.add(new Term("synfield", "motor")); + // mqbFilter.add(new Term[] {new Term("synfield", "dogs"), new Term("synfield", "dog")}); + // Query expected = new SourceFieldMatchQuery( + // new BooleanQuery.Builder().add(new TermQuery(new Term("synfield", "three")), 
BooleanClause.Occur.FILTER) + // .add(mqb, BooleanClause.Occur.FILTER) + // .add(new TermQuery(new Term("synfield", "word")), BooleanClause.Occur.FILTER) + // .build(), + // mqbFilter, + // mapperService.fieldType("synfield"), + // queryShardContext + // ); + // assertThat(q, equalTo(mpq)); + // } + } + @Override public void testFastPhraseMapping() throws IOException {} From bb74051cf7d21d5d9045f8f0e87467ee07d7c7ff Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Thu, 16 Nov 2023 10:34:00 -0800 Subject: [PATCH 08/24] Add a test to cover 3 word with synonym match phrase prefix query Signed-off-by: Rishabh Maurya --- .../mapper/MatchOnlyTextFieldMapperTests.java | 44 +++++++++++-------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java index 998906fd7b500..4658b696f38f3 100644 --- a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java @@ -322,24 +322,32 @@ public void testPhrasePrefixes() throws IOException { assertThat(q, equalTo(expected)); } - // { - // Query q = new MatchPhrasePrefixQueryBuilder("synfield", "three dog word").toQuery(queryShardContext); - // MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("synfield"); - // mqb.add(new Term[] {new Term("synfield", "dogs"), new Term("synfield", "dog")}); - // MultiPhrasePrefixQuery mqbFilter = new MultiPhrasePrefixQuery("synfield"); - // mqbFilter.add(new Term("synfield", "motor")); - // mqbFilter.add(new Term[] {new Term("synfield", "dogs"), new Term("synfield", "dog")}); - // Query expected = new SourceFieldMatchQuery( - // new BooleanQuery.Builder().add(new TermQuery(new Term("synfield", "three")), BooleanClause.Occur.FILTER) - // .add(mqb, BooleanClause.Occur.FILTER) - // .add(new TermQuery(new 
Term("synfield", "word")), BooleanClause.Occur.FILTER) - // .build(), - // mqbFilter, - // mapperService.fieldType("synfield"), - // queryShardContext - // ); - // assertThat(q, equalTo(mpq)); - // } + { + MatchQuery matchQuery = new MatchQuery(queryShardContext); + matchQuery.setAnalyzer(new MockSynonymAnalyzer()); + Query q = matchQuery.parse(MatchQuery.Type.PHRASE_PREFIX, "synfield", "three dogs word"); + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("synfield"); + mqb.add(new Term("synfield", "word")); + MultiPhrasePrefixQuery mqbFilter = new MultiPhrasePrefixQuery("synfield"); + mqbFilter.add(new Term("synfield", "three")); + mqbFilter.add(new Term[] { new Term("synfield", "dogs"), new Term("synfield", "dog") }); + mqbFilter.add(new Term("synfield", "word")); + Query expected = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("synfield", "three")), BooleanClause.Occur.FILTER) + .add( + new BooleanQuery.Builder().add(new TermQuery(new Term("synfield", "dogs")), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("synfield", "dog")), BooleanClause.Occur.SHOULD) + .build(), + BooleanClause.Occur.FILTER + ) + .add(mqb, BooleanClause.Occur.FILTER) + .build(), + mqbFilter, + mapperService.fieldType("synfield"), + queryShardContext + ); + assertThat(q, equalTo(expected)); + } } @Override From 9cd8d5af54c9b1379b55e5de9df9a6329cef471f Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Thu, 16 Nov 2023 19:53:43 -0800 Subject: [PATCH 09/24] Add unit test for SourceFieldMatchQuery Signed-off-by: Rishabh Maurya --- .../index/query/SourceFieldMatchQuery.java | 4 + .../mapper/MatchOnlyTextFieldMapperTests.java | 8 -- .../query/SourceFieldMatchQueryTests.java | 106 ++++++++++++++++++ .../index/mapper/MapperServiceTestCase.java | 5 +- 4 files changed, 114 insertions(+), 9 deletions(-) create mode 100644 server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java diff --git 
a/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java b/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java index 756377a477b6e..a92a7f78e2f17 100644 --- a/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java +++ b/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java @@ -83,6 +83,10 @@ public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float bo public Scorer scorer(LeafReaderContext context) throws IOException { Scorer scorer = weight.scorer(context); + if (scorer == null) { + // none of the docs are matching + return null; + } DocIdSetIterator approximation = scorer.iterator(); LeafSearchLookup leafSearchLookup = lookup.getLeafSearchLookup(context); TwoPhaseIterator twoPhase = new TwoPhaseIterator(approximation) { diff --git a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java index 4658b696f38f3..d13a4de247f6b 100644 --- a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java @@ -432,12 +432,4 @@ public void testPhraseQuery() throws IOException { ); assertThat(q6, is(expectedQuery)); } - - public void testMultiPhraseQuery() { - - } - - public void testPrefixQuery() { - - } } diff --git a/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java b/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java new file mode 100644 index 0000000000000..64640472b3ffc --- /dev/null +++ b/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java @@ -0,0 +1,106 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.query; + +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TopDocs; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.mapper.MapperServiceTestCase; +import org.opensearch.index.mapper.ParsedDocument; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +import static org.mockito.Mockito.when; + +public class SourceFieldMatchQueryTests extends MapperServiceTestCase { + + public void testAllPossibleScenarios() throws IOException { + MapperService mapperService = createMapperService(mapping(b -> { + b.startObject("desert"); + { + b.field("type", "match_only_text"); + } + b.endObject(); + })); + + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.sourcePath("desert")).thenReturn(Set.of("desert")); + + String[] deserts = new String[] { "apple pie pie", "banana split pie", "chocolate cake" }; + List docs = new ArrayList<>(); + for (String desert : deserts) { + docs.add(mapperService.documentMapper().parse(source(b -> b.field("desert", desert)))); + } + SourceFieldMatchQuery matchBoth = new SourceFieldMatchQuery( + QueryBuilders.matchQuery("desert", "apple").doToQuery(queryShardContext), // Delegate query + QueryBuilders.matchQuery("desert", "pie").doToQuery(queryShardContext), // Filter query + queryShardContext.getFieldType("desert"), + queryShardContext + ); + + SourceFieldMatchQuery matchDelegate = new SourceFieldMatchQuery( + QueryBuilders.matchQuery("desert", "apple").doToQuery(queryShardContext), // Delegate query + QueryBuilders.matchQuery("desert", "juice").doToQuery(queryShardContext), // Filter query + queryShardContext.getFieldType("desert"), + queryShardContext + ); + + SourceFieldMatchQuery matchFilter = new SourceFieldMatchQuery( + QueryBuilders.matchQuery("desert", "tart").doToQuery(queryShardContext), // 
Delegate query + QueryBuilders.matchQuery("desert", "pie").doToQuery(queryShardContext), // Filter query + queryShardContext.getFieldType("desert"), + queryShardContext + ); + + SourceFieldMatchQuery matchNone = new SourceFieldMatchQuery( + QueryBuilders.matchQuery("desert", "gulab").doToQuery(queryShardContext), // Delegate query + QueryBuilders.matchQuery("desert", "jamun").doToQuery(queryShardContext), // Filter query + queryShardContext.getFieldType("desert"), + queryShardContext + ); + + SourceFieldMatchQuery matchMultipleDocs = new SourceFieldMatchQuery( + QueryBuilders.matchAllQuery().toQuery(queryShardContext), // Delegate query + QueryBuilders.matchQuery("desert", "pie").doToQuery(queryShardContext), // Filter query + queryShardContext.getFieldType("desert"), + queryShardContext + ); + + withLuceneIndex(mapperService, iw -> { + for (ParsedDocument d : docs) { + iw.addDocument(d.rootDoc()); + } + }, reader -> { + IndexSearcher searcher = newSearcher(reader); + TopDocs topDocs = searcher.search(matchBoth, 10); + assertEquals(topDocs.totalHits.value, 1); + assertEquals(topDocs.scoreDocs[0].doc, 0); + + topDocs = searcher.search(matchDelegate, 10); + assertEquals(topDocs.totalHits.value, 0); + + topDocs = searcher.search(matchFilter, 10); + assertEquals(topDocs.totalHits.value, 0); + + topDocs = searcher.search(matchNone, 10); + assertEquals(topDocs.totalHits.value, 0); + + topDocs = searcher.search(matchMultipleDocs, 10); + assertEquals(topDocs.totalHits.value, 2); + // assert constant score + for (ScoreDoc scoreDoc : topDocs.scoreDocs) { + assertEquals(scoreDoc.score, 1.0, 0.00000000001); + } + }); + } +} diff --git a/test/framework/src/main/java/org/opensearch/index/mapper/MapperServiceTestCase.java b/test/framework/src/main/java/org/opensearch/index/mapper/MapperServiceTestCase.java index 94c2e4ef7da62..513b4516c879b 100644 --- a/test/framework/src/main/java/org/opensearch/index/mapper/MapperServiceTestCase.java +++ 
b/test/framework/src/main/java/org/opensearch/index/mapper/MapperServiceTestCase.java @@ -239,7 +239,7 @@ protected final XContentBuilder fieldMapping(CheckedConsumer mapperService.fieldType(inv.getArguments()[0].toString())); @@ -254,6 +254,9 @@ QueryShardContext createQueryShardContext(MapperService mapperService) { when(queryShardContext.lookup()).thenReturn(new SearchLookup(mapperService, (ft, s) -> { throw new UnsupportedOperationException("search lookup not available"); })); + when(queryShardContext.getFieldType(any())).thenAnswer( + inv -> mapperService.fieldType(inv.getArguments()[0].toString()) + ); return queryShardContext; } } From f9255f146198c0048bd57aee2e2f39b60bec28de Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Fri, 17 Nov 2023 11:49:01 -0800 Subject: [PATCH 10/24] Added test for _source disabled case Signed-off-by: Rishabh Maurya --- .../index/query/SourceFieldMatchQuery.java | 13 ++++++++++ .../query/SourceFieldMatchQueryTests.java | 26 +++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java b/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java index a92a7f78e2f17..6c1b87a810d0f 100644 --- a/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java +++ b/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java @@ -55,6 +55,14 @@ public SourceFieldMatchQuery(Query delegateQuery, Query filter, MappedFieldType this.fieldType = fieldType; this.context = context; this.lookup = context.lookup(); + if (!context.documentMapper("").sourceMapper().enabled()) { + throw new IllegalArgumentException( + "SourceFieldMatchQuery error: unable to fetch fields from _source field: _source is disabled in the mappings " + + "for index [" + + context.index().getName() + + "]" + ); + } this.valueFetcher = (SourceValueFetcher) fieldType.valueFetcher(context, lookup, null); } @@ -95,7 +103,12 @@ public Scorer 
scorer(LeafReaderContext context) throws IOException { public boolean matches() { leafSearchLookup.setDocument(approximation.docID()); List values = valueFetcher.fetchValues(leafSearchLookup.source()); + // Missing fields won't count as match. Can we use a default value for missing field? + if (values.isEmpty()) { + return false; + } MemoryIndex memoryIndex = new MemoryIndex(); + for (Object value : values) { memoryIndex.addField(fieldType.name(), (String) value, fieldType.indexAnalyzer()); } diff --git a/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java b/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java index 64640472b3ffc..6c3daf86af537 100644 --- a/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java +++ b/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java @@ -11,6 +11,7 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; +import org.opensearch.core.index.Index; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.mapper.MapperServiceTestCase; import org.opensearch.index.mapper.ParsedDocument; @@ -20,6 +21,7 @@ import java.util.List; import java.util.Set; +import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.when; public class SourceFieldMatchQueryTests extends MapperServiceTestCase { @@ -35,6 +37,8 @@ public void testAllPossibleScenarios() throws IOException { QueryShardContext queryShardContext = createQueryShardContext(mapperService); when(queryShardContext.sourcePath("desert")).thenReturn(Set.of("desert")); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + when(queryShardContext.documentMapper(anyString())).thenReturn(mapperService.documentMapper()); String[] deserts = new String[] { "apple pie pie", "banana split pie", "chocolate cake" }; List docs = new ArrayList<>(); @@ 
-103,4 +107,26 @@ public void testAllPossibleScenarios() throws IOException { } }); } + + public void testSourceDisabled() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> b.startObject("_source").field("enabled", false).endObject())); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.sourcePath("desert")).thenReturn(Set.of("desert")); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + when(queryShardContext.documentMapper(anyString())).thenReturn(mapperService.documentMapper()); + IllegalArgumentException e = expectThrows( + IllegalArgumentException.class, + () -> new SourceFieldMatchQuery( + QueryBuilders.matchQuery("desert", "apple").doToQuery(queryShardContext), // Delegate query + QueryBuilders.matchQuery("desert", "pie").doToQuery(queryShardContext), // Filter query + queryShardContext.getFieldType("desert"), + queryShardContext + ) + ); + assertEquals( + "SourceFieldMatchQuery error: unable to fetch fields from _source field: " + + "_source is disabled in the mappings for index [test_index]", + e.getMessage() + ); + } } From cb273cdc2a048d6625b3dbff6475ff490c309ac1 Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Fri, 17 Nov 2023 12:06:58 -0800 Subject: [PATCH 11/24] Add unit test for missing field Signed-off-by: Rishabh Maurya --- .../query/SourceFieldMatchQueryTests.java | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java b/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java index 6c3daf86af537..f3d8ec05ed414 100644 --- a/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java +++ b/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java @@ -129,4 +129,39 @@ public void testSourceDisabled() throws IOException { e.getMessage() ); } + + public void 
testMissingField() throws IOException { + MapperService mapperService = createMapperService(mapping(b -> { + b.startObject("desert"); + { + b.field("type", "match_only_text"); + } + b.endObject(); + })); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.sourcePath("desert")).thenReturn(Set.of("desert")); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + when(queryShardContext.documentMapper(anyString())).thenReturn(mapperService.documentMapper()); + + String[] deserts = new String[] { "apple pie pie", "banana split pie", "chocolate cake" }; + List docs = new ArrayList<>(); + for (String desert : deserts) { + docs.add(mapperService.documentMapper().parse(source(b -> b.field("desert", desert)))); + } + SourceFieldMatchQuery matchDelegate = new SourceFieldMatchQuery( + QueryBuilders.matchQuery("desert", "apple").doToQuery(queryShardContext), // Delegate query + QueryBuilders.matchQuery("username", "pie").doToQuery(queryShardContext), // Filter query missing field + queryShardContext.getFieldType("desert"), + queryShardContext + ); + withLuceneIndex(mapperService, iw -> { + for (ParsedDocument d : docs) { + iw.addDocument(d.rootDoc()); + } + }, reader -> { + IndexSearcher searcher = newSearcher(reader); + TopDocs topDocs = searcher.search(matchDelegate, 10); + assertEquals(topDocs.totalHits.value, 0); + }); + } } From b3b60bda7d1d4a5892214ac43923f27599c61b29 Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Fri, 17 Nov 2023 12:46:52 -0800 Subject: [PATCH 12/24] more validation tests and changelog update Signed-off-by: Rishabh Maurya --- .../mapper/MatchOnlyTextFieldMapperTests.java | 44 +++++++++++++++++-- .../index/mapper/MapperServiceTestCase.java | 4 +- .../aggregations/AggregatorTestCase.java | 3 +- 3 files changed, 43 insertions(+), 8 deletions(-) diff --git a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java 
b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java index d13a4de247f6b..bf39677c0ea14 100644 --- a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java @@ -167,13 +167,35 @@ public void testPositionIncrementGap() throws IOException {} public void testDefaultPositionIncrementGap() throws IOException {} @Override - public void testIndexPrefixMapping() throws IOException {} + public void testIndexPrefixMapping() throws IOException { + MapperParsingException e = expectThrows( + MapperParsingException.class, + () -> createDocumentMapper( + fieldMapping( + b -> b.field("type", textFieldName) + .field("analyzer", "standard") + .startObject("index_prefixes") + .field("min_chars", 2) + .field("max_chars", 10) + .endObject() + ) + ) + ); + assertEquals( + "Failed to parse mapping [_doc]: Index prefixes cannot be enabled on for match_only_text field. 
Use text field instead", + e.getMessage() + ); + } @Override - public void testIndexPrefixIndexTypes() throws IOException {} + public void testIndexPrefixIndexTypes() throws IOException { + // not supported and asserted the expected behavior in testIndexPrefixMapping + } @Override - public void testFastPhrasePrefixes() throws IOException {} + public void testFastPhrasePrefixes() throws IOException { + // not supported and asserted the expected behavior in testIndexPrefixMapping + } public void testPhrasePrefixes() throws IOException { MapperService mapperService = createMapperService(mapping(b -> { @@ -351,7 +373,21 @@ public void testPhrasePrefixes() throws IOException { } @Override - public void testFastPhraseMapping() throws IOException {} + public void testFastPhraseMapping() throws IOException { + MapperParsingException e = expectThrows(MapperParsingException.class, () -> createMapperService(mapping(b -> { + b.startObject("field") + .field("type", textFieldName) + .field("analyzer", "my_stop_analyzer") + .field("index_phrases", true) + .endObject(); + // "standard" will be replaced with MockSynonymAnalyzer + b.startObject("synfield").field("type", textFieldName).field("analyzer", "standard").field("index_phrases", true).endObject(); + }))); + assertEquals( + "Failed to parse mapping [_doc]: Index phrases cannot be enabled on for match_only_text field. 
Use text field instead", + e.getMessage() + ); + } @Override public void testSimpleMerge() throws IOException {} diff --git a/test/framework/src/main/java/org/opensearch/index/mapper/MapperServiceTestCase.java b/test/framework/src/main/java/org/opensearch/index/mapper/MapperServiceTestCase.java index 513b4516c879b..c2aa3c2436363 100644 --- a/test/framework/src/main/java/org/opensearch/index/mapper/MapperServiceTestCase.java +++ b/test/framework/src/main/java/org/opensearch/index/mapper/MapperServiceTestCase.java @@ -254,9 +254,7 @@ protected QueryShardContext createQueryShardContext(MapperService mapperService) when(queryShardContext.lookup()).thenReturn(new SearchLookup(mapperService, (ft, s) -> { throw new UnsupportedOperationException("search lookup not available"); })); - when(queryShardContext.getFieldType(any())).thenAnswer( - inv -> mapperService.fieldType(inv.getArguments()[0].toString()) - ); + when(queryShardContext.getFieldType(any())).thenAnswer(inv -> mapperService.fieldType(inv.getArguments()[0].toString())); return queryShardContext; } } diff --git a/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java index 0fa6937760a88..ac0447dbebf7e 100644 --- a/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java @@ -761,7 +761,8 @@ public void testSupportedFieldTypes() throws IOException { source.put("type", mappedType.getKey()); // Text is the only field that doesn't support DVs, instead FD - if (mappedType.getKey().equals(TextFieldMapper.CONTENT_TYPE) == false && mappedType.getKey().equals(MatchOnlyTextFieldMapper.CONTENT_TYPE) == false) { + if (mappedType.getKey().equals(TextFieldMapper.CONTENT_TYPE) == false + && mappedType.getKey().equals(MatchOnlyTextFieldMapper.CONTENT_TYPE) == false) { 
source.put("doc_values", "true"); } From 0e60e73c6b4d5478ad06f89a8b2759eb8b28ae44 Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Sun, 19 Nov 2023 15:21:05 -0800 Subject: [PATCH 13/24] Added integration tests for match_only_text replicating text field integ tests Signed-off-by: Rishabh Maurya --- .../11_match_field_match_only_text.yml | 67 ++++ .../20_ngram_search_field_match_only.yml | 138 +++++++ ...ram_highligthing_field_match_only_text.yml | 134 +++++++ .../40_query_string_field_match_only_text.yml | 56 +++ ...default_analyzer_field_match_only_text.yml | 39 ++ ...queries_with_synonyms_field_match_only.yml | 343 +++++++++++++++++ .../60_synonym_graph_field_match_only.yml | 206 +++++++++++ .../70_intervals_field_match_only_text.yml | 64 ++++ .../20_phrase_field_match_only_text.yml | 226 ++++++++++++ .../20_highlighting_field_match_only_text.yml | 201 ++++++++++ .../20_query_string_field_match_only_text.yml | 50 +++ .../30_sig_terms_field_match_only_text.yml | 154 ++++++++ .../90_sig_text_field_match_only.yml | 151 ++++++++ .../20_highlighting_field_match_only_text.yml | 137 +++++++ .../test/search/160_exists_query.yml | 61 ++++ ...ex_phrase_search_field_match_only_text.yml | 64 ++++ ...atch_bool_prefix_field_match_only_text.yml | 345 ++++++++++++++++++ .../320_disallow_queries_field_match_only.yml | 147 ++++++++ .../10_basic_field_match_only_field.yml | 101 +++++ 19 files changed, 2684 insertions(+) create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/11_match_field_match_only_text.yml create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only.yml create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/30_ngram_highligthing_field_match_only_text.yml create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/40_query_string_field_match_only_text.yml 
create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/41_query_string_with_default_analyzer_field_match_only_text.yml create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/50_queries_with_synonyms_field_match_only.yml create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/60_synonym_graph_field_match_only.yml create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/70_intervals_field_match_only_text.yml create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.suggest/20_phrase_field_match_only_text.yml create mode 100644 modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/search-as-you-type/20_highlighting_field_match_only_text.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string_field_match_only_text.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms_field_match_only_text.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/20_highlighting_field_match_only_text.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search/200_index_phrase_search_field_match_only_text.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix_field_match_only_text.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search_shards/10_basic_field_match_only_field.yml diff --git 
a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/11_match_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/11_match_field_match_only_text.yml new file mode 100644 index 0000000000000..a93890f2b3865 --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/11_match_field_match_only_text.yml @@ -0,0 +1,67 @@ +# integration tests for queries with specific analysis chains + +"match query with stacked stems": + # Tests the match query stemmed tokens are "stacked" on top of the unstemmed + # versions in the same position. + - do: + indices.create: + index: test + body: + settings: + number_of_shards: 1 + number_of_replicas: 1 + analysis: + analyzer: + index: + tokenizer: standard + filter: [lowercase] + search: + rest_total_hits_as_int: true + tokenizer: standard + filter: [lowercase, keyword_repeat, porter_stem, unique_stem] + filter: + unique_stem: + type: unique + only_on_same_position: true + mappings: + properties: + text: + type: match_only_text + analyzer: index + search_analyzer: search + + - do: + index: + index: test + id: 1 + body: { "text": "the fox runs across the street" } + refresh: true + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: fox runs + operator: AND + - match: {hits.total: 1} + + - do: + index: + index: test + id: 2 + body: { "text": "run fox run" } + refresh: true + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: fox runs + operator: AND + - match: {hits.total: 2} diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only.yml new file mode 100644 index 0000000000000..9d536d346f6f1 --- /dev/null +++ 
b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only.yml @@ -0,0 +1,138 @@ +"ngram search": + - do: + indices.create: + index: test + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + analysis: + analyzer: + my_analyzer: + tokenizer: standard + filter: [my_ngram] + filter: + my_ngram: + type: ngram + min: 2, + max: 2 + mappings: + properties: + text: + type: match_only_text + analyzer: my_analyzer + + - do: + index: + index: test + id: 1 + body: { "text": "foo bar baz" } + refresh: true + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: foa + - match: {hits.total: 1} + +--- +"testNGramCopyField": + - do: + indices.create: + index: test + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + max_ngram_diff: 9 + analysis: + analyzer: + my_ngram_analyzer: + tokenizer: my_ngram_tokenizer + tokenizer: + my_ngram_tokenizer: + type: ngram + min: 1, + max: 10 + token_chars: [] + mappings: + properties: + origin: + type: match_only_text + copy_to: meta + meta: + type: match_only_text + analyzer: my_ngram_analyzer + + - do: + index: + index: test + id: 1 + body: { "origin": "C.A1234.5678" } + refresh: true + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + meta: + query: 1234 + - match: {hits.total: 1} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + meta: + query: 1234.56 + - match: {hits.total: 1} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + meta: + query: A1234 + - match: {hits.total: 1} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + term: + meta: + value: a1234 + - match: {hits.total: 0} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + meta: + query: A1234 + analyzer: my_ngram_analyzer + - match: {hits.total: 1} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + meta: 
+ query: a1234 + analyzer: my_ngram_analyzer + - match: {hits.total: 1} diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/30_ngram_highligthing_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/30_ngram_highligthing_field_match_only_text.yml new file mode 100644 index 0000000000000..9c7d47827a27c --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/30_ngram_highligthing_field_match_only_text.yml @@ -0,0 +1,134 @@ +"ngram highlighting": + - do: + indices.create: + index: test + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + index.max_ngram_diff: 19 + analysis: + tokenizer: + my_ngramt: + type: ngram + min_gram: 1 + max_gram: 20 + token_chars: letter,digit + filter: + my_ngram: + type: ngram + min_gram: 1 + max_gram: 20 + analyzer: + name2_index_analyzer: + tokenizer: whitespace + filter: [my_ngram] + name_index_analyzer: + tokenizer: my_ngramt + name_search_analyzer: + tokenizer: whitespace + mappings: + properties: + name: + type: match_only_text + term_vector: with_positions_offsets + analyzer: name_index_analyzer + search_analyzer: name_search_analyzer + name2: + type: match_only_text + term_vector: with_positions_offsets + analyzer: name2_index_analyzer + search_analyzer: name_search_analyzer + + - do: + index: + index: test + id: 1 + refresh: true + body: + name: logicacmg ehemals avinci - the know how company + name2: logicacmg ehemals avinci - the know how company + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + name: + query: logica m + highlight: + fields: + - name: {} + - match: {hits.total: 1} + - match: {hits.hits.0.highlight.name.0: "logicacmg ehemals avinci - the know how company"} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + name: + query: logica ma + highlight: + fields: + - name: {} + - match: {hits.total: 1} + - 
match: {hits.hits.0.highlight.name.0: "logicacmg ehemals avinci - the know how company"} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + name: + query: logica + highlight: + fields: + - name: {} + - match: {hits.total: 1} + - match: {hits.hits.0.highlight.name.0: "logicacmg ehemals avinci - the know how company"} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + name2: + query: logica m + highlight: + fields: + - name2: {} + - match: {hits.total: 1} + - match: {hits.hits.0.highlight.name2.0: "logicacmg ehemals avinci - the know how company"} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + name2: + query: logica ma + highlight: + fields: + - name2: {} + - match: {hits.total: 1} + - match: {hits.hits.0.highlight.name2.0: "logicacmg ehemals avinci - the know how company"} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + name2: + query: logica + highlight: + fields: + - name2: {} + - match: {hits.total: 1} + - match: {hits.hits.0.highlight.name2.0: "logicacmg ehemals avinci - the know how company"} diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/40_query_string_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/40_query_string_field_match_only_text.yml new file mode 100644 index 0000000000000..d3dc85c87ed46 --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/40_query_string_field_match_only_text.yml @@ -0,0 +1,56 @@ +--- +"Test query string with snowball": + - do: + indices.create: + index: test + body: + mappings: + properties: + field: + type: match_only_text + number: + type: integer + + - do: + index: + index: test + id: 1 + body: { field: foo bar} + + - do: + indices.refresh: + index: [test] + + - do: + indices.validate_query: + index: test + q: field:bars + analyzer: snowball + + - 
is_true: valid + + - do: + search: + rest_total_hits_as_int: true + index: test + q: field:bars + analyzer: snowball + + - match: {hits.total: 1} + + - do: + explain: + index: test + id: 1 + q: field:bars + analyzer: snowball + + - is_true: matched + + - do: + count: + index: test + q: field:bars + analyzer: snowball + + - match: {count : 1} diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/41_query_string_with_default_analyzer_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/41_query_string_with_default_analyzer_field_match_only_text.yml new file mode 100644 index 0000000000000..eb884644eac38 --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/41_query_string_with_default_analyzer_field_match_only_text.yml @@ -0,0 +1,39 @@ +--- +"Test default search analyzer is applied": + - do: + indices.create: + index: test + body: + settings: + index.analysis.analyzer.default.type: simple + index.analysis.analyzer.default_search.type: german + mappings: + properties: + body: + type: match_only_text + + - do: + index: + index: test + id: 1 + body: + body: Ich lese die Bücher + + - do: + indices.refresh: + index: [ test ] + + - do: + search: + index: test + q: "body:Bücher" + + - match: { hits.total.value: 0 } + + - do: + search: + index: test + q: "body:Bücher" + analyzer: simple + + - match: { hits.total.value: 1 } diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/50_queries_with_synonyms_field_match_only.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/50_queries_with_synonyms_field_match_only.yml new file mode 100644 index 0000000000000..b79511d5c4dfd --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/50_queries_with_synonyms_field_match_only.yml @@ -0,0 +1,343 @@ +--- +"Test common 
terms query with stacked tokens": + - skip: + features: "allowed_warnings" + + - do: + indices.create: + index: test + body: + settings: + analysis: + filter: + syns: + type: synonym + synonyms: [ "quick,fast" ] + analyzer: + syns: + tokenizer: standard + filter: [ "syns" ] + mappings: + properties: + field1: + type: match_only_text + analyzer: syns + field2: + type: match_only_text + analyzer: syns + + - do: + index: + index: test + id: 3 + body: + field1: quick lazy huge brown pidgin + field2: the quick lazy huge brown fox jumps over the tree + + - do: + index: + index: test + id: 1 + body: + field1: the quick brown fox + + - do: + index: + index: test + id: 2 + body: + field1: the quick lazy huge brown fox jumps over the tree + refresh: true + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + common: + field1: + query: the fast brown + cutoff_frequency: 3 + low_freq_operator: or + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.2._id: "3" } + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + common: + field1: + query: the fast brown + cutoff_frequency: 3 + low_freq_operator: and + - match: { hits.total: 2 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + common: + field1: + query: the fast brown + cutoff_frequency: 3 + - match: { hits.total: 3 
} + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.2._id: "3" } + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + common: + field1: + query: the fast huge fox + minimum_should_match: + low_freq: 3 + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "2" } + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + common: + field1: + query: the fast lazy fox brown + cutoff_frequency: 1 + minimum_should_match: + high_freq: 5 + - match: { hits.total: 2 } + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.1._id: "1" } + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + common: + field1: + query: the fast lazy fox brown + cutoff_frequency: 1 + minimum_should_match: + high_freq: 6 + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "2" } + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + common: + field1: + query: the fast lazy fox brown + cutoff_frequency: 1 + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "2" } + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + 
body: + query: + common: + field1: + query: the quick brown + cutoff_frequency: 3 + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.2._id: "3" } + + - do: + allowed_warnings: + - 'Deprecated field [cutoff_frequency] used, replaced by [you can omit this option, the [match] query can skip block of documents efficiently if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + match: + field1: + query: the fast brown + cutoff_frequency: 3 + operator: and + - match: { hits.total: 2 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + + - do: + allowed_warnings: + - 'Deprecated field [cutoff_frequency] used, replaced by [you can omit this option, the [match] query can skip block of documents efficiently if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + match: + field1: + query: the fast brown + cutoff_frequency: 3 + operator: or + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.2._id: "3" } + + - do: + allowed_warnings: + - 'Deprecated field [cutoff_frequency] used, replaced by [you can omit this option, the [match] query can skip block of documents efficiently if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + match: + field1: + query: the fast brown + cutoff_frequency: 3 + minimum_should_match: 3 + - match: { hits.total: 2 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + + - do: + allowed_warnings: + - 'Deprecated field [cutoff_frequency] used, replaced by [you can omit this option, the [multi_match] query can skip block of documents efficiently if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + multi_match: + query: the fast brown + fields: [ "field1", "field2" ] + 
cutoff_frequency: 3 + operator: and + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "3" } + - match: { hits.hits.1._id: "1" } + - match: { hits.hits.2._id: "2" } + +--- +"Test match query with synonyms - see #3881 for extensive description of the issue": + - do: + indices.create: + index: test + body: + settings: + analysis: + filter: + synonym: + type: synonym + synonyms: [ "quick,fast" ] + analyzer: + index: + type: custom + tokenizer: standard + filter: lowercase + search: + rest_total_hits_as_int: true + type: custom + tokenizer: standard + filter: [ lowercase, synonym ] + mappings: + properties: + text: + type: match_only_text + analyzer: index + search_analyzer: search + + - do: + index: + index: test + id: 1 + body: + text: quick brown fox + refresh: true + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: quick + operator: and + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: quick brown + operator: and + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: fast + operator: and + - match: { hits.total: 1 } + + - do: + index: + index: test + id: 2 + body: + text: fast brown fox + refresh: true + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: quick + operator: and + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: quick brown + operator: and + - match: { hits.total: 2 } diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/60_synonym_graph_field_match_only.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/60_synonym_graph_field_match_only.yml new file mode 100644 index 0000000000000..4b8bc4857250f --- /dev/null +++ 
b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/60_synonym_graph_field_match_only.yml @@ -0,0 +1,206 @@ +setup: + - do: + indices.create: + index: test + body: + settings: + index: + number_of_shards: 1 # keep scoring stable + analysis: + filter: + syns: + type: synonym + synonyms: [ "wtf, what the fudge", "foo, bar baz" ] + graph_syns: + type: synonym_graph + synonyms: [ "wtf, what the fudge", "foo, bar baz" ] + analyzer: + lower_syns: + type: custom + tokenizer: standard + filter: [ lowercase, syns ] + lower_graph_syns: + type: custom + tokenizer: standard + filter: [ lowercase, graph_syns ] + mappings: + properties: + field: + type: match_only_text + + - do: + index: + index: test + id: 1 + body: + text: say wtf happened foo + - do: + index: + index: test + id: 2 + body: + text: bar baz what the fudge man + + - do: + index: + index: test + id: 3 + body: + text: wtf + + - do: + index: + index: test + id: 4 + body: + text: what is the name for fudge + + - do: + index: + index: test + id: 5 + body: + text: bar two three + + - do: + index: + index: test + id: 6 + body: + text: bar baz two three + refresh: true + +--- +"simple multiterm phrase": + - do: + search: + rest_total_hits_as_int: true + body: + query: + match_phrase: + text: + query: foo two three + analyzer: lower_syns + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "5" } # incorrect match because we're not using graph synonyms + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match_phrase: + text: + query: foo two three + analyzer: lower_graph_syns + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "6" } # correct match because we're using graph synonyms + +--- +"simple multiterm and": + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: say what the fudge + analyzer: lower_syns + operator: and + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "1" } # non-graph synonyms coincidentally 
give us the correct answer here + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: say what the fudge + analyzer: lower_graph_syns + operator: and + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "1" } + +--- +"minimum should match": + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: three what the fudge foo + operator: or + analyzer: lower_graph_syns + auto_generate_synonyms_phrase_query: false + - match: { hits.total: 6 } + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: three what the fudge foo + operator: or + analyzer: lower_graph_syns + minimum_should_match: 80% + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.1._id: "6" } + - match: { hits.hits.2._id: "1" } + +--- +"multiterm synonyms phrase": + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: wtf + operator: and + analyzer: lower_graph_syns + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.1._id: "3" } + - match: { hits.hits.2._id: "1" } + +--- +"phrase prefix": + - do: + index: + index: test + id: 7 + body: + text: "WTFD!" 
+ + - do: + index: + index: test + id: 8 + body: + text: "Weird Al's WHAT THE FUDGESICLE" + refresh: true + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match_phrase_prefix: + text: + query: wtf + analyzer: lower_graph_syns + - match: { hits.total: 5 } + - match: { hits.hits.0._id: "3" } + - match: { hits.hits.1._id: "7" } + - match: { hits.hits.2._id: "1" } + - match: { hits.hits.3._id: "8" } + - match: { hits.hits.4._id: "2" } diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/70_intervals_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/70_intervals_field_match_only_text.yml new file mode 100644 index 0000000000000..6691621fbdf22 --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/70_intervals_field_match_only_text.yml @@ -0,0 +1,64 @@ +# integration tests for intervals queries using analyzers +setup: + - do: + indices.create: + index: test + body: + mappings: + properties: + text: + type: match_only_text + analyzer: standard + text_en: + type: match_only_text + analyzer: english + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "test", "_id": "4"}}' + - '{"text" : "Outside it is cold and wet and raining cats and dogs", + "text_en" : "Outside it is cold and wet and raining cats and dogs"}' + +--- +"Test use_field": + - skip: + version: " - 7.1.99" + reason: "Implemented in 7.2" + - do: + catch: bad_request + search: + index: test + body: + query: + intervals: + text: + all_of: + intervals: + - match: + query: cats + - match: + query: dog + max_gaps: 1 + - match: { status: 400 } + - match: { error.type: "search_phase_execution_exception"} + - match: { error.reason: "all shards failed"} + - do: + catch: bad_request + search: + index: test + body: + query: + intervals: + text: + all_of: + intervals: + - match: + query: cats + - match: + query: dog + use_field: text_en + 
max_gaps: 1 + - match: { status: 400 } + - match: { error.type: "search_phase_execution_exception"} + - match: { error.reason: "all shards failed"} diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.suggest/20_phrase_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.suggest/20_phrase_field_match_only_text.yml new file mode 100644 index 0000000000000..38afe7db89efd --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.suggest/20_phrase_field_match_only_text.yml @@ -0,0 +1,226 @@ +# Integration tests for the phrase suggester with a few analyzers + +setup: + - do: + indices.create: + index: test + body: + settings: + number_of_shards: 1 + number_of_replicas: 1 + analysis: + analyzer: + body: + tokenizer: standard + filter: [lowercase] + bigram: + tokenizer: standard + filter: [lowercase, bigram] + ngram: + tokenizer: standard + filter: [lowercase, ngram] + reverse: + tokenizer: standard + filter: [lowercase, reverse] + filter: + bigram: + type: shingle + output_unigrams: false + min_shingle_size: 2 + max_shingle_size: 2 + ngram: + type: shingle + output_unigrams: true + min_shingle_size: 2 + max_shingle_size: 2 + mappings: + properties: + body: + type: match_only_text + analyzer: body + fields: + bigram: + type: match_only_text + analyzer: bigram + ngram: + type: match_only_text + analyzer: ngram + reverse: + type: match_only_text + analyzer: reverse + + - do: + bulk: + index: test + refresh: true + body: | + { "index": {} } + { "body": "Xorr the God-Jewel" } + { "index": {} } + { "body": "Xorn" } + { "index": {} } + { "body": "Arthur, King of the Britons" } + { "index": {} } + { "body": "Sir Lancelot the Brave" } + { "index": {} } + { "body": "Patsy, Arthur's Servant" } + { "index": {} } + { "body": "Sir Robin the Not-Quite-So-Brave-as-Sir-Lancelot" } + { "index": {} } + { "body": "Sir Bedevere the Wise" } + { "index": {} } + { 
"body": "Sir Galahad the Pure" } + { "index": {} } + { "body": "Miss Islington, the Witch" } + { "index": {} } + { "body": "Zoot" } + { "index": {} } + { "body": "Leader of Robin's Minstrels" } + { "index": {} } + { "body": "Old Crone" } + { "index": {} } + { "body": "Frank, the Historian" } + { "index": {} } + { "body": "Frank's Wife" } + { "index": {} } + { "body": "Dr. Piglet" } + { "index": {} } + { "body": "Dr. Winston" } + { "index": {} } + { "body": "Sir Robin (Stand-in)" } + { "index": {} } + { "body": "Knight Who Says Ni" } + { "index": {} } + { "body": "Police sergeant who stops the film" } + +--- +"sorts by score": + - do: + search: + rest_total_hits_as_int: true + size: 0 + index: test + body: + suggest: + text: xor the got-jewel + test: + phrase: + field: body.ngram + force_unigrams: true + max_errors: 0.5 + direct_generator: + - field: body.ngram + min_word_length: 1 + suggest_mode: always + + - match: {suggest.test.0.options.0.text: xorr the god jewel} + - match: {suggest.test.0.options.1.text: xorn the god jewel} + +--- +"breaks ties by sorting terms": + # This runs the suggester without bigrams so we can be sure of the sort order + - do: + search: + rest_total_hits_as_int: true + size: 0 + index: test + body: + suggest: + text: xor the got-jewel + test: + phrase: + field: body + analyzer: body + force_unigrams: true + max_errors: 0.5 + direct_generator: + - field: body + min_word_length: 1 + suggest_mode: always + + # The scores are identical but xorn comes first because it sorts first + - match: {suggest.test.0.options.0.text: xorn the god jewel} + - match: {suggest.test.0.options.1.text: xorr the god jewel} + - match: {suggest.test.0.options.0.score: $body.suggest.test.0.options.0.score} + +--- +"fails when asked to run on a field without unigrams": + - do: + catch: /since it doesn't emit unigrams/ + search: + rest_total_hits_as_int: true + size: 0 + index: test + body: + suggest: + text: xor the got-jewel + test: + phrase: + field: body.bigram + 
+ - do: + catch: /since it doesn't emit unigrams/ + search: + rest_total_hits_as_int: true + size: 0 + index: test + body: + suggest: + text: xor the got-jewel + test: + phrase: + field: body.bigram + analyzer: bigram + +--- +"doesn't fail when asked to run on a field without unigrams when force_unigrams=false": + - do: + search: + rest_total_hits_as_int: true + size: 0 + index: test + body: + suggest: + text: xor the got-jewel + test: + phrase: + field: body.bigram + force_unigrams: false + + - do: + search: + rest_total_hits_as_int: true + size: 0 + index: test + body: + suggest: + text: xor the got-jewel + test: + phrase: + field: body.bigram + analyzer: bigram + force_unigrams: false + +--- +"reverse suggestions": + - do: + search: + rest_total_hits_as_int: true + size: 0 + index: test + body: + suggest: + text: Artur, Ging of the Britons + test: + phrase: + field: body.ngram + force_unigrams: true + max_errors: 0.5 + direct_generator: + - field: body.reverse + min_word_length: 1 + suggest_mode: always + pre_filter: reverse + post_filter: reverse + + - match: {suggest.test.0.options.0.text: arthur king of the britons} diff --git a/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/search-as-you-type/20_highlighting_field_match_only_text.yml b/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/search-as-you-type/20_highlighting_field_match_only_text.yml new file mode 100644 index 0000000000000..19c50b9157f32 --- /dev/null +++ b/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/search-as-you-type/20_highlighting_field_match_only_text.yml @@ -0,0 +1,201 @@ +setup: + - skip: + version: " - 7.1.99" + reason: "added in 7.2.0" + + - do: + indices.create: + index: test + body: + settings: + number_of_replicas: 0 + mappings: + properties: + a_field: + type: search_as_you_type + analyzer: simple + max_shingle_size: 4 + text_field: + type: match_only_text + analyzer: simple + + - do: + index: + index: test + id: 1 + 
body: + a_field: "quick brown fox jump lazy dog" + text_field: "quick brown fox jump lazy dog" + + - do: + indices.refresh: {} + +--- +"phrase query": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + a_field: "brown" + highlight: + fields: + a_field: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field.0: "quick brown fox jump lazy dog" } + +--- +"bool prefix query": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + a_field: "brown fo" + highlight: + fields: + a_field: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field.0: "quick brown fox jump lazy dog" } + +--- +"multi match bool prefix query 1 complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fo" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + highlight: + fields: + a_field: + type: unified + a_field._2gram: + type: unified + a_field._3gram: + type: unified + a_field._4gram: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._2gram: null } + - match: { hits.hits.0.highlight.a_field\._3gram: null } + - match: { hits.hits.0.highlight.a_field\._4gram: null } + +--- +"multi match bool prefix query 2 complete term": + + - do: + search: 
+ rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox ju" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + highlight: + fields: + a_field: + type: unified + a_field._2gram: + type: unified + a_field._3gram: + type: unified + a_field._4gram: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._2gram: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._3gram: null } + - match: { hits.hits.0.highlight.a_field\._4gram: null } + +--- +"multi match bool prefix query 3 complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump la" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + highlight: + fields: + a_field: + type: unified + a_field._2gram: + type: unified + a_field._3gram: + type: unified + a_field._4gram: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._2gram: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._3gram: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._4gram: null } + +--- +"multi match bool prefix query 4 complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump lazy d" + type: "bool_prefix" + fields: [ "a_field", 
"a_field._2gram", "a_field._3gram", "a_field._4gram" ] + highlight: + fields: + a_field: + type: unified + a_field._2gram: + type: unified + a_field._3gram: + type: unified + a_field._4gram: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._2gram: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._3gram: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._4gram: ["quick brown fox jump lazy dog"] } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string_field_match_only_text.yml new file mode 100644 index 0000000000000..18129e1d11861 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string_field_match_only_text.yml @@ -0,0 +1,50 @@ +--- +"validate_query with query_string parameters": + - do: + indices.create: + index: test + body: + mappings: + properties: + field: + type: match_only_text + number: + type: integer + + - do: + indices.validate_query: + index: test + q: bar + df: field + + - is_true: valid + + - do: + indices.validate_query: + index: test + q: field:foo field:xyz + + - is_true: valid + + - do: + indices.validate_query: + index: test + q: field:foo field:xyz + default_operator: AND + + - is_true: valid + + - do: + indices.validate_query: + index: test + q: field:BA* + + - is_true: valid + + - do: + indices.validate_query: + index: test + q: number:foo + lenient: true + + - is_true: valid diff --git 
a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms_field_match_only_text.yml new file mode 100644 index 0000000000000..c75dee019a351 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms_field_match_only_text.yml @@ -0,0 +1,154 @@ +--- +"Default index": + - do: + indices.create: + index: goodbad + body: + settings: + number_of_shards: "1" + mappings: + properties: + text: + type: match_only_text + fielddata: true + class: + type: keyword + + - do: + index: + index: goodbad + id: 1 + body: { text: "good", class: "good" } + - do: + index: + index: goodbad + id: 2 + body: { text: "good", class: "good" } + - do: + index: + index: goodbad + id: 3 + body: { text: "bad", class: "bad" } + - do: + index: + index: goodbad + id: 4 + body: { text: "bad", class: "bad" } + - do: + index: + index: goodbad + id: 5 + body: { text: "good bad", class: "good" } + - do: + index: + index: goodbad + id: 6 + body: { text: "good bad", class: "bad" } + - do: + index: + index: goodbad + id: 7 + body: { text: "bad", class: "bad" } + + + + - do: + indices.refresh: + index: [goodbad] + + - do: + search: + rest_total_hits_as_int: true + index: goodbad + + - match: {hits.total: 7} + + - do: + search: + rest_total_hits_as_int: true + index: goodbad + body: {"aggs": {"class": {"terms": {"field": "class"},"aggs": {"sig_terms": {"significant_terms": {"field": "text"}}}}}} + + - match: {aggregations.class.buckets.0.sig_terms.buckets.0.key: "bad"} + - match: {aggregations.class.buckets.1.sig_terms.buckets.0.key: "good"} + +--- +"IP test": + - do: + indices.create: + index: ip_index + body: + mappings: + properties: + ip: + type: ip + + - do: + index: + index: ip_index + id: 1 + body: { ip: "::1" } + - do: + index: + index: ip_index + id: 2 + body: { } + + - do: + indices.refresh: {} + + - do: + search: + 
rest_total_hits_as_int: true + body: { "query" : { "exists" : { "field" : "ip" } }, "aggs" : { "ip_terms" : { "significant_terms" : { "field" : "ip", "min_doc_count" : 1 } } } } + + - match: { hits.total: 1 } + + - length: { aggregations.ip_terms.buckets: 1 } + + - match: { aggregations.ip_terms.buckets.0.key: "::1" } + + - is_false: aggregations.ip_terms.buckets.0.key_as_string + + - match: { aggregations.ip_terms.buckets.0.doc_count: 1 } + + - do: + search: + rest_total_hits_as_int: true + body: { "query" : { "exists" : { "field" : "ip" } }, "aggs" : { "ip_terms" : { "significant_terms" : { "field" : "ip", "min_doc_count" : 1, "include" : [ "::1" ] } } } } + + - match: { hits.total: 1 } + + - length: { aggregations.ip_terms.buckets: 1 } + + - match: { aggregations.ip_terms.buckets.0.key: "::1" } + + - do: + search: + rest_total_hits_as_int: true + body: { "query" : { "exists" : { "field" : "ip" } }, "aggs" : { "ip_terms" : { "significant_terms" : { "field" : "ip", "min_doc_count" : 1, "exclude" : [ "::1" ] } } } } + + - match: { hits.total: 1 } + + - length: { aggregations.ip_terms.buckets: 0 } + + - do: + catch: /Aggregation \[ip_terms\] cannot support regular expression style include\/exclude settings as they can only be applied to string fields\. 
Use an array of values for include\/exclude clauses/ + search: + rest_total_hits_as_int: true + body: { "size" : 0, "aggs" : { "ip_terms" : { "significant_terms" : { "field" : "ip", "exclude" : "127.*" } } } } + +--- +'Misspelled fields get "did you mean"': + - skip: + version: " - 7.6.99" + reason: Implemented in 8.0 (to be backported to 7.7) + - do: + catch: /\[significant_terms\] unknown field \[jlp\] did you mean \[jlh\]\?/ + search: + body: + aggs: + foo: + significant_terms: + field: foo + jlp: {} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only.yml new file mode 100644 index 0000000000000..873d59907982a --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only.yml @@ -0,0 +1,151 @@ +--- +"Default index": + + - do: + indices.create: + index: goodbad + body: + settings: + number_of_shards: "1" + mappings: + properties: + text: + type: match_only_text + fielddata: false + class: + type: keyword + + - do: + index: + index: goodbad + id: 1 + body: { text: "good", class: "good" } + - do: + index: + index: goodbad + id: 2 + body: { text: "good", class: "good" } + - do: + index: + index: goodbad + id: 3 + body: { text: "bad", class: "bad" } + - do: + index: + index: goodbad + id: 4 + body: { text: "bad", class: "bad" } + - do: + index: + index: goodbad + id: 5 + body: { text: "good bad", class: "good" } + - do: + index: + index: goodbad + id: 6 + body: { text: "good bad", class: "bad" } + - do: + index: + index: goodbad + id: 7 + body: { text: "bad", class: "bad" } + + + + - do: + indices.refresh: + index: [goodbad] + + - do: + search: + rest_total_hits_as_int: true + index: goodbad + + - match: {hits.total: 7} + + - do: + search: + rest_total_hits_as_int: true + index: goodbad + body: {"aggs": {"class": {"terms": {"field": 
"class"},"aggs": {"sig_text": {"significant_text": {"field": "text"}}}}}} + + - match: {aggregations.class.buckets.0.sig_text.buckets.0.key: "bad"} + - match: {aggregations.class.buckets.1.sig_text.buckets.0.key: "good"} + +--- +"Dedup noise": + + - do: + indices.create: + index: goodbad + body: + settings: + number_of_shards: "1" + mappings: + properties: + text: + type: match_only_text + fielddata: false + class: + type: keyword + + - do: + index: + index: goodbad + id: 1 + body: { text: "good noisewords1 g1 g2 g3 g4 g5 g6", class: "good" } + - do: + index: + index: goodbad + id: 2 + body: { text: "good noisewords2 g1 g2 g3 g4 g5 g6", class: "good" } + - do: + index: + index: goodbad + id: 3 + body: { text: "bad noisewords3 b1 b2 b3 b4 b5 b6", class: "bad" } + - do: + index: + index: goodbad + id: 4 + body: { text: "bad noisewords4 b1 b2 b3 b4 b5 b6", class: "bad" } + - do: + index: + index: goodbad + id: 5 + body: { text: "good bad noisewords5 gb1 gb2 gb3 gb4 gb5 gb6", class: "good" } + - do: + index: + index: goodbad + id: 6 + body: { text: "good bad noisewords6 gb1 gb2 gb3 gb4 gb5 gb6", class: "bad" } + - do: + index: + index: goodbad + id: 7 + body: { text: "bad noisewords7 b1 b2 b3 b4 b5 b6", class: "bad" } + + + + - do: + indices.refresh: + index: [goodbad] + + - do: + search: + rest_total_hits_as_int: true + index: goodbad + + - match: {hits.total: 7} + + - do: + search: + rest_total_hits_as_int: true + index: goodbad + body: {"aggs": {"class": {"terms": {"field": "class"},"aggs": {"sig_text": {"significant_text": {"field": "text", "filter_duplicate_text": true}}}}}} + + - match: {aggregations.class.buckets.0.sig_text.buckets.0.key: "bad"} + - length: { aggregations.class.buckets.0.sig_text.buckets: 1 } + - match: {aggregations.class.buckets.1.sig_text.buckets.0.key: "good"} + - length: { aggregations.class.buckets.1.sig_text.buckets: 1 } diff --git 
a/rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/20_highlighting_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/20_highlighting_field_match_only_text.yml new file mode 100644 index 0000000000000..a1b9e7a5506e2 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/20_highlighting_field_match_only_text.yml @@ -0,0 +1,137 @@ +setup: + - do: + indices.create: + index: test + body: + mappings: + _source: + excludes: ["nested.stored_only"] + properties: + nested: + type: nested + properties: + field: + type: text + fields: + vectors: + type: text + term_vector: "with_positions_offsets" + postings: + type: text + index_options: "offsets" + stored: + type: match_only_text + store: true + stored_only: + type: match_only_text + store: true + - do: + index: + index: test + id: 1 + refresh: true + body: + nested: + field : "The quick brown fox is brown." + stored : "The quick brown fox is brown." + stored_only : "The quick brown fox is brown." + +--- +"Unified highlighter": + - do: + search: + index: test + body: + query: + nested: + path: "nested" + query: + multi_match: + query: "quick brown fox" + fields: [ "nested.field", "nested.field.vectors", "nested.field.postings" ] + inner_hits: + highlight: + type: "unified" + fields: + nested.field: {} + nested.field.vectors: {} + nested.field.postings: {} + + - match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.field.0: "The quick brown fox is brown." } + - match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.field\.vectors.0: "The quick brown fox is brown." } + - match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.field\.postings.0: "The quick brown fox is brown." 
} + +--- +"Unified highlighter with stored fields": + - do: + search: + index: test + body: + query: + nested: + path: "nested" + query: + multi_match: + query: "quick brown fox" + fields: [ "nested.stored", "nested.stored_only" ] + inner_hits: + highlight: + type: "unified" + fields: + nested.stored: {} + nested.stored_only: {} + + - match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.stored.0: "The quick brown fox is brown." } + - match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.stored_only.0: "The quick brown fox is brown." } + +--- +"Unified highlighter with stored fields and disabled source": + - skip: + version: "- 7.10.1" + reason: "bug fix introduced in 7.10.2" + - do: + indices.create: + index: disabled_source + body: + mappings: + _source: + enabled: false + properties: + nested: + type: nested + properties: + field: + type: match_only_text + stored_only: + type: match_only_text + store: true + - do: + index: + index: disabled_source + id: 1 + refresh: true + body: + nested: + field: "The quick brown fox is brown." + stored_only: "The quick brown fox is brown." 
+ + - do: + search: + index: disabled_source + body: + query: + nested: + path: "nested" + query: + multi_match: + query: "quick brown fox" + fields: ["nested.field", "nested.stored_only"] + inner_hits: + highlight: + type: "unified" + fields: + nested.field: {} + nested.stored_only: {} + + - is_false: hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.field + - match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.stored_only.0: "The quick brown fox is brown."} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query.yml index be97930d41eb9..582880b0748c9 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query.yml @@ -45,6 +45,8 @@ setup: type: keyword text: type: text + match_only_text: + type: match_only_text - do: headers: @@ -70,6 +72,7 @@ setup: inner1: "foo" inner2: "bar" text: "foo bar" + match_only_text: "foo bar" - do: headers: @@ -94,6 +97,7 @@ setup: object: inner1: "foo" text: "foo bar" + match_only_text: "foo bar" - do: headers: @@ -119,6 +123,7 @@ setup: object: inner2: "bar" text: "foo bar" + match_only_text: "foo bar" - do: index: @@ -184,6 +189,8 @@ setup: doc_values: false text: type: text + match_only_text: + type: match_only_text - do: headers: @@ -209,6 +216,7 @@ setup: inner1: "foo" inner2: "bar" text: "foo bar" + match_only_text: "foo bar" - do: headers: @@ -233,6 +241,7 @@ setup: object: inner1: "foo" text: "foo bar" + match_only_text: "foo bar" - do: headers: @@ -258,6 +267,7 @@ setup: object: inner2: "bar" text: "foo bar" + match_only_text: "foo bar" - do: index: @@ -322,6 +332,8 @@ setup: type: keyword text: type: text + match_only_text: + type: match_only_text - do: indices.refresh: @@ -534,7 +546,18 @@ setup: field: text - match: {hits.total: 3} +--- +"Test exists query on mapped 
match_only_text field": + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + exists: + field: match_only_text + - match: {hits.total: 3} --- "Test exists query on _id field": - do: @@ -821,6 +844,18 @@ setup: - match: {hits.total: 0} --- +"Test exists query on unmapped match_only_text field": + - do: + search: + rest_total_hits_as_int: true + index: test-unmapped + body: + query: + exists: + field: match_only_text + + - match: {hits.total: 0} +--- "Test exists query on binary field in empty index": - do: search: @@ -1028,6 +1063,19 @@ setup: - match: {hits.total: 0} +--- +"Test exists query on match_only_text field in empty index": + - do: + search: + rest_total_hits_as_int: true + index: test-empty + body: + query: + exists: + field: match_only_text + + - match: {hits.total: 0} + --- "Test exists query on mapped binary field with no doc values": - do: @@ -1236,3 +1284,16 @@ setup: field: text - match: {hits.total: 3} + +--- +"Test exists query on mapped match_only_text field with no doc values": + - do: + search: + rest_total_hits_as_int: true + index: test-no-dv + body: + query: + exists: + field: match_only_text + + - match: {hits.total: 3} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/200_index_phrase_search_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/200_index_phrase_search_field_match_only_text.yml new file mode 100644 index 0000000000000..7c0499de538cf --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/200_index_phrase_search_field_match_only_text.yml @@ -0,0 +1,64 @@ +--- +"search with indexed phrases": + - do: + indices.create: + index: test + body: + mappings: + properties: + text: + type: match_only_text + index_phrases: true + + - do: + index: + index: test + id: 1 + body: { text: "peter piper picked a peck of pickled peppers" } + + - do: + indices.refresh: + index: [test] + + - do: + search: + rest_total_hits_as_int: true + 
index: test + body: + query: + match_phrase: + text: + query: "peter piper" + + - match: {hits.total: 1} + + - do: + search: + rest_total_hits_as_int: true + index: test + q: '"peter piper"~1' + df: text + + - match: {hits.total: 1} + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + text: "peter piper picked" + + - match: {hits.total: 1} + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + text: "piper" + + - match: {hits.total: 1} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix_field_match_only_text.yml new file mode 100644 index 0000000000000..3b380bb808245 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix_field_match_only_text.yml @@ -0,0 +1,345 @@ +setup: + - skip: + version: " - 7.1.99" + reason: "added in 7.2.0" + + - do: + indices.create: + index: test + body: + mappings: + properties: + my_field1: + type: match_only_text + my_field2: + type: match_only_text + + - do: + index: + index: test + id: 1 + body: + my_field1: "brown fox jump" + my_field2: "xylophone" + + - do: + index: + index: test + id: 2 + body: + my_field1: "brown emu jump" + my_field2: "xylophone" + + - do: + index: + index: test + id: 3 + body: + my_field1: "jumparound" + my_field2: "emu" + + - do: + index: + index: test + id: 4 + body: + my_field1: "dog" + my_field2: "brown fox jump lazy" + + - do: + indices.refresh: {} + +--- +"scoring complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field1: "brown fox jump" + + - match: { hits.total: 3 } + - match: { hits.hits.0._source.my_field1: "brown fox jump" } + - match: { hits.hits.1._source.my_field1: "brown emu jump" } + - match: { hits.hits.2._source.my_field1: "jumparound" } 
+ +--- +"scoring partial term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field1: "brown fox ju" + + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._source.my_field1: "brown fox jump" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.1._source.my_field1: "brown emu jump" } + - match: { hits.hits.2._id: "3" } + - match: { hits.hits.2._source.my_field1: "jumparound" } + +--- +"minimum should match": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field1: + query: "brown fox jump" + minimum_should_match: 3 + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._source.my_field1: "brown fox jump" } + +--- +"analyzer": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field1: + query: "BROWN dog" + analyzer: whitespace # this analyzer doesn't lowercase terms + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + +--- +"operator": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field1: + query: "brown fox jump" + operator: AND + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._source.my_field1: "brown fox jump" } + +--- +"fuzziness": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field2: + query: "xylophoen foo" + fuzziness: 1 + prefix_length: 1 + max_expansions: 10 + fuzzy_transpositions: true + fuzzy_rewrite: constant_score + + - match: { hits.total: 2 } + - match: { hits.hits.0._source.my_field2: "xylophone" } + - match: { hits.hits.1._source.my_field2: "xylophone" } + +--- +"multi_match single field complete term": + + - do: + search: + rest_total_hits_as_int: true + index: 
test + body: + query: + multi_match: + query: "brown fox jump" + type: bool_prefix + fields: [ "my_field1" ] + + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._source.my_field1: "brown fox jump" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.1._source.my_field1: "brown emu jump" } + - match: { hits.hits.2._id: "3" } + - match: { hits.hits.2._source.my_field1: "jumparound" } + +--- +"multi_match single field partial term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox ju" + type: bool_prefix + fields: [ "my_field1" ] + + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._source.my_field1: "brown fox jump" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.1._source.my_field1: "brown emu jump" } + - match: { hits.hits.2._id: "3" } + - match: { hits.hits.2._source.my_field1: "jumparound" } + +--- +"multi_match multiple fields complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump lazy" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + - match: { hits.hits.0._source.my_field2: "brown fox jump lazy" } + - match: { hits.hits.1._id: "1" } + - match: { hits.hits.1._source.my_field1: "brown fox jump" } + - match: { hits.hits.2._id: "2" } + - match: { hits.hits.2._source.my_field1: "brown emu jump" } + +--- +"multi_match multiple fields partial term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump laz" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + - match: { hits.hits.0._source.my_field2: 
"brown fox jump lazy" } + - match: { hits.hits.1._id: "1" } + - match: { hits.hits.1._source.my_field1: "brown fox jump" } + - match: { hits.hits.2._id: "2" } + - match: { hits.hits.2._source.my_field1: "brown emu jump" } + +--- +"multi_match multiple fields with analyzer": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "BROWN FOX JUMP dog" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + analyzer: whitespace # this analyzer doesn't lowercase terms + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + - match: { hits.hits.0._source.my_field2: "brown fox jump lazy" } + +--- +"multi_match multiple fields with minimum_should_match": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump la" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + minimum_should_match: 4 + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + - match: { hits.hits.0._source.my_field2: "brown fox jump lazy" } + +--- +"multi_match multiple fields with fuzziness": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "dob nomatch" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + fuzziness: 1 + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + - match: { hits.hits.0._source.my_field2: "brown fox jump lazy" } + +--- +"multi_match multiple fields with slop throws exception": + + - do: + catch: /\[slop\] not allowed for type \[bool_prefix\]/ + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + slop: 1 + +--- +"multi_match multiple fields with cutoff_frequency throws exception": + + - 
do: + catch: /\[cutoff_frequency\] not allowed for type \[bool_prefix\]/ + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + cutoff_frequency: 0.001 diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only.yml new file mode 100644 index 0000000000000..40989de61810b --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only.yml @@ -0,0 +1,147 @@ +--- +setup: + - skip: + version: " - 7.6.99" + reason: "implemented in 7.7.0" + + - do: + indices.create: + index: test + body: + mappings: + properties: + text: + type: match_only_text + analyzer: standard + fields: + raw: + type: keyword + nested1: + type: nested + + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "test", "_id": "1"}}' + - '{"text" : "Some like it hot, some like it cold", "nested1": [{"foo": "bar1"}]}' + - '{"index": {"_index": "test", "_id": "2"}}' + - '{"text" : "Its cold outside, theres no kind of atmosphere", "nested1": [{"foo": "bar2"}]}' + - '{"index": {"_index": "test", "_id": "3"}}' + - '{"text" : "Baby its cold there outside", "nested1": [{"foo": "bar3"}]}' + - '{"index": {"_index": "test", "_id": "4"}}' + - '{"text" : "Outside it is cold and wet", "nested1": [{"foo": "bar4"}]}' + +--- +teardown: + - skip: + version: " - 7.6.99" + reason: "implemented in 7.7.0" + + - do: + cluster.put_settings: + body: + transient: + search.allow_expensive_queries: null + +--- +"Test disallow expensive queries": + - skip: + version: " - 7.6.99" + reason: "implemented in 7.7.0" + + ### Check for initial setting = null -> false + - do: + cluster.get_settings: + flat_settings: true + + - is_false: search.allow_expensive_queries + + ### Update setting to false + - do: + 
cluster.put_settings: + body: + transient: + search.allow_expensive_queries: "false" + flat_settings: true + + - match: {transient: {search.allow_expensive_queries: "false"}} + + ### Prefix + - do: + catch: /\[prefix\] queries cannot be executed when \'search.allow_expensive_queries\' is set to false. For optimised prefix queries on text fields please enable \[index_prefixes\]./ + search: + index: test + body: + query: + prefix: + text: + value: out + + ### Fuzzy + - do: + catch: /\[fuzzy\] queries cannot be executed when \'search.allow_expensive_queries\' is set to false./ + search: + index: test + body: + query: + fuzzy: + text: + value: outwide + + ### Regexp + - do: + catch: /\[regexp\] queries cannot be executed when \'search.allow_expensive_queries\' is set to false./ + search: + index: test + body: + query: + regexp: + text: + value: .*ou.*id.* + + ### Wildcard + - do: + catch: /\[wildcard\] queries cannot be executed when \'search.allow_expensive_queries\' is set to false./ + search: + index: test + body: + query: + wildcard: + text: + value: out?ide + + ### Range on text + - do: + catch: /\[range\] queries on \[text\] or \[keyword\] fields cannot be executed when \'search.allow_expensive_queries\' is set to false./ + search: + index: test + body: + query: + range: + text: + gte: "theres" + + ### Range on keyword + - do: + catch: /\[range\] queries on \[text\] or \[keyword\] fields cannot be executed when \'search.allow_expensive_queries\' is set to false./ + search: + index: test + body: + query: + range: + text.raw: + gte : "Outside it is cold and wet" + + ### Nested + - do: + catch: /\[joining\] queries cannot be executed when \'search.allow_expensive_queries\' is set to false./ + search: + index: test + body: + query: + nested: + path: "nested1" + query: + bool: + must: [{"match" : {"nested1.foo" : "bar2"}}] diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search_shards/10_basic_field_match_only_field.yml 
b/rest-api-spec/src/main/resources/rest-api-spec/test/search_shards/10_basic_field_match_only_field.yml new file mode 100644 index 0000000000000..142293bc33c5c --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search_shards/10_basic_field_match_only_field.yml @@ -0,0 +1,101 @@ +--- +"Basic /_search_shards test": + - do: + indices.create: + index: test_1 + + - do: + search_shards: + index: test_1 + routing: foo + + - match: { shards.0.0.index: test_1 } + +--- +"Search shards aliases with and without filters": + - do: + indices.create: + index: test_index + body: + settings: + index: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + field: + type: match_only_text + aliases: + test_alias_no_filter: {} + test_alias_filter_1: + filter: + term: + field : value1 + test_alias_filter_2: + filter: + term: + field : value2 + + - do: + search_shards: + index: test_alias_no_filter + + - length: { shards: 1 } + - match: { shards.0.0.index: test_index } + - is_true: indices.test_index + - is_false: indices.test_index.filter + - match: { indices.test_index.aliases: [test_alias_no_filter]} + + - do: + search_shards: + index: test_alias_filter_1 + + - length: { shards: 1 } + - match: { shards.0.0.index: test_index } + - match: { indices.test_index.aliases: [test_alias_filter_1] } + - match: { indices.test_index.filter.term.field.value: value1 } + - lte: { indices.test_index.filter.term.field.boost: 1.0 } + - gte: { indices.test_index.filter.term.field.boost: 1.0 } + + - do: + search_shards: + index: ["test_alias_filter_1","test_alias_filter_2"] + + - length: { shards: 1 } + - match: { shards.0.0.index: test_index } + - match: { indices.test_index.aliases: [test_alias_filter_1, test_alias_filter_2]} + - length: { indices.test_index.filter.bool.should: 2 } + - lte: { indices.test_index.filter.bool.should.0.term.field.boost: 1.0 } + - gte: { indices.test_index.filter.bool.should.0.term.field.boost: 1.0 } + - lte: { 
indices.test_index.filter.bool.should.1.term.field.boost: 1.0 } + - gte: { indices.test_index.filter.bool.should.1.term.field.boost: 1.0 } + - match: { indices.test_index.filter.bool.adjust_pure_negative: true} + - lte: { indices.test_index.filter.bool.boost: 1.0 } + - gte: { indices.test_index.filter.bool.boost: 1.0 } + + - do: + search_shards: + index: "test*" + + - length: { shards: 1 } + - match: { shards.0.0.index: test_index } + - match: { indices.test_index.aliases: [test_alias_filter_1, test_alias_filter_2, test_alias_no_filter]} + - is_false: indices.test_index.filter + + - do: + search_shards: + index: ["test_alias_filter_1","test_alias_no_filter"] + + - length: { shards: 1 } + - match: { shards.0.0.index: test_index } + - match: { indices.test_index.aliases: [test_alias_filter_1, test_alias_no_filter]} + - is_false: indices.test_index.filter + + - do: + search_shards: + index: ["test_alias_no_filter"] + + - length: { shards: 1 } + - match: { shards.0.0.index: test_index } + - match: { indices.test_index.aliases: [test_alias_no_filter]} + - is_false: indices.test_index.filter From f338746dffe8aa60b75a1a97b62cdaf782d79b66 Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Mon, 20 Nov 2023 15:53:22 -0800 Subject: [PATCH 14/24] Added skip section in integ test to fix mixed cluster failures Signed-off-by: Rishabh Maurya --- .../20_highlighting_field_match_only_text.yml | 4 +- .../20_query_string_field_match_only_text.yml | 3 + .../30_sig_terms_field_match_only_text.yml | 84 +------------------ .../90_sig_text_field_match_only.yml | 8 +- .../20_highlighting_field_match_only_text.yml | 3 + ...0_phrase_search_field_match_only_text.yml} | 4 +- ...atch_bool_prefix_field_match_only_text.yml | 4 +- .../mapper/MatchOnlyTextFieldMapper.java | 27 ------ .../index/query/SourceFieldMatchQuery.java | 2 +- .../mapper/MatchOnlyTextFieldMapperTests.java | 3 + .../query/SourceFieldMatchQueryTests.java | 3 - .../index/mapper/MapperServiceTestCase.java | 1 + 12 files changed, 
27 insertions(+), 119 deletions(-) rename rest-api-spec/src/main/resources/rest-api-spec/test/search/{200_index_phrase_search_field_match_only_text.yml => 200_phrase_search_field_match_only_text.yml} (92%) diff --git a/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/search-as-you-type/20_highlighting_field_match_only_text.yml b/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/search-as-you-type/20_highlighting_field_match_only_text.yml index 19c50b9157f32..306aeb8f5c2f2 100644 --- a/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/search-as-you-type/20_highlighting_field_match_only_text.yml +++ b/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/search-as-you-type/20_highlighting_field_match_only_text.yml @@ -1,7 +1,7 @@ setup: - skip: - version: " - 7.1.99" - reason: "added in 7.2.0" + version: " - 2.11.99" + reason: "match_only_text field was introduced in 2.12.0" - do: indices.create: diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string_field_match_only_text.yml index 18129e1d11861..5387dc8c0bca6 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string_field_match_only_text.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string_field_match_only_text.yml @@ -1,5 +1,8 @@ --- "validate_query with query_string parameters": + - skip: + version: " - 2.11.99" + reason: "match_only_text field was introduced in 2.12.0" - do: indices.create: index: test diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms_field_match_only_text.yml index c75dee019a351..60c59a93ca62a 100644 --- 
a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms_field_match_only_text.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms_field_match_only_text.yml @@ -1,5 +1,8 @@ --- "Default index": + - skip: + version: " - 2.11.99" + reason: "match_only_text field was introduced in 2.12.0" - do: indices.create: index: goodbad @@ -71,84 +74,3 @@ - match: {aggregations.class.buckets.0.sig_terms.buckets.0.key: "bad"} - match: {aggregations.class.buckets.1.sig_terms.buckets.0.key: "good"} - ---- -"IP test": - - do: - indices.create: - index: ip_index - body: - mappings: - properties: - ip: - type: ip - - - do: - index: - index: ip_index - id: 1 - body: { ip: "::1" } - - do: - index: - index: ip_index - id: 2 - body: { } - - - do: - indices.refresh: {} - - - do: - search: - rest_total_hits_as_int: true - body: { "query" : { "exists" : { "field" : "ip" } }, "aggs" : { "ip_terms" : { "significant_terms" : { "field" : "ip", "min_doc_count" : 1 } } } } - - - match: { hits.total: 1 } - - - length: { aggregations.ip_terms.buckets: 1 } - - - match: { aggregations.ip_terms.buckets.0.key: "::1" } - - - is_false: aggregations.ip_terms.buckets.0.key_as_string - - - match: { aggregations.ip_terms.buckets.0.doc_count: 1 } - - - do: - search: - rest_total_hits_as_int: true - body: { "query" : { "exists" : { "field" : "ip" } }, "aggs" : { "ip_terms" : { "significant_terms" : { "field" : "ip", "min_doc_count" : 1, "include" : [ "::1" ] } } } } - - - match: { hits.total: 1 } - - - length: { aggregations.ip_terms.buckets: 1 } - - - match: { aggregations.ip_terms.buckets.0.key: "::1" } - - - do: - search: - rest_total_hits_as_int: true - body: { "query" : { "exists" : { "field" : "ip" } }, "aggs" : { "ip_terms" : { "significant_terms" : { "field" : "ip", "min_doc_count" : 1, "exclude" : [ "::1" ] } } } } - - - match: { hits.total: 1 } - - - length: { aggregations.ip_terms.buckets: 0 } - - - do: - catch: /Aggregation 
\[ip_terms\] cannot support regular expression style include\/exclude settings as they can only be applied to string fields\. Use an array of values for include\/exclude clauses/ - search: - rest_total_hits_as_int: true - body: { "size" : 0, "aggs" : { "ip_terms" : { "significant_terms" : { "field" : "ip", "exclude" : "127.*" } } } } - ---- -'Misspelled fields get "did you mean"': - - skip: - version: " - 7.6.99" - reason: Implemented in 8.0 (to be backported to 7.7) - - do: - catch: /\[significant_terms\] unknown field \[jlp\] did you mean \[jlh\]\?/ - search: - body: - aggs: - foo: - significant_terms: - field: foo - jlp: {} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only.yml index 873d59907982a..01548df586de6 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only.yml @@ -1,6 +1,8 @@ --- "Default index": - + - skip: + version: " - 2.11.99" + reason: "match_only_text field was introduced in 2.12.0" - do: indices.create: index: goodbad @@ -75,7 +77,9 @@ --- "Dedup noise": - + - skip: + version: " - 2.11.99" + reason: "match_only_text field was introduced in 2.12.0" - do: indices.create: index: goodbad diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/20_highlighting_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/20_highlighting_field_match_only_text.yml index a1b9e7a5506e2..d3fdd87ee3a63 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/20_highlighting_field_match_only_text.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/20_highlighting_field_match_only_text.yml @@ -1,4 +1,7 @@ setup: + 
- skip: + version: " - 2.11.99" + reason: "match_only_text field was introduced in 2.12.0" - do: indices.create: index: test diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/200_index_phrase_search_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/200_phrase_search_field_match_only_text.yml similarity index 92% rename from rest-api-spec/src/main/resources/rest-api-spec/test/search/200_index_phrase_search_field_match_only_text.yml rename to rest-api-spec/src/main/resources/rest-api-spec/test/search/200_phrase_search_field_match_only_text.yml index 7c0499de538cf..fa51cd83e62ad 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/200_index_phrase_search_field_match_only_text.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/200_phrase_search_field_match_only_text.yml @@ -1,5 +1,8 @@ --- "search with indexed phrases": + - skip: + version: " - 2.11.99" + reason: "match_only_text field was introduced in 2.12.0" - do: indices.create: index: test @@ -8,7 +11,6 @@ properties: text: type: match_only_text - index_phrases: true - do: index: diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix_field_match_only_text.yml index 3b380bb808245..522a5fe077846 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix_field_match_only_text.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix_field_match_only_text.yml @@ -1,7 +1,7 @@ setup: - skip: - version: " - 7.1.99" - reason: "added in 7.2.0" + version: " - 2.11.99" + reason: "match_only_text field was introduced in 2.12.0" - do: indices.create: diff --git a/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java 
index 6b5055d07fd39..ef35a0592a897 100644 --- a/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java @@ -22,7 +22,6 @@ import org.apache.lucene.search.TermQuery; import org.opensearch.Version; import org.opensearch.common.lucene.search.MultiPhrasePrefixQuery; -import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.index.analysis.IndexAnalyzers; import org.opensearch.index.analysis.NamedAnalyzer; import org.opensearch.index.query.QueryShardContext; @@ -308,30 +307,4 @@ private List> getTermsFromTokenStream(TokenStream stream) throws IOEx return termArray; } } - - @Override - protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException { - // this is a pain, but we have to do this to maintain BWC - builder.field("type", contentType()); - Builder mapperBuilder = (MatchOnlyTextFieldMapper.Builder) getMergeBuilder(); - mapperBuilder.boost.toXContent(builder, includeDefaults); - mapperBuilder.index.toXContent(builder, includeDefaults); - mapperBuilder.store.toXContent(builder, includeDefaults); - this.multiFields.toXContent(builder, params); - this.copyTo.toXContent(builder, params); - mapperBuilder.meta.toXContent(builder, includeDefaults); - mapperBuilder.indexOptions.toXContent(builder, includeDefaults); - mapperBuilder.termVectors.toXContent(builder, includeDefaults); - mapperBuilder.norms.toXContent(builder, includeDefaults); - mapperBuilder.analyzers.indexAnalyzer.toXContent(builder, includeDefaults); - mapperBuilder.analyzers.searchAnalyzer.toXContent(builder, includeDefaults); - mapperBuilder.analyzers.searchQuoteAnalyzer.toXContent(builder, includeDefaults); - mapperBuilder.similarity.toXContent(builder, includeDefaults); - mapperBuilder.eagerGlobalOrdinals.toXContent(builder, includeDefaults); - mapperBuilder.positionIncrementGap.toXContent(builder, includeDefaults); - 
mapperBuilder.fieldData.toXContent(builder, includeDefaults); - mapperBuilder.freqFilter.toXContent(builder, includeDefaults); - mapperBuilder.indexPrefixes.toXContent(builder, includeDefaults); - mapperBuilder.indexPhrases.toXContent(builder, includeDefaults); - } } diff --git a/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java b/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java index 6c1b87a810d0f..4e9a74a92498a 100644 --- a/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java +++ b/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java @@ -45,7 +45,7 @@ public class SourceFieldMatchQuery extends Query { * Constructs a SourceFieldMatchQuery. * * @param delegateQuery The parent query to use to find matches. - * @param filter The query used to filter further by running against field value computed using _source field. + * @param filter The query used to filter further by running against field value fetched using _source field. * @param fieldType The mapped field type. 
* @param context The QueryShardContext to get lookup and valueFetcher */ diff --git a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java index bf39677c0ea14..36aee352732d2 100644 --- a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java @@ -166,6 +166,9 @@ public void testPositionIncrementGap() throws IOException {} @Override public void testDefaultPositionIncrementGap() throws IOException {} + @Override + public void testMinimalToMaximal() throws IOException {} + @Override public void testIndexPrefixMapping() throws IOException { MapperParsingException e = expectThrows( diff --git a/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java b/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java index f3d8ec05ed414..dc98b2f4faf1c 100644 --- a/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java +++ b/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java @@ -38,7 +38,6 @@ public void testAllPossibleScenarios() throws IOException { QueryShardContext queryShardContext = createQueryShardContext(mapperService); when(queryShardContext.sourcePath("desert")).thenReturn(Set.of("desert")); when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); - when(queryShardContext.documentMapper(anyString())).thenReturn(mapperService.documentMapper()); String[] deserts = new String[] { "apple pie pie", "banana split pie", "chocolate cake" }; List docs = new ArrayList<>(); @@ -113,7 +112,6 @@ public void testSourceDisabled() throws IOException { QueryShardContext queryShardContext = createQueryShardContext(mapperService); when(queryShardContext.sourcePath("desert")).thenReturn(Set.of("desert")); 
when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); - when(queryShardContext.documentMapper(anyString())).thenReturn(mapperService.documentMapper()); IllegalArgumentException e = expectThrows( IllegalArgumentException.class, () -> new SourceFieldMatchQuery( @@ -141,7 +139,6 @@ public void testMissingField() throws IOException { QueryShardContext queryShardContext = createQueryShardContext(mapperService); when(queryShardContext.sourcePath("desert")).thenReturn(Set.of("desert")); when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); - when(queryShardContext.documentMapper(anyString())).thenReturn(mapperService.documentMapper()); String[] deserts = new String[] { "apple pie pie", "banana split pie", "chocolate cake" }; List docs = new ArrayList<>(); diff --git a/test/framework/src/main/java/org/opensearch/index/mapper/MapperServiceTestCase.java b/test/framework/src/main/java/org/opensearch/index/mapper/MapperServiceTestCase.java index c2aa3c2436363..ac78a0d1936ea 100644 --- a/test/framework/src/main/java/org/opensearch/index/mapper/MapperServiceTestCase.java +++ b/test/framework/src/main/java/org/opensearch/index/mapper/MapperServiceTestCase.java @@ -255,6 +255,7 @@ protected QueryShardContext createQueryShardContext(MapperService mapperService) throw new UnsupportedOperationException("search lookup not available"); })); when(queryShardContext.getFieldType(any())).thenAnswer(inv -> mapperService.fieldType(inv.getArguments()[0].toString())); + when(queryShardContext.documentMapper(anyString())).thenReturn(mapperService.documentMapper()); return queryShardContext; } } From d203f4bea6164c434237574cebbba01a8cd2299b Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Mon, 20 Nov 2023 15:54:20 -0800 Subject: [PATCH 15/24] remove unused import Signed-off-by: Rishabh Maurya --- .../org/opensearch/index/query/SourceFieldMatchQueryTests.java | 1 - 1 file changed, 1 deletion(-) diff --git 
a/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java b/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java index dc98b2f4faf1c..294c246bd1f99 100644 --- a/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java +++ b/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java @@ -21,7 +21,6 @@ import java.util.List; import java.util.Set; -import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.when; public class SourceFieldMatchQueryTests extends MapperServiceTestCase { From 1a4fbd0c1987a181d415b227c36b83a11f904fb4 Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Tue, 5 Dec 2023 15:45:49 +0530 Subject: [PATCH 16/24] Address PR comments Signed-off-by: Rishabh Maurya --- .../search/160_exists_query_match_only.yml | 121 ++++++++++++++++++ .../index/mapper/MappedFieldType.java | 22 ++-- .../mapper/MatchOnlyTextFieldMapper.java | 6 +- .../index/query/SourceFieldMatchQuery.java | 5 +- .../MatchOnlyTextFieldAnalyzerModeTests.java | 40 ++++++ .../mapper/MatchOnlyTextFieldTypeTests.java | 54 ++++++++ .../mapper/TextFieldAnalyzerModeTests.java | 22 ++-- .../index/mapper/TextFieldTypeTests.java | 32 +++-- .../query/SourceFieldMatchQueryTests.java | 60 ++++----- 9 files changed, 286 insertions(+), 76 deletions(-) create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query_match_only.yml create mode 100644 server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldAnalyzerModeTests.java create mode 100644 server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldTypeTests.java diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query_match_only.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query_match_only.yml new file mode 100644 index 0000000000000..d5a9f24c15825 --- /dev/null +++ 
b/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query_match_only.yml @@ -0,0 +1,121 @@ +setup: + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - skip: + features: ["headers"] + + - do: + indices.create: + index: test + body: + mappings: + dynamic: false + properties: + match_only_text: + type: match_only_text + + - do: + headers: + Content-Type: application/json + index: + index: "test" + id: 1 + body: + match_only_text: "foo bar" + + - do: + headers: + Content-Type: application/json + index: + index: "test" + id: 2 + body: + match_only_text: "foo bar" + + - do: + headers: + Content-Type: application/json + index: + index: "test" + id: 3 + routing: "route_me" + body: + match_only_text: "foo bar" + + - do: + index: + index: "test" + id: 4 + body: {} + + - do: + indices.create: + index: test-unmapped + body: + mappings: + dynamic: false + properties: + unrelated: + type: keyword + + - do: + index: + index: "test-unmapped" + id: 1 + body: + unrelated: "foo" + + - do: + indices.create: + index: test-empty + body: + mappings: + dynamic: false + properties: + match_only_text: + type: match_only_text + + - do: + indices.refresh: + index: [test, test-unmapped, test-empty] + +--- +"Test exists query on mapped match_only_text field": + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + exists: + field: match_only_text + + - match: {hits.total: 3} + +--- +"Test exists query on unmapped match_only_text field": + - do: + search: + rest_total_hits_as_int: true + index: test-unmapped + body: + query: + exists: + field: match_only_text + + - match: {hits.total: 0} + +--- +"Test exists query on match_only_text field in empty index": + - do: + search: + rest_total_hits_as_int: true + index: test-empty + body: + query: + exists: + field: match_only_text + + - match: {hits.total: 0} + diff --git a/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java 
b/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java index 66d4654e543a2..6f9e3f894e091 100644 --- a/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java @@ -354,36 +354,36 @@ public Query existsQuery(QueryShardContext context) { } public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + return phraseQuery(stream, slop, enablePositionIncrements, null); + } + + public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) throws IOException { throw new IllegalArgumentException( "Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]" ); } - public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) throws IOException { - return phraseQuery(stream, slop, enablePositionIncrements); + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + return multiPhraseQuery(stream, slop, enablePositionIncrements, null); } - public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) + throws IOException { throw new IllegalArgumentException( "Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]" ); } - public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) - throws IOException { - return multiPhraseQuery(stream, slop, enablePositionIncrements); + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException { + return phrasePrefixQuery(stream, slop, maxExpansions, null); } 
- public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException { + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, QueryShardContext context) throws IOException { throw new IllegalArgumentException( "Can only use phrase prefix queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]" ); } - public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, QueryShardContext context) throws IOException { - return phrasePrefixQuery(stream, slop, maxExpansions); - } - public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRewriteMethod method, QueryShardContext context) { throw new IllegalArgumentException( "Can only use span prefix queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]" diff --git a/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java index ef35a0592a897..e69336dc385aa 100644 --- a/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java @@ -137,7 +137,6 @@ public Builder(String name, Version indexCreatedVersion, IndexAnalyzers indexAna @Override public MatchOnlyTextFieldMapper build(BuilderContext context) { - // TODO - disable norms and index-options and validate FieldType fieldType = TextParams.buildFieldType(index, store, indexOptions, norms, termVectors); MatchOnlyTextFieldType tft = buildFieldType(fieldType, context); return new MatchOnlyTextFieldMapper( @@ -167,9 +166,6 @@ protected MatchOnlyTextFieldType buildFieldType(FieldType fieldType, BuilderCont "Cannot set position_increment_gap on field [" + name + "] without indexing enabled" ); } - // for index analyzer we don't set positionIncrementGap whereas for search analyzer its set because - // phrase queries, which make 
use of it, should work fine as they will directly work on the field value - // per matched document by reading from _source field. indexAnalyzer = new NamedAnalyzer(indexAnalyzer, positionIncrementGap.get()); searchAnalyzer = new NamedAnalyzer(searchAnalyzer, positionIncrementGap.get()); searchQuoteAnalyzer = new NamedAnalyzer(searchQuoteAnalyzer, positionIncrementGap.get()); @@ -220,7 +216,7 @@ protected List> getParameters() { * * @opensearch.internal */ - public static final class MatchOnlyTextFieldType extends TextFieldMapper.TextFieldType { + public static final class MatchOnlyTextFieldType extends TextFieldType { private final boolean indexPhrases = false; private PrefixFieldType prefixFieldType; diff --git a/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java b/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java index 4e9a74a92498a..fd8d7045f307e 100644 --- a/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java +++ b/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java @@ -83,7 +83,7 @@ public Query rewrite(IndexSearcher indexSearcher) throws IOException { @Override public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - Weight weight = delegateQuery.createWeight(searcher, scoreMode, boost); + Weight weight = delegateQuery.createWeight(searcher, ScoreMode.TOP_DOCS, boost); return new ConstantScoreWeight(this, boost) { @@ -108,7 +108,6 @@ public boolean matches() { return false; } MemoryIndex memoryIndex = new MemoryIndex(); - for (Object value : values) { memoryIndex.addField(fieldType.name(), (String) value, fieldType.indexAnalyzer()); } @@ -122,7 +121,7 @@ public float matchCost() { return 1000f; } }; - return new ConstantScoreScorer(this, score(), scoreMode, twoPhase); + return new ConstantScoreScorer(this, score(), ScoreMode.TOP_DOCS, twoPhase); } @Override diff --git 
a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldAnalyzerModeTests.java b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldAnalyzerModeTests.java new file mode 100644 index 0000000000000..f6b80bbd741f2 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldAnalyzerModeTests.java @@ -0,0 +1,40 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. 
+ */ + +package org.opensearch.index.mapper; + +public class MatchOnlyTextFieldAnalyzerModeTests extends TextFieldAnalyzerModeTests { + @Override + ParametrizedFieldMapper.TypeParser getTypeParser() { + return MatchOnlyTextFieldMapper.PARSER; + } +} diff --git a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldTypeTests.java new file mode 100644 index 0000000000000..acd12c04a99bc --- /dev/null +++ b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldTypeTests.java @@ -0,0 +1,54 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. 
+ */ + +package org.opensearch.index.mapper; + +import org.opensearch.common.lucene.Lucene; + +public class MatchOnlyTextFieldTypeTests extends TextFieldTypeTests { + + @Override + TextFieldMapper.TextFieldType createFieldType(boolean searchable) { + TextSearchInfo tsi = new TextSearchInfo( + TextFieldMapper.Defaults.FIELD_TYPE, + null, + Lucene.STANDARD_ANALYZER, + Lucene.STANDARD_ANALYZER + ); + return new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType( + "field", + searchable, + false, + tsi, + ParametrizedFieldMapper.Parameter.metaParam().get() + ); + } +} diff --git a/server/src/test/java/org/opensearch/index/mapper/TextFieldAnalyzerModeTests.java b/server/src/test/java/org/opensearch/index/mapper/TextFieldAnalyzerModeTests.java index 93bed729f0974..83a3bdc580ae6 100644 --- a/server/src/test/java/org/opensearch/index/mapper/TextFieldAnalyzerModeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/TextFieldAnalyzerModeTests.java @@ -59,6 +59,9 @@ import static org.mockito.Mockito.when; public class TextFieldAnalyzerModeTests extends OpenSearchTestCase { + ParametrizedFieldMapper.TypeParser getTypeParser() { + return TextFieldMapper.PARSER; + } private static Map defaultAnalyzers() { Map analyzers = new HashMap<>(); @@ -101,7 +104,7 @@ public void testParseTextFieldCheckAnalyzerAnalysisMode() { IndexAnalyzers indexAnalyzers = new IndexAnalyzers(analyzers, Collections.emptyMap(), Collections.emptyMap()); when(parserContext.getIndexAnalyzers()).thenReturn(indexAnalyzers); - TextFieldMapper.PARSER.parse("field", fieldNode, parserContext); + getTypeParser().parse("field", fieldNode, parserContext); // check that "analyzer" set to something that only supports AnalysisMode.SEARCH_TIME or AnalysisMode.INDEX_TIME is blocked AnalysisMode mode = randomFrom(AnalysisMode.SEARCH_TIME, AnalysisMode.INDEX_TIME); @@ -110,7 +113,7 @@ public void testParseTextFieldCheckAnalyzerAnalysisMode() { indexAnalyzers = new IndexAnalyzers(analyzers, Collections.emptyMap(), 
Collections.emptyMap()); when(parserContext.getIndexAnalyzers()).thenReturn(indexAnalyzers); fieldNode.put("analyzer", "my_analyzer"); - MapperException ex = expectThrows(MapperException.class, () -> { TextFieldMapper.PARSER.parse("name", fieldNode, parserContext); }); + MapperException ex = expectThrows(MapperException.class, () -> { getTypeParser().parse("name", fieldNode, parserContext); }); assertThat( ex.getMessage(), containsString("analyzer [my_named_analyzer] contains filters [my_analyzer] that are not allowed to run") @@ -136,7 +139,7 @@ public void testParseTextFieldCheckSearchAnalyzerAnalysisMode() { IndexAnalyzers indexAnalyzers = new IndexAnalyzers(analyzers, Collections.emptyMap(), Collections.emptyMap()); when(parserContext.getIndexAnalyzers()).thenReturn(indexAnalyzers); - TextFieldMapper.PARSER.parse("textField", fieldNode, parserContext); + getTypeParser().parse("textField", fieldNode, parserContext); // check that "analyzer" set to AnalysisMode.INDEX_TIME is blocked mode = AnalysisMode.INDEX_TIME; @@ -151,10 +154,7 @@ public void testParseTextFieldCheckSearchAnalyzerAnalysisMode() { if (settingToTest.equals("search_quote_analyzer")) { fieldNode.put("search_analyzer", "standard"); } - MapperException ex = expectThrows( - MapperException.class, - () -> { TextFieldMapper.PARSER.parse("field", fieldNode, parserContext); } - ); + MapperException ex = expectThrows(MapperException.class, () -> { getTypeParser().parse("field", fieldNode, parserContext); }); assertEquals( "analyzer [my_named_analyzer] contains filters [my_analyzer] that are not allowed to run in search time mode.", ex.getMessage() @@ -174,10 +174,7 @@ public void testParseTextFieldCheckAnalyzerWithSearchAnalyzerAnalysisMode() { analyzers.put("my_analyzer", new NamedAnalyzer("my_named_analyzer", AnalyzerScope.INDEX, createAnalyzerWithMode(mode))); IndexAnalyzers indexAnalyzers = new IndexAnalyzers(analyzers, Collections.emptyMap(), Collections.emptyMap()); 
when(parserContext.getIndexAnalyzers()).thenReturn(indexAnalyzers); - MapperException ex = expectThrows( - MapperException.class, - () -> { TextFieldMapper.PARSER.parse("field", fieldNode, parserContext); } - ); + MapperException ex = expectThrows(MapperException.class, () -> { getTypeParser().parse("field", fieldNode, parserContext); }); assertThat( ex.getMessage(), containsString("analyzer [my_named_analyzer] contains filters [my_analyzer] that are not allowed to run") @@ -193,7 +190,6 @@ public void testParseTextFieldCheckAnalyzerWithSearchAnalyzerAnalysisMode() { indexAnalyzers = new IndexAnalyzers(analyzers, Collections.emptyMap(), Collections.emptyMap()); when(parserContext.getIndexAnalyzers()).thenReturn(indexAnalyzers); - TextFieldMapper.PARSER.parse("field", fieldNode, parserContext); + getTypeParser().parse("field", fieldNode, parserContext); } - } diff --git a/server/src/test/java/org/opensearch/index/mapper/TextFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/TextFieldTypeTests.java index 0592a972db5e9..9c177bbec61fd 100644 --- a/server/src/test/java/org/opensearch/index/mapper/TextFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/TextFieldTypeTests.java @@ -66,35 +66,39 @@ public class TextFieldTypeTests extends FieldTypeTestCase { - private static TextFieldType createFieldType() { - return new TextFieldType("field"); + TextFieldType createFieldType(boolean searchabe) { + if (searchabe) { + return new TextFieldType("field"); + } else { + return new TextFieldType("field", false, false, Collections.emptyMap()); + } } public void testIsAggregatableDependsOnFieldData() { - TextFieldType ft = createFieldType(); + TextFieldType ft = createFieldType(true); assertFalse(ft.isAggregatable()); ft.setFielddata(true); assertTrue(ft.isAggregatable()); } public void testTermQuery() { - MappedFieldType ft = createFieldType(); + MappedFieldType ft = createFieldType(true); assertEquals(new TermQuery(new Term("field", 
"foo")), ft.termQuery("foo", null)); assertEquals(AutomatonQueries.caseInsensitiveTermQuery(new Term("field", "fOo")), ft.termQueryCaseInsensitive("fOo", null)); - MappedFieldType unsearchable = new TextFieldType("field", false, false, Collections.emptyMap()); + MappedFieldType unsearchable = createFieldType(false); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> unsearchable.termQuery("bar", null)); assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); } public void testTermsQuery() { - MappedFieldType ft = createFieldType(); + MappedFieldType ft = createFieldType(true); List terms = new ArrayList<>(); terms.add(new BytesRef("foo")); terms.add(new BytesRef("bar")); assertEquals(new TermInSetQuery("field", terms), ft.termsQuery(Arrays.asList("foo", "bar"), null)); - MappedFieldType unsearchable = new TextFieldType("field", false, false, Collections.emptyMap()); + MappedFieldType unsearchable = createFieldType(false); IllegalArgumentException e = expectThrows( IllegalArgumentException.class, () -> unsearchable.termsQuery(Arrays.asList("foo", "bar"), null) @@ -103,7 +107,7 @@ public void testTermsQuery() { } public void testRangeQuery() { - MappedFieldType ft = createFieldType(); + MappedFieldType ft = createFieldType(true); assertEquals( new TermRangeQuery("field", BytesRefs.toBytesRef("foo"), BytesRefs.toBytesRef("bar"), true, false), ft.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_QSC) @@ -120,13 +124,13 @@ public void testRangeQuery() { } public void testRegexpQuery() { - MappedFieldType ft = createFieldType(); + MappedFieldType ft = createFieldType(true); assertEquals( new RegexpQuery(new Term("field", "foo.*")), ft.regexpQuery("foo.*", 0, 0, 10, CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC) ); - MappedFieldType unsearchable = new TextFieldType("field", false, false, Collections.emptyMap()); + MappedFieldType unsearchable = createFieldType(false); IllegalArgumentException e = 
expectThrows( IllegalArgumentException.class, () -> unsearchable.regexpQuery("foo.*", 0, 0, 10, null, MOCK_QSC) @@ -141,13 +145,13 @@ public void testRegexpQuery() { } public void testFuzzyQuery() { - MappedFieldType ft = createFieldType(); + MappedFieldType ft = createFieldType(true); assertEquals( new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true), ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MOCK_QSC) ); - MappedFieldType unsearchable = new TextFieldType("field", false, false, Collections.emptyMap()); + MappedFieldType unsearchable = createFieldType(false); IllegalArgumentException e = expectThrows( IllegalArgumentException.class, () -> unsearchable.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MOCK_QSC) @@ -162,7 +166,7 @@ public void testFuzzyQuery() { } public void testIndexPrefixes() { - TextFieldType ft = createFieldType(); + TextFieldType ft = createFieldType(true); ft.setPrefixFieldType(new TextFieldMapper.PrefixFieldType(ft, "field._index_prefix", 2, 10)); Query q = ft.prefixQuery("goin", CONSTANT_SCORE_REWRITE, false, randomMockShardContext()); @@ -222,7 +226,7 @@ public void testIndexPrefixes() { } public void testFetchSourceValue() throws IOException { - TextFieldType fieldType = createFieldType(); + TextFieldType fieldType = createFieldType(true); fieldType.setIndexAnalyzer(Lucene.STANDARD_ANALYZER); assertEquals(List.of("value"), fetchSourceValue(fieldType, "value")); diff --git a/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java b/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java index 294c246bd1f99..894e75099ae34 100644 --- a/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java +++ b/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java @@ -27,7 +27,7 @@ public class SourceFieldMatchQueryTests extends MapperServiceTestCase { public void testAllPossibleScenarios() throws IOException { MapperService 
mapperService = createMapperService(mapping(b -> { - b.startObject("desert"); + b.startObject("dessert"); { b.field("type", "match_only_text"); } @@ -35,46 +35,46 @@ public void testAllPossibleScenarios() throws IOException { })); QueryShardContext queryShardContext = createQueryShardContext(mapperService); - when(queryShardContext.sourcePath("desert")).thenReturn(Set.of("desert")); + when(queryShardContext.sourcePath("dessert")).thenReturn(Set.of("dessert")); when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); - String[] deserts = new String[] { "apple pie pie", "banana split pie", "chocolate cake" }; + String[] desserts = new String[] { "apple pie pie", "banana split pie", "chocolate cake" }; List docs = new ArrayList<>(); - for (String desert : deserts) { - docs.add(mapperService.documentMapper().parse(source(b -> b.field("desert", desert)))); + for (String dessert : desserts) { + docs.add(mapperService.documentMapper().parse(source(b -> b.field("dessert", dessert)))); } SourceFieldMatchQuery matchBoth = new SourceFieldMatchQuery( - QueryBuilders.matchQuery("desert", "apple").doToQuery(queryShardContext), // Delegate query - QueryBuilders.matchQuery("desert", "pie").doToQuery(queryShardContext), // Filter query - queryShardContext.getFieldType("desert"), + QueryBuilders.matchQuery("dessert", "apple").doToQuery(queryShardContext), // Delegate query + QueryBuilders.matchQuery("dessert", "pie").doToQuery(queryShardContext), // Filter query + queryShardContext.getFieldType("dessert"), queryShardContext ); SourceFieldMatchQuery matchDelegate = new SourceFieldMatchQuery( - QueryBuilders.matchQuery("desert", "apple").doToQuery(queryShardContext), // Delegate query - QueryBuilders.matchQuery("desert", "juice").doToQuery(queryShardContext), // Filter query - queryShardContext.getFieldType("desert"), + QueryBuilders.matchQuery("dessert", "apple").doToQuery(queryShardContext), // Delegate query + QueryBuilders.matchQuery("dessert", 
"juice").doToQuery(queryShardContext), // Filter query + queryShardContext.getFieldType("dessert"), queryShardContext ); SourceFieldMatchQuery matchFilter = new SourceFieldMatchQuery( - QueryBuilders.matchQuery("desert", "tart").doToQuery(queryShardContext), // Delegate query - QueryBuilders.matchQuery("desert", "pie").doToQuery(queryShardContext), // Filter query - queryShardContext.getFieldType("desert"), + QueryBuilders.matchQuery("dessert", "tart").doToQuery(queryShardContext), // Delegate query + QueryBuilders.matchQuery("dessert", "pie").doToQuery(queryShardContext), // Filter query + queryShardContext.getFieldType("dessert"), queryShardContext ); SourceFieldMatchQuery matchNone = new SourceFieldMatchQuery( - QueryBuilders.matchQuery("desert", "gulab").doToQuery(queryShardContext), // Delegate query - QueryBuilders.matchQuery("desert", "jamun").doToQuery(queryShardContext), // Filter query - queryShardContext.getFieldType("desert"), + QueryBuilders.matchQuery("dessert", "gulab").doToQuery(queryShardContext), // Delegate query + QueryBuilders.matchQuery("dessert", "jamun").doToQuery(queryShardContext), // Filter query + queryShardContext.getFieldType("dessert"), queryShardContext ); SourceFieldMatchQuery matchMultipleDocs = new SourceFieldMatchQuery( QueryBuilders.matchAllQuery().toQuery(queryShardContext), // Delegate query - QueryBuilders.matchQuery("desert", "pie").doToQuery(queryShardContext), // Filter query - queryShardContext.getFieldType("desert"), + QueryBuilders.matchQuery("dessert", "pie").doToQuery(queryShardContext), // Filter query + queryShardContext.getFieldType("dessert"), queryShardContext ); @@ -109,14 +109,14 @@ public void testAllPossibleScenarios() throws IOException { public void testSourceDisabled() throws IOException { MapperService mapperService = createMapperService(topMapping(b -> b.startObject("_source").field("enabled", false).endObject())); QueryShardContext queryShardContext = createQueryShardContext(mapperService); - 
when(queryShardContext.sourcePath("desert")).thenReturn(Set.of("desert")); + when(queryShardContext.sourcePath("dessert")).thenReturn(Set.of("dessert")); when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); IllegalArgumentException e = expectThrows( IllegalArgumentException.class, () -> new SourceFieldMatchQuery( - QueryBuilders.matchQuery("desert", "apple").doToQuery(queryShardContext), // Delegate query - QueryBuilders.matchQuery("desert", "pie").doToQuery(queryShardContext), // Filter query - queryShardContext.getFieldType("desert"), + QueryBuilders.matchQuery("dessert", "apple").doToQuery(queryShardContext), // Delegate query + QueryBuilders.matchQuery("dessert", "pie").doToQuery(queryShardContext), // Filter query + queryShardContext.getFieldType("dessert"), queryShardContext ) ); @@ -129,25 +129,25 @@ public void testSourceDisabled() throws IOException { public void testMissingField() throws IOException { MapperService mapperService = createMapperService(mapping(b -> { - b.startObject("desert"); + b.startObject("dessert"); { b.field("type", "match_only_text"); } b.endObject(); })); QueryShardContext queryShardContext = createQueryShardContext(mapperService); - when(queryShardContext.sourcePath("desert")).thenReturn(Set.of("desert")); + when(queryShardContext.sourcePath("dessert")).thenReturn(Set.of("dessert")); when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); - String[] deserts = new String[] { "apple pie pie", "banana split pie", "chocolate cake" }; + String[] desserts = new String[] { "apple pie pie", "banana split pie", "chocolate cake" }; List docs = new ArrayList<>(); - for (String desert : deserts) { - docs.add(mapperService.documentMapper().parse(source(b -> b.field("desert", desert)))); + for (String dessert : desserts) { + docs.add(mapperService.documentMapper().parse(source(b -> b.field("dessert", dessert)))); } SourceFieldMatchQuery matchDelegate = new SourceFieldMatchQuery( - 
QueryBuilders.matchQuery("desert", "apple").doToQuery(queryShardContext), // Delegate query + QueryBuilders.matchQuery("dessert", "apple").doToQuery(queryShardContext), // Delegate query QueryBuilders.matchQuery("username", "pie").doToQuery(queryShardContext), // Filter query missing field - queryShardContext.getFieldType("desert"), + queryShardContext.getFieldType("dessert"), queryShardContext ); withLuceneIndex(mapperService, iw -> { From 2e3334471043291730d58cf691e23de397460652 Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Wed, 6 Dec 2023 12:28:59 +0530 Subject: [PATCH 17/24] fix integ tests Signed-off-by: Rishabh Maurya --- .../11_match_field_match_only_text.yml | 3 + .../20_ngram_search_field_match_only.yml | 6 ++ ...ram_highligthing_field_match_only_text.yml | 3 + .../40_query_string_field_match_only_text.yml | 3 + ...default_analyzer_field_match_only_text.yml | 3 + ...queries_with_synonyms_field_match_only.yml | 5 ++ .../60_synonym_graph_field_match_only.yml | 3 + .../70_intervals_field_match_only_text.yml | 7 +- .../20_phrase_field_match_only_text.yml | 12 ++++ .../20_highlighting_field_match_only_text.yml | 4 +- .../20_query_string_field_match_only_text.yml | 4 +- .../30_sig_terms_field_match_only_text.yml | 4 +- .../90_sig_text_field_match_only.yml | 8 +-- .../20_highlighting_field_match_only_text.yml | 7 +- .../test/search/160_exists_query.yml | 61 ----------------- .../search/160_exists_query_match_only.yml | 2 - ...00_phrase_search_field_match_only_text.yml | 5 +- ...atch_bool_prefix_field_match_only_text.yml | 67 +------------------ .../320_disallow_queries_field_match_only.yml | 4 +- .../10_basic_field_match_only_field.yml | 17 ++--- .../index/mapper/MappedFieldType.java | 22 +++--- .../mapper/MatchOnlyTextFieldMapper.java | 6 ++ 22 files changed, 83 insertions(+), 173 deletions(-) diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/11_match_field_match_only_text.yml 
b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/11_match_field_match_only_text.yml index a93890f2b3865..40ff2c2f4cdbe 100644 --- a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/11_match_field_match_only_text.yml +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/11_match_field_match_only_text.yml @@ -1,6 +1,9 @@ # integration tests for queries with specific analysis chains "match query with stacked stems": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" # Tests the match query stemmed tokens are "stacked" on top of the unstemmed # versions in the same position. - do: diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only.yml index 9d536d346f6f1..95b648dee47c8 100644 --- a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only.yml +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only.yml @@ -1,4 +1,7 @@ "ngram search": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" - do: indices.create: index: test @@ -41,6 +44,9 @@ --- "testNGramCopyField": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" - do: indices.create: index: test diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/30_ngram_highligthing_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/30_ngram_highligthing_field_match_only_text.yml index 9c7d47827a27c..597f55679a2c6 100644 --- 
a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/30_ngram_highligthing_field_match_only_text.yml +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/30_ngram_highligthing_field_match_only_text.yml @@ -1,4 +1,7 @@ "ngram highlighting": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" - do: indices.create: index: test diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/40_query_string_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/40_query_string_field_match_only_text.yml index d3dc85c87ed46..ddebb1d76acbc 100644 --- a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/40_query_string_field_match_only_text.yml +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/40_query_string_field_match_only_text.yml @@ -1,5 +1,8 @@ --- "Test query string with snowball": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" - do: indices.create: index: test diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/41_query_string_with_default_analyzer_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/41_query_string_with_default_analyzer_field_match_only_text.yml index eb884644eac38..97f3fb65e94a2 100644 --- a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/41_query_string_with_default_analyzer_field_match_only_text.yml +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/41_query_string_with_default_analyzer_field_match_only_text.yml @@ -1,5 +1,8 @@ --- "Test default search analyzer is applied": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" - do: indices.create: index: test diff 
--git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/50_queries_with_synonyms_field_match_only.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/50_queries_with_synonyms_field_match_only.yml index b79511d5c4dfd..0c263a47a38e6 100644 --- a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/50_queries_with_synonyms_field_match_only.yml +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/50_queries_with_synonyms_field_match_only.yml @@ -1,6 +1,8 @@ --- "Test common terms query with stacked tokens": - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" features: "allowed_warnings" - do: @@ -244,6 +246,9 @@ --- "Test match query with synonyms - see #3881 for extensive description of the issue": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" - do: indices.create: index: test diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/60_synonym_graph_field_match_only.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/60_synonym_graph_field_match_only.yml index 4b8bc4857250f..91a8b1509517e 100644 --- a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/60_synonym_graph_field_match_only.yml +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/60_synonym_graph_field_match_only.yml @@ -1,4 +1,7 @@ setup: + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" - do: indices.create: index: test diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/70_intervals_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/70_intervals_field_match_only_text.yml index 6691621fbdf22..9792c9d2695ea 100644 --- 
a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/70_intervals_field_match_only_text.yml +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/70_intervals_field_match_only_text.yml @@ -1,5 +1,8 @@ # integration tests for intervals queries using analyzers setup: + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" - do: indices.create: index: test @@ -23,8 +26,8 @@ setup: --- "Test use_field": - skip: - version: " - 7.1.99" - reason: "Implemented in 7.2" + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" - do: catch: bad_request search: diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.suggest/20_phrase_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.suggest/20_phrase_field_match_only_text.yml index 38afe7db89efd..aff2b3f11101c 100644 --- a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.suggest/20_phrase_field_match_only_text.yml +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.suggest/20_phrase_field_match_only_text.yml @@ -1,6 +1,9 @@ # Integration tests for the phrase suggester with a few analyzers setup: + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" - do: indices.create: index: test @@ -118,6 +121,9 @@ setup: --- "breaks ties by sorting terms": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" # This runs the suggester without bigrams so we can be sure of the sort order - do: search: @@ -174,6 +180,9 @@ setup: --- "doesn't fail when asked to run on a field without unigrams when force_unigrams=false": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" - do: search: rest_total_hits_as_int: true @@ -203,6 +212,9 @@ setup: --- "reverse suggestions": + - skip: + version: " - 2.99.99" + reason: 
"match_only_text was added in 3.0" - do: search: rest_total_hits_as_int: true diff --git a/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/search-as-you-type/20_highlighting_field_match_only_text.yml b/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/search-as-you-type/20_highlighting_field_match_only_text.yml index 306aeb8f5c2f2..3cb8e09c70aed 100644 --- a/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/search-as-you-type/20_highlighting_field_match_only_text.yml +++ b/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/search-as-you-type/20_highlighting_field_match_only_text.yml @@ -1,7 +1,7 @@ setup: - skip: - version: " - 2.11.99" - reason: "match_only_text field was introduced in 2.12.0" + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" - do: indices.create: diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string_field_match_only_text.yml index 5387dc8c0bca6..085c5633ac72b 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string_field_match_only_text.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string_field_match_only_text.yml @@ -1,8 +1,8 @@ --- "validate_query with query_string parameters": - skip: - version: " - 2.11.99" - reason: "match_only_text field was introduced in 2.12.0" + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" - do: indices.create: index: test diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms_field_match_only_text.yml index 60c59a93ca62a..7a96536a2e261 100644 --- 
a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms_field_match_only_text.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms_field_match_only_text.yml @@ -1,8 +1,8 @@ --- "Default index": - skip: - version: " - 2.11.99" - reason: "match_only_text field was introduced in 2.12.0" + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" - do: indices.create: index: goodbad diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only.yml index 01548df586de6..bc41f157dfdc4 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only.yml @@ -1,8 +1,8 @@ --- "Default index": - skip: - version: " - 2.11.99" - reason: "match_only_text field was introduced in 2.12.0" + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" - do: indices.create: index: goodbad @@ -78,8 +78,8 @@ --- "Dedup noise": - skip: - version: " - 2.11.99" - reason: "match_only_text field was introduced in 2.12.0" + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" - do: indices.create: index: goodbad diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/20_highlighting_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/20_highlighting_field_match_only_text.yml index d3fdd87ee3a63..7100d620bf19e 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/20_highlighting_field_match_only_text.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/20_highlighting_field_match_only_text.yml @@ -1,7 +1,7 @@ setup: - skip: - version: " - 2.11.99" - reason: 
"match_only_text field was introduced in 2.12.0" + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" - do: indices.create: index: test @@ -89,9 +89,6 @@ setup: --- "Unified highlighter with stored fields and disabled source": - - skip: - version: "- 7.10.1" - reason: "bug fix introduced in 7.10.2" - do: indices.create: index: disabled_source diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query.yml index 582880b0748c9..be97930d41eb9 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query.yml @@ -45,8 +45,6 @@ setup: type: keyword text: type: text - match_only_text: - type: match_only_text - do: headers: @@ -72,7 +70,6 @@ setup: inner1: "foo" inner2: "bar" text: "foo bar" - match_only_text: "foo bar" - do: headers: @@ -97,7 +94,6 @@ setup: object: inner1: "foo" text: "foo bar" - match_only_text: "foo bar" - do: headers: @@ -123,7 +119,6 @@ setup: object: inner2: "bar" text: "foo bar" - match_only_text: "foo bar" - do: index: @@ -189,8 +184,6 @@ setup: doc_values: false text: type: text - match_only_text: - type: match_only_text - do: headers: @@ -216,7 +209,6 @@ setup: inner1: "foo" inner2: "bar" text: "foo bar" - match_only_text: "foo bar" - do: headers: @@ -241,7 +233,6 @@ setup: object: inner1: "foo" text: "foo bar" - match_only_text: "foo bar" - do: headers: @@ -267,7 +258,6 @@ setup: object: inner2: "bar" text: "foo bar" - match_only_text: "foo bar" - do: index: @@ -332,8 +322,6 @@ setup: type: keyword text: type: text - match_only_text: - type: match_only_text - do: indices.refresh: @@ -546,18 +534,7 @@ setup: field: text - match: {hits.total: 3} ---- -"Test exists query on mapped match_only_text field": - - do: - search: - rest_total_hits_as_int: true - index: test - body: - query: - exists: - field: 
match_only_text - - match: {hits.total: 3} --- "Test exists query on _id field": - do: @@ -844,18 +821,6 @@ setup: - match: {hits.total: 0} --- -"Test exists query on unmapped match_only_text field": - - do: - search: - rest_total_hits_as_int: true - index: test-unmapped - body: - query: - exists: - field: match_only_text - - - match: {hits.total: 0} ---- "Test exists query on binary field in empty index": - do: search: @@ -1063,19 +1028,6 @@ setup: - match: {hits.total: 0} ---- -"Test exists query on match_only_text field in empty index": - - do: - search: - rest_total_hits_as_int: true - index: test-empty - body: - query: - exists: - field: match_only_text - - - match: {hits.total: 0} - --- "Test exists query on mapped binary field with no doc values": - do: @@ -1284,16 +1236,3 @@ setup: field: text - match: {hits.total: 3} - ---- -"Test exists query on mapped match_only_text field with no doc values": - - do: - search: - rest_total_hits_as_int: true - index: test-no-dv - body: - query: - exists: - field: match_only_text - - - match: {hits.total: 3} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query_match_only.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query_match_only.yml index d5a9f24c15825..03626236604a1 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query_match_only.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query_match_only.yml @@ -2,7 +2,6 @@ setup: - skip: version: " - 2.99.99" reason: "match_only_text was added in 3.0" - - skip: features: ["headers"] - do: @@ -118,4 +117,3 @@ setup: field: match_only_text - match: {hits.total: 0} - diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/200_phrase_search_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/200_phrase_search_field_match_only_text.yml index fa51cd83e62ad..a41b8d353e3e9 100644 --- 
a/rest-api-spec/src/main/resources/rest-api-spec/test/search/200_phrase_search_field_match_only_text.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/200_phrase_search_field_match_only_text.yml @@ -1,8 +1,9 @@ --- "search with indexed phrases": - skip: - version: " - 2.11.99" - reason: "match_only_text field was introduced in 2.12.0" + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: indices.create: index: test diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix_field_match_only_text.yml index 522a5fe077846..fc4e9f9de0f38 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix_field_match_only_text.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix_field_match_only_text.yml @@ -1,7 +1,7 @@ setup: - skip: - version: " - 2.11.99" - reason: "match_only_text field was introduced in 2.12.0" + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" - do: indices.create: @@ -49,43 +49,6 @@ setup: - do: indices.refresh: {} ---- -"scoring complete term": - - - do: - search: - rest_total_hits_as_int: true - index: test - body: - query: - match_bool_prefix: - my_field1: "brown fox jump" - - - match: { hits.total: 3 } - - match: { hits.hits.0._source.my_field1: "brown fox jump" } - - match: { hits.hits.1._source.my_field1: "brown emu jump" } - - match: { hits.hits.2._source.my_field1: "jumparound" } - ---- -"scoring partial term": - - - do: - search: - rest_total_hits_as_int: true - index: test - body: - query: - match_bool_prefix: - my_field1: "brown fox ju" - - - match: { hits.total: 3 } - - match: { hits.hits.0._id: "1" } - - match: { hits.hits.0._source.my_field1: "brown fox jump" } - - match: { hits.hits.1._id: "2" } - - match: { hits.hits.1._source.my_field1: "brown emu jump" } - - match: { 
hits.hits.2._id: "3" } - - match: { hits.hits.2._source.my_field1: "jumparound" } - --- "minimum should match": @@ -177,12 +140,6 @@ setup: fields: [ "my_field1" ] - match: { hits.total: 3 } - - match: { hits.hits.0._id: "1" } - - match: { hits.hits.0._source.my_field1: "brown fox jump" } - - match: { hits.hits.1._id: "2" } - - match: { hits.hits.1._source.my_field1: "brown emu jump" } - - match: { hits.hits.2._id: "3" } - - match: { hits.hits.2._source.my_field1: "jumparound" } --- "multi_match single field partial term": @@ -199,12 +156,6 @@ setup: fields: [ "my_field1" ] - match: { hits.total: 3 } - - match: { hits.hits.0._id: "1" } - - match: { hits.hits.0._source.my_field1: "brown fox jump" } - - match: { hits.hits.1._id: "2" } - - match: { hits.hits.1._source.my_field1: "brown emu jump" } - - match: { hits.hits.2._id: "3" } - - match: { hits.hits.2._source.my_field1: "jumparound" } --- "multi_match multiple fields complete term": @@ -221,13 +172,6 @@ setup: fields: [ "my_field1", "my_field2" ] - match: { hits.total: 3 } - - match: { hits.hits.0._id: "4" } - - match: { hits.hits.0._source.my_field1: "dog" } - - match: { hits.hits.0._source.my_field2: "brown fox jump lazy" } - - match: { hits.hits.1._id: "1" } - - match: { hits.hits.1._source.my_field1: "brown fox jump" } - - match: { hits.hits.2._id: "2" } - - match: { hits.hits.2._source.my_field1: "brown emu jump" } --- "multi_match multiple fields partial term": @@ -244,13 +188,6 @@ setup: fields: [ "my_field1", "my_field2" ] - match: { hits.total: 3 } - - match: { hits.hits.0._id: "4" } - - match: { hits.hits.0._source.my_field1: "dog" } - - match: { hits.hits.0._source.my_field2: "brown fox jump lazy" } - - match: { hits.hits.1._id: "1" } - - match: { hits.hits.1._source.my_field1: "brown fox jump" } - - match: { hits.hits.2._id: "2" } - - match: { hits.hits.2._source.my_field1: "brown emu jump" } --- "multi_match multiple fields with analyzer": diff --git 
a/rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only.yml index 40989de61810b..8772d6d0300b7 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only.yml @@ -1,8 +1,8 @@ --- setup: - skip: - version: " - 7.6.99" - reason: "implemented in 7.7.0" + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" - do: indices.create: diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search_shards/10_basic_field_match_only_field.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search_shards/10_basic_field_match_only_field.yml index 142293bc33c5c..cc15796e4697f 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search_shards/10_basic_field_match_only_field.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search_shards/10_basic_field_match_only_field.yml @@ -1,18 +1,9 @@ ---- -"Basic /_search_shards test": - - do: - indices.create: - index: test_1 - - - do: - search_shards: - index: test_1 - routing: foo - - - match: { shards.0.0.index: test_1 } - --- "Search shards aliases with and without filters": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: indices.create: index: test_index diff --git a/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java index 6f9e3f894e091..66d4654e543a2 100644 --- a/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java @@ -354,36 +354,36 @@ public Query existsQuery(QueryShardContext context) { } public Query phraseQuery(TokenStream stream, int slop, boolean 
enablePositionIncrements) throws IOException { - return phraseQuery(stream, slop, enablePositionIncrements, null); - } - - public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) throws IOException { throw new IllegalArgumentException( "Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]" ); } - public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { - return multiPhraseQuery(stream, slop, enablePositionIncrements, null); + public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) throws IOException { + return phraseQuery(stream, slop, enablePositionIncrements); } - public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) - throws IOException { + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { throw new IllegalArgumentException( "Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]" ); } - public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException { - return phrasePrefixQuery(stream, slop, maxExpansions, null); + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) + throws IOException { + return multiPhraseQuery(stream, slop, enablePositionIncrements); } - public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, QueryShardContext context) throws IOException { + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException { throw new IllegalArgumentException( "Can only use phrase prefix queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]" ); } + public Query 
phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, QueryShardContext context) throws IOException { + return phrasePrefixQuery(stream, slop, maxExpansions); + } + public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRewriteMethod method, QueryShardContext context) { throw new IllegalArgumentException( "Can only use span prefix queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]" diff --git a/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java index e69336dc385aa..fb97f8c309a70 100644 --- a/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java @@ -247,6 +247,8 @@ public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositi BooleanQuery.Builder builder = new BooleanQuery.Builder(); for (Term[] terms : multiPhraseQuery.getTermArrays()) { if (terms.length > 1) { + // Multiple terms in the same position, creating a disjunction query for it and + // adding it to conjunction query BooleanQuery.Builder disjunctions = new BooleanQuery.Builder(); for (Term term : terms) { disjunctions.add(new TermQuery(term), BooleanClause.Occur.SHOULD); @@ -266,11 +268,15 @@ public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, BooleanQuery.Builder builder = new BooleanQuery.Builder(); for (int i = 0; i < termArray.size(); i++) { if (i == termArray.size() - 1) { + // last element of the term Array is a prefix, thus creating a prefix query for it and adding it to + // conjunction query MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery(name()); mqb.add(termArray.get(i).toArray(new Term[0])); builder.add(mqb, BooleanClause.Occur.FILTER); } else { if (termArray.get(i).size() > 1) { + // multiple terms in the same position, creating a disjunction query for it 
and + // adding it to conjunction query BooleanQuery.Builder disjunctions = new BooleanQuery.Builder(); for (Term term : termArray.get(i)) { disjunctions.add(new TermQuery(term), BooleanClause.Occur.SHOULD); From b8ce7f89b0030dd2435f1a10a9c02f52fa34f4ed Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Wed, 6 Dec 2023 16:23:43 +0530 Subject: [PATCH 18/24] Fix flaky test due to random indexwriter Signed-off-by: Rishabh Maurya --- .../query/SourceFieldMatchQueryTests.java | 40 ++++++++++++------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java b/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java index 894e75099ae34..3a7588213d436 100644 --- a/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java +++ b/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java @@ -8,9 +8,14 @@ package org.opensearch.index.query; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; +import org.apache.lucene.store.Directory; import org.opensearch.core.index.Index; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.mapper.MapperServiceTestCase; @@ -77,13 +82,14 @@ public void testAllPossibleScenarios() throws IOException { queryShardContext.getFieldType("dessert"), queryShardContext ); - - withLuceneIndex(mapperService, iw -> { - for (ParsedDocument d : docs) { - iw.addDocument(d.rootDoc()); - } - }, reader -> { - IndexSearcher searcher = newSearcher(reader); + Directory dir = newDirectory(); + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(mapperService.indexAnalyzer())); + for (ParsedDocument d : docs) { + 
iw.addDocument(d.rootDoc()); + } + try (IndexReader reader = DirectoryReader.open(iw)) { + iw.close(); + IndexSearcher searcher = new IndexSearcher(reader); TopDocs topDocs = searcher.search(matchBoth, 10); assertEquals(topDocs.totalHits.value, 1); assertEquals(topDocs.scoreDocs[0].doc, 0); @@ -103,7 +109,8 @@ public void testAllPossibleScenarios() throws IOException { for (ScoreDoc scoreDoc : topDocs.scoreDocs) { assertEquals(scoreDoc.score, 1.0, 0.00000000001); } - }); + } + dir.close(); } public void testSourceDisabled() throws IOException { @@ -150,14 +157,17 @@ public void testMissingField() throws IOException { queryShardContext.getFieldType("dessert"), queryShardContext ); - withLuceneIndex(mapperService, iw -> { - for (ParsedDocument d : docs) { - iw.addDocument(d.rootDoc()); - } - }, reader -> { - IndexSearcher searcher = newSearcher(reader); + Directory dir = newDirectory(); + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(mapperService.indexAnalyzer())); + for (ParsedDocument d : docs) { + iw.addDocument(d.rootDoc()); + } + try (IndexReader reader = DirectoryReader.open(iw)) { + iw.close(); + IndexSearcher searcher = new IndexSearcher(reader); TopDocs topDocs = searcher.search(matchDelegate, 10); assertEquals(topDocs.totalHits.value, 0); - }); + } + dir.close(); } } From b7a607b724393649cf72fd753dc1e85c57d516bb Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Fri, 8 Dec 2023 11:02:24 +0530 Subject: [PATCH 19/24] pr comment: header modification Signed-off-by: Rishabh Maurya --- .../MatchOnlyTextFieldAnalyzerModeTests.java | 24 ------------------- .../mapper/MatchOnlyTextFieldMapperTests.java | 24 ------------------- .../mapper/MatchOnlyTextFieldTypeTests.java | 23 ------------------ 3 files changed, 71 deletions(-) diff --git a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldAnalyzerModeTests.java b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldAnalyzerModeTests.java index 
f6b80bbd741f2..13cb279418fa8 100644 --- a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldAnalyzerModeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldAnalyzerModeTests.java @@ -6,30 +6,6 @@ * compatible open source license. */ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Modifications Copyright OpenSearch Contributors. See - * GitHub history for details. - */ - package org.opensearch.index.mapper; public class MatchOnlyTextFieldAnalyzerModeTests extends TextFieldAnalyzerModeTests { diff --git a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java index 36aee352732d2..c0bdc2a576990 100644 --- a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java @@ -6,30 +6,6 @@ * compatible open source license. */ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. 
Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/* - * Modifications Copyright OpenSearch Contributors. See - * GitHub history for details. - */ - package org.opensearch.index.mapper; import org.apache.lucene.index.DocValuesType; diff --git a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldTypeTests.java index acd12c04a99bc..51234fa04ddc2 100644 --- a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldTypeTests.java @@ -6,29 +6,6 @@ * compatible open source license. */ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ -/* - * Modifications Copyright OpenSearch Contributors. See - * GitHub history for details. - */ - package org.opensearch.index.mapper; import org.opensearch.common.lucene.Lucene; From ee78960ad1e5d6efba12338e8a60836e0c978e9f Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Wed, 27 Dec 2023 15:53:39 -0800 Subject: [PATCH 20/24] Address PR comments Signed-off-by: Rishabh Maurya --- ...20_ngram_search_field_match_only_text.yml} | 0 ...s_with_synonyms_field_match_only_text.yml} | 0 ...0_synonym_graph_field_match_only_text.yml} | 0 ... => 90_sig_text_field_match_only_text.yml} | 0 ...l => 160_exists_query_match_only_text.yml} | 0 ...isallow_queries_field_match_only_text.yml} | 6 -- .../index/query/SourceFieldMatchQuery.java | 12 +-- .../query/SourceFieldMatchQueryTests.java | 76 +++++++++---------- 8 files changed, 44 insertions(+), 50 deletions(-) rename modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/{20_ngram_search_field_match_only.yml => 20_ngram_search_field_match_only_text.yml} (100%) rename modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/{50_queries_with_synonyms_field_match_only.yml => 50_queries_with_synonyms_field_match_only_text.yml} (100%) rename modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/{60_synonym_graph_field_match_only.yml => 60_synonym_graph_field_match_only_text.yml} (100%) rename rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/{90_sig_text_field_match_only.yml => 90_sig_text_field_match_only_text.yml} (100%) rename rest-api-spec/src/main/resources/rest-api-spec/test/search/{160_exists_query_match_only.yml => 160_exists_query_match_only_text.yml} (100%) rename rest-api-spec/src/main/resources/rest-api-spec/test/search/{320_disallow_queries_field_match_only.yml => 
320_disallow_queries_field_match_only_text.yml} (96%) diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only_text.yml similarity index 100% rename from modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only.yml rename to modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only_text.yml diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/50_queries_with_synonyms_field_match_only.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/50_queries_with_synonyms_field_match_only_text.yml similarity index 100% rename from modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/50_queries_with_synonyms_field_match_only.yml rename to modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/50_queries_with_synonyms_field_match_only_text.yml diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/60_synonym_graph_field_match_only.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/60_synonym_graph_field_match_only_text.yml similarity index 100% rename from modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/60_synonym_graph_field_match_only.yml rename to modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/60_synonym_graph_field_match_only_text.yml diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only_text.yml similarity index 100% rename 
from rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only.yml rename to rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only_text.yml diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query_match_only.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query_match_only_text.yml similarity index 100% rename from rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query_match_only.yml rename to rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query_match_only_text.yml diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only_text.yml similarity index 96% rename from rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only.yml rename to rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only_text.yml index 8772d6d0300b7..f4faf87eb83cc 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only_text.yml @@ -34,9 +34,6 @@ setup: --- teardown: - - skip: - version: " - 7.6.99" - reason: "implemented in 7.7.0" - do: cluster.put_settings: @@ -46,9 +43,6 @@ teardown: --- "Test disallow expensive queries": - - skip: - version: " - 7.6.99" - reason: "implemented in 7.7.0" ### Check for initial setting = null -> false - do: diff --git a/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java b/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java index fd8d7045f307e..b0be20e417efe 100644 --- a/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java 
+++ b/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java @@ -34,12 +34,12 @@ * Useful to query against field type which doesn't store positional data and field is not stored/computed dynamically. */ public class SourceFieldMatchQuery extends Query { - final private Query delegateQuery; - final private Query filter; - final private SearchLookup lookup; - final private MappedFieldType fieldType; - final private SourceValueFetcher valueFetcher; - final private QueryShardContext context; + private final Query delegateQuery; + private final Query filter; + private final SearchLookup lookup; + private final MappedFieldType fieldType; + private final SourceValueFetcher valueFetcher; + private final QueryShardContext context; /** * Constructs a SourceFieldMatchQuery. diff --git a/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java b/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java index 3a7588213d436..6af717a97b328 100644 --- a/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java +++ b/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java @@ -82,35 +82,35 @@ public void testAllPossibleScenarios() throws IOException { queryShardContext.getFieldType("dessert"), queryShardContext ); - Directory dir = newDirectory(); - IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(mapperService.indexAnalyzer())); - for (ParsedDocument d : docs) { - iw.addDocument(d.rootDoc()); - } - try (IndexReader reader = DirectoryReader.open(iw)) { - iw.close(); - IndexSearcher searcher = new IndexSearcher(reader); - TopDocs topDocs = searcher.search(matchBoth, 10); - assertEquals(topDocs.totalHits.value, 1); - assertEquals(topDocs.scoreDocs[0].doc, 0); - - topDocs = searcher.search(matchDelegate, 10); - assertEquals(topDocs.totalHits.value, 0); - - topDocs = searcher.search(matchFilter, 10); - assertEquals(topDocs.totalHits.value, 0); - - topDocs = 
searcher.search(matchNone, 10); - assertEquals(topDocs.totalHits.value, 0); - - topDocs = searcher.search(matchMultipleDocs, 10); - assertEquals(topDocs.totalHits.value, 2); - // assert constant score - for (ScoreDoc scoreDoc : topDocs.scoreDocs) { - assertEquals(scoreDoc.score, 1.0, 0.00000000001); + try (Directory dir = newDirectory()) { + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(mapperService.indexAnalyzer())); + for (ParsedDocument d : docs) { + iw.addDocument(d.rootDoc()); + } + try (IndexReader reader = DirectoryReader.open(iw)) { + iw.close(); + IndexSearcher searcher = new IndexSearcher(reader); + TopDocs topDocs = searcher.search(matchBoth, 10); + assertEquals(topDocs.totalHits.value, 1); + assertEquals(topDocs.scoreDocs[0].doc, 0); + + topDocs = searcher.search(matchDelegate, 10); + assertEquals(topDocs.totalHits.value, 0); + + topDocs = searcher.search(matchFilter, 10); + assertEquals(topDocs.totalHits.value, 0); + + topDocs = searcher.search(matchNone, 10); + assertEquals(topDocs.totalHits.value, 0); + + topDocs = searcher.search(matchMultipleDocs, 10); + assertEquals(topDocs.totalHits.value, 2); + // assert constant score + for (ScoreDoc scoreDoc : topDocs.scoreDocs) { + assertEquals(scoreDoc.score, 1.0, 0.00000000001); + } } } - dir.close(); } public void testSourceDisabled() throws IOException { @@ -157,17 +157,17 @@ public void testMissingField() throws IOException { queryShardContext.getFieldType("dessert"), queryShardContext ); - Directory dir = newDirectory(); - IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(mapperService.indexAnalyzer())); - for (ParsedDocument d : docs) { - iw.addDocument(d.rootDoc()); - } - try (IndexReader reader = DirectoryReader.open(iw)) { - iw.close(); - IndexSearcher searcher = new IndexSearcher(reader); - TopDocs topDocs = searcher.search(matchDelegate, 10); - assertEquals(topDocs.totalHits.value, 0); + try (Directory dir = newDirectory()) { + IndexWriter iw = new IndexWriter(dir, new 
IndexWriterConfig(mapperService.indexAnalyzer())); + for (ParsedDocument d : docs) { + iw.addDocument(d.rootDoc()); + } + try (IndexReader reader = DirectoryReader.open(iw)) { + iw.close(); + IndexSearcher searcher = new IndexSearcher(reader); + TopDocs topDocs = searcher.search(matchDelegate, 10); + assertEquals(topDocs.totalHits.value, 0); + } } - dir.close(); } } From 190767323e6e6382e1b7d870a514d3a9b1537090 Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Thu, 28 Dec 2023 09:45:40 -0800 Subject: [PATCH 21/24] added change to the right section of CHANGELOG Signed-off-by: Rishabh Maurya --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 579d2695d3125..159bbe59703e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -119,6 +119,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Create separate transport action for render search template action ([#11170](https://github.com/opensearch-project/OpenSearch/pull/11170)) - Add additional handling in SearchTemplateRequest when simulate is set to true ([#11591](https://github.com/opensearch-project/OpenSearch/pull/11591)) - Introduce cluster level setting `cluster.index.restrict.replication.type` to prevent replication type setting override during index creations([#11583](https://github.com/opensearch-project/OpenSearch/pull/11583)) +- New match_only_text field, type of text field optimized for storage by trading off positional queries performance ([#6836](https://github.com/opensearch-project/OpenSearch/pull/11039)) ### Dependencies - Bumps jetty version to 9.4.52.v20230823 to fix GMS-2023-1857 ([#9822](https://github.com/opensearch-project/OpenSearch/pull/9822)) From c04f89ef5fb1504af1225a22264f002d9e0ed69f Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Fri, 29 Dec 2023 13:02:01 -0800 Subject: [PATCH 22/24] overriding the textFieldType before every test Signed-off-by: Rishabh Maurya ---
.../index/mapper/MatchOnlyTextFieldMapperTests.java | 6 +++--- .../org/opensearch/index/mapper/TextFieldMapperTests.java | 8 +++++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java index c0bdc2a576990..ab1e601cedb18 100644 --- a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java @@ -29,7 +29,7 @@ import org.opensearch.index.query.QueryShardContext; import org.opensearch.index.query.SourceFieldMatchQuery; import org.opensearch.index.search.MatchQuery; -import org.junit.BeforeClass; +import org.junit.Before; import java.io.IOException; import java.util.HashMap; @@ -42,8 +42,8 @@ public class MatchOnlyTextFieldMapperTests extends TextFieldMapperTests { - @BeforeClass - public static void beforeClass() { + @Before + public void setup() { textFieldName = "match_only_text"; } diff --git a/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java index 3dce7a0ed646f..a22bfa5e845b1 100644 --- a/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java @@ -81,6 +81,7 @@ import org.opensearch.index.query.MatchPhraseQueryBuilder; import org.opensearch.index.query.QueryShardContext; import org.opensearch.index.search.MatchQuery; +import org.junit.Before; import java.io.IOException; import java.util.Arrays; @@ -95,7 +96,12 @@ public class TextFieldMapperTests extends MapperTestCase { - public static String textFieldName = "text"; + public String textFieldName = "text"; + + @Before + public void setup() { + textFieldName = "text"; + } @Override protected void 
writeFieldValue(XContentBuilder builder) throws IOException { From e879baf45758671290a87a86ead3923b7255811f Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Fri, 29 Dec 2023 13:23:24 -0800 Subject: [PATCH 23/24] rename @Before method Signed-off-by: Rishabh Maurya --- .../opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java index ab1e601cedb18..580f8cccc9af5 100644 --- a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java @@ -43,7 +43,7 @@ public class MatchOnlyTextFieldMapperTests extends TextFieldMapperTests { @Before - public void setup() { + public void setupMatchOnlyTextFieldMapper() { textFieldName = "match_only_text"; } From f1fb4433a9f6599a37c177bbb93917104942f587 Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Tue, 2 Jan 2024 14:19:22 -0800 Subject: [PATCH 24/24] update changelog description Signed-off-by: Rishabh Maurya --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 159bbe59703e5..2e22f2cd539e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -119,7 +119,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Create separate transport action for render search template action ([#11170](https://github.com/opensearch-project/OpenSearch/pull/11170)) - Add additional handling in SearchTemplateRequest when simulate is set to true ([#11591](https://github.com/opensearch-project/OpenSearch/pull/11591)) - Introduce cluster level setting `cluster.index.restrict.replication.type` to prevent replication type setting override during index 
creations([#11583](https://github.com/opensearch-project/OpenSearch/pull/11583)) -- New match_only_text field, type of text field optimized for storage by trading off positional queries performance ([#6836](https://github.com/opensearch-project/OpenSearch/pull/11039)) +- Add match_only_text field that is optimized for storage by trading off positional queries performance ([#6836](https://github.com/opensearch-project/OpenSearch/pull/11039)) ### Dependencies - Bumps jetty version to 9.4.52.v20230823 to fix GMS-2023-1857 ([#9822](https://github.com/opensearch-project/OpenSearch/pull/9822))