Skip to content

Commit

Permalink
Added unit tests for phrase and multiphrase query validation
Browse files Browse the repository at this point in the history
Signed-off-by: Rishabh Maurya <[email protected]>
  • Loading branch information
rishabhmaurya committed Nov 15, 2023
1 parent 05d96bd commit 6fdb8da
Show file tree
Hide file tree
Showing 4 changed files with 138 additions and 67 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -112,12 +112,11 @@ private static Parameter<String> indexOptions(Function<FieldMapper, String> init
m -> Optional.ofNullable(((MatchOnlyTextFieldType) m.mappedFieldType).prefixFieldType)
.map(p -> new PrefixConfig(p.minChars, p.maxChars))
.orElse(null)
).acceptsNull().setValidator( v -> {
if (v != null) {
throw new MapperParsingException("Index prefixes cannot be enabled on for match_only_text field. Use text field instead");
}
).acceptsNull().setValidator(v -> {
if (v != null) {
throw new MapperParsingException("Index prefixes cannot be enabled on for match_only_text field. Use text field instead");
}
);
});

private static Parameter<Boolean> norms(Function<FieldMapper, Boolean> initializer) {
return Parameter.boolParam("norms", false, initializer, false)
Expand Down Expand Up @@ -226,6 +225,7 @@ public static final class MatchOnlyTextFieldType extends TextFieldMapper.TextFie
private final boolean indexPhrases = false;

private PrefixFieldType prefixFieldType;

@Override
public String typeName() {
return CONTENT_TYPE;
Expand All @@ -242,13 +242,7 @@ public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncremen
for (Term term : phraseQuery.getTerms()) {
builder.add(new TermQuery(term), BooleanClause.Occur.FILTER);
}
return new SourceFieldMatchQuery(
builder.build(),
phraseQuery,
this,
(SourceValueFetcher) this.valueFetcher(context, context.lookup(), null),
context.lookup()
);
return new SourceFieldMatchQuery(builder.build(), phraseQuery, this, context);
}

@Override
Expand All @@ -257,19 +251,17 @@ public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositi
MultiPhraseQuery multiPhraseQuery = (MultiPhraseQuery) super.multiPhraseQuery(stream, slop, enablePositionIncrements);
BooleanQuery.Builder builder = new BooleanQuery.Builder();
for (Term[] terms : multiPhraseQuery.getTermArrays()) {
BooleanQuery.Builder disjunctions = new BooleanQuery.Builder();
for (Term term : terms) {
disjunctions.add(new TermQuery(term), BooleanClause.Occur.SHOULD);
if (terms.length > 1) {
BooleanQuery.Builder disjunctions = new BooleanQuery.Builder();
for (Term term : terms) {
disjunctions.add(new TermQuery(term), BooleanClause.Occur.SHOULD);
}
builder.add(disjunctions.build(), BooleanClause.Occur.FILTER);
} else {
builder.add(new TermQuery(terms[0]), BooleanClause.Occur.FILTER);
}
builder.add(disjunctions.build(), BooleanClause.Occur.FILTER);
}
return new SourceFieldMatchQuery(
builder.build(),
multiPhraseQuery,
this,
(SourceValueFetcher) this.valueFetcher(context, context.lookup(), null),
context.lookup()
);
return new SourceFieldMatchQuery(builder.build(), multiPhraseQuery, this, context);
}

@Override
Expand All @@ -290,13 +282,7 @@ public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions,
}
builder.add(disjunctions.build(), BooleanClause.Occur.FILTER);
}
return new SourceFieldMatchQuery(
builder.build(),
phrasePrefixQuery,
this,
(SourceValueFetcher) this.valueFetcher(context, context.lookup(), null),
context.lookup()
);
return new SourceFieldMatchQuery(builder.build(), phrasePrefixQuery, this, context);
}

private List<List<Term>> getTermsFromTokenStream(TokenStream stream) throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,28 +39,23 @@ public class SourceFieldMatchQuery extends Query {
final private SearchLookup lookup;
final private MappedFieldType fieldType;
final private SourceValueFetcher valueFetcher;
final private QueryShardContext context;

/**
* Constructs a SourceFieldMatchQuery.
*
* @param delegateQuery The parent query to use to find matches.
* @param filter The query used to filter further by running against field value computed using _source field.
* @param fieldType The mapped field type.
* @param valueFetcher The source value fetcher.
* @param lookup The search lookup.
* @param context The QueryShardContext to get lookup and valueFetcher
*/
public SourceFieldMatchQuery(
Query delegateQuery,
Query filter,
MappedFieldType fieldType,
SourceValueFetcher valueFetcher,
SearchLookup lookup
) {
public SourceFieldMatchQuery(Query delegateQuery, Query filter, MappedFieldType fieldType, QueryShardContext context) {
this.delegateQuery = delegateQuery;
this.filter = filter;
this.fieldType = fieldType;
this.valueFetcher = valueFetcher;
this.lookup = lookup;
this.context = context;
this.lookup = context.lookup();
this.valueFetcher = (SourceValueFetcher) fieldType.valueFetcher(context, lookup, null);
}

@Override
Expand All @@ -74,7 +69,7 @@ public Query rewrite(IndexSearcher indexSearcher) throws IOException {
if (rewritten == delegateQuery) {
return this;
}
return new SourceFieldMatchQuery(rewritten, filter, fieldType, valueFetcher, lookup);
return new SourceFieldMatchQuery(rewritten, filter, fieldType, context);
}

@Override
Expand Down Expand Up @@ -132,15 +127,14 @@ public boolean equals(Object o) {
}
SourceFieldMatchQuery other = (SourceFieldMatchQuery) o;
return Objects.equals(this.delegateQuery, other.delegateQuery)
&& this.filter == other.filter
&& Objects.equals(this.lookup, other.lookup)
&& Objects.equals(this.filter, other.filter)
&& Objects.equals(this.fieldType, other.fieldType)
&& Objects.equals(this.valueFetcher, other.valueFetcher);
&& Objects.equals(this.context, other.context);
}

@Override
public int hashCode() {
return Objects.hash(classHash(), delegateQuery, filter, lookup, fieldType, valueFetcher);
return Objects.hash(classHash(), delegateQuery, filter, fieldType, context);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,21 @@
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.tests.analysis.MockSynonymAnalyzer;
import org.opensearch.core.common.Strings;
import org.opensearch.core.xcontent.MediaTypeRegistry;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.index.query.MatchPhraseQueryBuilder;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.index.query.SourceFieldMatchQuery;
import org.opensearch.index.search.MatchQuery;
import org.junit.BeforeClass;

import java.io.IOException;
Expand All @@ -48,6 +60,7 @@

import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.core.Is.is;

public class MatchOnlyTextFieldMapperTests extends TextFieldMapperTests {

Expand Down Expand Up @@ -91,10 +104,10 @@ public void testIndexOptions() throws IOException {
Map<String, IndexOptions> supportedOptions = new HashMap<>();
supportedOptions.put("docs", IndexOptions.DOCS);

Map<String, IndexOptions> unSupportedOptions = new HashMap<>();
unSupportedOptions.put("freqs", IndexOptions.DOCS_AND_FREQS);
unSupportedOptions.put("positions", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
unSupportedOptions.put("offsets", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
Map<String, IndexOptions> unsupportedOptions = new HashMap<>();
unsupportedOptions.put("freqs", IndexOptions.DOCS_AND_FREQS);
unsupportedOptions.put("positions", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
unsupportedOptions.put("offsets", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);

for (String option : supportedOptions.keySet()) {
XContentBuilder mapping = MediaTypeRegistry.JSON.contentBuilder().startObject().startObject("_doc").startObject("properties");
Expand All @@ -113,7 +126,7 @@ public void testIndexOptions() throws IOException {
assertEquals(options, fields[0].fieldType().indexOptions());
}

for (String option : unSupportedOptions.keySet()) {
for (String option : unsupportedOptions.keySet()) {
XContentBuilder mapping = MediaTypeRegistry.JSON.contentBuilder().startObject().startObject("_doc").startObject("properties");
mapping.startObject(option).field("type", textFieldName).field("index_options", option).endObject();
mapping.endObject().endObject().endObject();
Expand Down Expand Up @@ -143,33 +156,111 @@ public void testAnalyzedFieldPositionIncrementWithoutPositions() {
}

@Override
public void testBWCSerialization() throws IOException {
}
public void testBWCSerialization() throws IOException {}

@Override
public void testPositionIncrementGap() throws IOException {
}
public void testPositionIncrementGap() throws IOException {}

@Override
public void testDefaultPositionIncrementGap() throws IOException {
}
public void testDefaultPositionIncrementGap() throws IOException {}

@Override
public void testIndexPrefixMapping() throws IOException {
}
public void testIndexPrefixMapping() throws IOException {}

@Override
public void testIndexPrefixIndexTypes() throws IOException {
}
public void testIndexPrefixIndexTypes() throws IOException {}

@Override
public void testFastPhrasePrefixes() throws IOException {
}
public void testFastPhrasePrefixes() throws IOException {}

@Override
public void testFastPhraseMapping() throws IOException {
}
public void testFastPhraseMapping() throws IOException {}

@Override
public void testSimpleMerge() throws IOException {
public void testSimpleMerge() throws IOException {}

public void testPhraseQuery() throws IOException {
MapperService mapperService = createMapperService(mapping(b -> {
b.startObject("field").field("type", textFieldName).field("analyzer", "my_stop_analyzer").endObject();
// "standard" will be replaced with MockSynonymAnalyzer
b.startObject("synfield").field("type", textFieldName).field("analyzer", "standard").endObject();
}));
QueryShardContext queryShardContext = createQueryShardContext(mapperService);

Query q = new MatchPhraseQueryBuilder("field", "two words").toQuery(queryShardContext);
Query expectedQuery = new SourceFieldMatchQuery(
new BooleanQuery.Builder().add(new TermQuery(new Term("field", "two")), BooleanClause.Occur.FILTER)
.add(new TermQuery(new Term("field", "words")), BooleanClause.Occur.FILTER)
.build(),
new PhraseQuery("field", "two", "words"),
mapperService.fieldType("field"),
queryShardContext
);

assertThat(q, is(expectedQuery));
Query q4 = new MatchPhraseQueryBuilder("field", "singleton").toQuery(queryShardContext);
assertThat(q4, is(new TermQuery(new Term("field", "singleton"))));

Query q2 = new MatchPhraseQueryBuilder("field", "three words here").toQuery(queryShardContext);
expectedQuery = new SourceFieldMatchQuery(
new BooleanQuery.Builder().add(new TermQuery(new Term("field", "three")), BooleanClause.Occur.FILTER)
.add(new TermQuery(new Term("field", "words")), BooleanClause.Occur.FILTER)
.add(new TermQuery(new Term("field", "here")), BooleanClause.Occur.FILTER)
.build(),
new PhraseQuery("field", "three", "words", "here"),
mapperService.fieldType("field"),
queryShardContext
);
assertThat(q2, is(expectedQuery));

Query q3 = new MatchPhraseQueryBuilder("field", "two words").slop(2).toQuery(queryShardContext);
expectedQuery = new SourceFieldMatchQuery(
new BooleanQuery.Builder().add(new TermQuery(new Term("field", "two")), BooleanClause.Occur.FILTER)
.add(new TermQuery(new Term("field", "words")), BooleanClause.Occur.FILTER)
.build(),
new PhraseQuery(2, "field", "two", "words"),
mapperService.fieldType("field"),
queryShardContext
);
assertThat(q3, is(expectedQuery));

Query q5 = new MatchPhraseQueryBuilder("field", "sparkle a stopword").toQuery(queryShardContext);
expectedQuery = new SourceFieldMatchQuery(
new BooleanQuery.Builder().add(new TermQuery(new Term("field", "sparkle")), BooleanClause.Occur.FILTER)
.add(new TermQuery(new Term("field", "stopword")), BooleanClause.Occur.FILTER)
.build(),
new PhraseQuery.Builder().add(new Term("field", "sparkle")).add(new Term("field", "stopword"), 2).build(),
mapperService.fieldType("field"),
queryShardContext
);
assertThat(q5, is(expectedQuery));

MatchQuery matchQuery = new MatchQuery(queryShardContext);
matchQuery.setAnalyzer(new MockSynonymAnalyzer());
Query q6 = matchQuery.parse(MatchQuery.Type.PHRASE, "synfield", "motor dogs");
expectedQuery = new SourceFieldMatchQuery(
new BooleanQuery.Builder().add(new TermQuery(new Term("synfield", "motor")), BooleanClause.Occur.FILTER)
.add(
new BooleanQuery.Builder().add(new TermQuery(new Term("synfield", "dogs")), BooleanClause.Occur.SHOULD)
.add(new TermQuery(new Term("synfield", "dog")), BooleanClause.Occur.SHOULD)
.build(),
BooleanClause.Occur.FILTER
)
.build(),
new MultiPhraseQuery.Builder().add(new Term("synfield", "motor"))
.add(new Term[] { new Term("synfield", "dogs"), new Term("synfield", "dog") }, 1)
.build(),
mapperService.fieldType("synfield"),
queryShardContext
);
assertThat(q6, is(expectedQuery));
}

public void testMultiPhraseQuery() {

}

public void testPrefixQuery() {

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -697,7 +697,7 @@ public void testIndexPrefixIndexTypes() throws IOException {
}

public void testNestedIndexPrefixes() throws IOException {

}

public void testFastPhraseMapping() throws IOException {
Expand Down

0 comments on commit 6fdb8da

Please sign in to comment.