Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add use_field option to intervals query #40157

Merged
merged 5 commits into from
Mar 20, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/reference/query-dsl/intervals-query.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@ Which analyzer should be used to analyze terms in the `query`. By
default, the search analyzer of the top-level field will be used.
`filter`::
An optional <<interval_filter,interval filter>>
`use_field`::
If specified, then match intervals from this field rather than the top-level field.
Unless an `analyzer` is given explicitly, terms are analyzed using the search analyzer
of this field. This allows you to search across multiple fields as if they were all
the same field; for example, you could index the same text into stemmed and unstemmed
fields, and search for stemmed tokens near unstemmed ones.

[[intervals-all_of]]
==== `all_of`
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# integration tests for intervals queries using analyzers
setup:
- do:
indices.create:
index: test
body:
mappings:
properties:
text:
type: text
analyzer: standard
text_en:
type: text
analyzer: english
- do:
bulk:
refresh: true
body:
- '{"index": {"_index": "test", "_id": "4"}}'
- '{"text" : "Outside it is cold and wet and raining cats and dogs",
"text_en" : "Outside it is cold and wet and raining cats and dogs"}'

---
"Test use_field":
- skip:
version: " - 7.9.99" # TODO change to 7.0.99 after backport
reason: "Implemented in 7.1"
- do:
search:
index: test
body:
query:
intervals:
text:
all_of:
intervals:
- match:
query: cats
- match:
query: dog
max_gaps: 1
- match: { hits.total.value: 0 }
- do:
search:
index: test
body:
query:
intervals:
text:
all_of:
intervals:
- match:
query: cats
- match:
query: dog
use_field: text_en
max_gaps: 1
- match: { hits.total.value: 1 }

Original file line number Diff line number Diff line change
Expand Up @@ -637,7 +637,7 @@ public Query existsQuery(QueryShardContext context) {
@Override
public IntervalsSource intervals(String text, int maxGaps, boolean ordered, NamedAnalyzer analyzer) throws IOException {
if (indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
throw new IllegalArgumentException("Cannot create intervals against field [" + name() + "] with no positions indexed");
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
}
IntervalBuilder builder = new IntervalBuilder(name(), analyzer == null ? searchAnalyzer() : analyzer);
return builder.analyzeText(text, maxGaps, ordered);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

package org.elasticsearch.index.query;

import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.intervals.IntervalQuery;
Expand All @@ -31,7 +30,9 @@
import org.elasticsearch.index.mapper.MappedFieldType;

import java.io.IOException;
import java.util.HashSet;
import java.util.Objects;
import java.util.Set;

/**
* Builder for {@link IntervalQuery}
Expand Down Expand Up @@ -128,9 +129,14 @@ protected Query doToQuery(QueryShardContext context) throws IOException {
// Be lenient with unmapped fields so that cross-index search will work nicely
return new MatchNoDocsQuery();
}
if (fieldType.tokenized() == false ||
fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
throw new IllegalArgumentException("Cannot create IntervalQuery over field [" + field + "] with no indexed positions");
Set<String> maskedFields = new HashSet<>();
sourceProvider.extractFields(maskedFields);
for (String maskedField : maskedFields) {
MappedFieldType ft = context.fieldMapper(maskedField);
if (ft == null) {
// Be lenient with unmapped fields so that cross-index search will work nicely
return new MatchNoDocsQuery();
}
}
return new IntervalQuery(field, sourceProvider.getSource(context, fieldType));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.lucene.search.intervals.IntervalIterator;
import org.apache.lucene.search.intervals.Intervals;
import org.apache.lucene.search.intervals.IntervalsSource;
import org.elasticsearch.Version;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.io.stream.NamedWriteable;
Expand All @@ -43,6 +44,7 @@
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Set;

import static org.elasticsearch.common.xcontent.ConstructingObjectParser.constructorArg;
import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg;
Expand All @@ -59,6 +61,8 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont

public abstract IntervalsSource getSource(QueryShardContext context, MappedFieldType fieldType) throws IOException;

public abstract void extractFields(Set<String> fields);

@Override
public abstract int hashCode();

Expand Down Expand Up @@ -99,13 +103,15 @@ public static class Match extends IntervalsSourceProvider {
private final boolean ordered;
private final String analyzer;
private final IntervalFilter filter;
private final String useField;

public Match(String query, int maxGaps, boolean ordered, String analyzer, IntervalFilter filter) {
public Match(String query, int maxGaps, boolean ordered, String analyzer, IntervalFilter filter, String useField) {
this.query = query;
this.maxGaps = maxGaps;
this.ordered = ordered;
this.analyzer = analyzer;
this.filter = filter;
this.useField = useField;
}

public Match(StreamInput in) throws IOException {
Expand All @@ -114,6 +120,12 @@ public Match(StreamInput in) throws IOException {
this.ordered = in.readBoolean();
this.analyzer = in.readOptionalString();
this.filter = in.readOptionalWriteable(IntervalFilter::new);
if (in.getVersion().onOrAfter(Version.V_7_1_0)) {
this.useField = in.readOptionalString();
}
else {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: you don't need the else?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

useField is final, so it needs to be set somewhere

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I missed the final, thanks

this.useField = null;
}
}

@Override
Expand All @@ -122,13 +134,28 @@ public IntervalsSource getSource(QueryShardContext context, MappedFieldType fiel
if (this.analyzer != null) {
analyzer = context.getMapperService().getIndexAnalyzers().get(this.analyzer);
}
IntervalsSource source = fieldType.intervals(query, maxGaps, ordered, analyzer);
IntervalsSource source;
if (useField != null) {
fieldType = context.fieldMapper(useField);
assert fieldType != null;
source = Intervals.fixField(useField, fieldType.intervals(query, maxGaps, ordered, analyzer));
}
else {
source = fieldType.intervals(query, maxGaps, ordered, analyzer);
}
if (filter != null) {
return filter.filter(source, context, fieldType);
}
return source;
}

/**
 * Adds the {@code use_field} override of this match source to the given set.
 * Nothing is added when no override was configured.
 *
 * @param fields the set that receives the overriding field name
 */
@Override
public void extractFields(Set<String> fields) {
    if (useField == null) {
        // No override: this source reads from the top-level field only.
        return;
    }
    fields.add(useField);
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
Expand All @@ -138,12 +165,13 @@ public boolean equals(Object o) {
ordered == match.ordered &&
Objects.equals(query, match.query) &&
Objects.equals(filter, match.filter) &&
Objects.equals(useField, match.useField) &&
Objects.equals(analyzer, match.analyzer);
}

@Override
public int hashCode() {
return Objects.hash(query, maxGaps, ordered, analyzer, filter);
return Objects.hash(query, maxGaps, ordered, analyzer, filter, useField);
}

@Override
Expand All @@ -158,6 +186,9 @@ public void writeTo(StreamOutput out) throws IOException {
out.writeBoolean(ordered);
out.writeOptionalString(analyzer);
out.writeOptionalWriteable(filter);
if (out.getVersion().onOrAfter(Version.V_7_1_0)) {
out.writeOptionalString(useField);
}
}

@Override
Expand All @@ -173,6 +204,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
if (filter != null) {
builder.field("filter", filter);
}
if (useField != null) {
builder.field("use_field", useField);
}
return builder.endObject();
}

Expand All @@ -183,14 +217,16 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
boolean ordered = (args[2] != null && (boolean) args[2]);
String analyzer = (String) args[3];
IntervalFilter filter = (IntervalFilter) args[4];
return new Match(query, max_gaps, ordered, analyzer, filter);
String useField = (String) args[5];
return new Match(query, max_gaps, ordered, analyzer, filter, useField);
});
static {
PARSER.declareString(constructorArg(), new ParseField("query"));
PARSER.declareInt(optionalConstructorArg(), new ParseField("max_gaps"));
PARSER.declareBoolean(optionalConstructorArg(), new ParseField("ordered"));
PARSER.declareString(optionalConstructorArg(), new ParseField("analyzer"));
PARSER.declareObject(optionalConstructorArg(), (p, c) -> IntervalFilter.fromXContent(p), new ParseField("filter"));
PARSER.declareString(optionalConstructorArg(), new ParseField("use_field"));
}

public static Match fromXContent(XContentParser parser) {
Expand Down Expand Up @@ -228,6 +264,13 @@ public IntervalsSource getSource(QueryShardContext ctx, MappedFieldType fieldTyp
return filter.filter(source, ctx, fieldType);
}

/**
 * Delegates to every provider in {@code subSources}, passing the same set to
 * each so that all of them can contribute their field names to it.
 *
 * @param fields the set handed to each sub-source provider
 */
@Override
public void extractFields(Set<String> fields) {
for (IntervalsSourceProvider provider : subSources) {
provider.extractFields(fields);
}
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
Expand Down Expand Up @@ -323,6 +366,13 @@ public IntervalsSource getSource(QueryShardContext ctx, MappedFieldType fieldTyp
return source;
}

/**
 * Walks {@code subSources} and asks each provider in turn to add its field
 * names to the supplied set; this provider adds nothing of its own.
 *
 * @param fields the set handed to each sub-source provider
 */
@Override
public void extractFields(Set<String> fields) {
for (IntervalsSourceProvider provider : subSources) {
provider.extractFields(fields);
}
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,11 @@
import org.apache.lucene.search.intervals.IntervalQuery;
import org.apache.lucene.search.intervals.Intervals;
import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptContext;
import org.elasticsearch.script.ScriptService;
Expand All @@ -37,6 +41,7 @@
import java.util.Collections;
import java.util.List;

import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;

Expand Down Expand Up @@ -64,7 +69,27 @@ private IntervalsSourceProvider.IntervalFilter createRandomFilter() {
return null;
}

private static final String MASKED_FIELD = "masked_field";
private static final String NO_POSITIONS_FIELD = "no_positions_field";

/**
 * Merges two extra text fields into the {@code _doc} mapping used by these tests:
 * {@code MASKED_FIELD}, declared with only {@code type: text}, and
 * {@code NO_POSITIONS_FIELD}, declared with {@code index_options: freqs}.
 *
 * @param mapperService the mapper service the additional mapping is merged into
 * @throws IOException if building or merging the mapping fails
 */
@Override
protected void initializeAdditionalMappings(MapperService mapperService) throws IOException {
    XContentBuilder mapping = jsonBuilder();
    mapping.startObject();
    mapping.startObject("_doc");
    mapping.startObject("properties");

    mapping.startObject(MASKED_FIELD);
    mapping.field("type", "text");
    mapping.endObject();

    mapping.startObject(NO_POSITIONS_FIELD);
    mapping.field("type", "text");
    mapping.field("index_options", "freqs");
    mapping.endObject();

    mapping.endObject(); // properties
    mapping.endObject(); // _doc
    mapping.endObject(); // root object

    CompressedXContent mappingSource = new CompressedXContent(Strings.toString(mapping));
    mapperService.merge("_doc", mappingSource, MapperService.MergeReason.MAPPING_UPDATE);
}

private IntervalsSourceProvider createRandomSource() {
String useField = rarely() ? MASKED_FIELD : null;
switch (randomInt(20)) {
case 0:
case 1:
Expand Down Expand Up @@ -95,7 +120,7 @@ private IntervalsSourceProvider createRandomSource() {
boolean mOrdered = randomBoolean();
int maxMGaps = randomInt(5) - 1;
String analyzer = randomFrom("simple", "keyword", "whitespace");
return new IntervalsSourceProvider.Match(text, maxMGaps, mOrdered, analyzer, createRandomFilter());
return new IntervalsSourceProvider.Match(text, maxMGaps, mOrdered, analyzer, createRandomFilter(), useField);
}
}

Expand Down Expand Up @@ -151,6 +176,21 @@ public void testMatchInterval() throws IOException {
Intervals.maxgaps(10, Intervals.ordered(Intervals.term("Hello"), Intervals.term("world"))));
assertEquals(expected, builder.toQuery(createShardContext()));

json = "{ \"intervals\" : " +
"{ \"" + STRING_FIELD_NAME + "\" : { " +
" \"match\" : { " +
" \"query\" : \"Hello world\"," +
" \"max_gaps\" : 10," +
" \"analyzer\" : \"whitespace\"," +
" \"use_field\" : \"" + MASKED_FIELD + "\"," +
" \"ordered\" : true } } } }";

builder = (IntervalQueryBuilder) parseQuery(json);
expected = new IntervalQuery(STRING_FIELD_NAME,
Intervals.fixField(MASKED_FIELD,
Intervals.maxgaps(10, Intervals.ordered(Intervals.term("Hello"), Intervals.term("world")))));
assertEquals(expected, builder.toQuery(createShardContext()));

json = "{ \"intervals\" : " +
"{ \"" + STRING_FIELD_NAME + "\" : { " +
" \"match\" : { " +
Expand Down Expand Up @@ -262,14 +302,31 @@ public void testNonIndexedFields() throws IOException {
IntervalQueryBuilder builder = new IntervalQueryBuilder(INT_FIELD_NAME, provider);
builder.doToQuery(createShardContext());
});
assertThat(e.getMessage(), equalTo("Cannot create IntervalQuery over field [" + INT_FIELD_NAME + "] with no indexed positions"));
assertThat(e.getMessage(), equalTo("Can only use interval queries on text fields - not on ["
+ INT_FIELD_NAME + "] which is of type [integer]"));

e = expectThrows(IllegalArgumentException.class, () -> {
IntervalQueryBuilder builder = new IntervalQueryBuilder(NO_POSITIONS_FIELD, provider);
builder.doToQuery(createShardContext());
});
assertThat(e.getMessage(), equalTo("Cannot create intervals over field ["
+ NO_POSITIONS_FIELD + "] with no positions indexed"));

String json = "{ \"intervals\" : " +
"{ \"" + STRING_FIELD_NAME + "\" : { " +
" \"match\" : { " +
" \"query\" : \"Hello world\"," +
" \"max_gaps\" : 10," +
" \"analyzer\" : \"whitespace\"," +
" \"use_field\" : \"" + NO_POSITIONS_FIELD + "\"," +
" \"ordered\" : true } } } }";

e = expectThrows(IllegalArgumentException.class, () -> {
IntervalQueryBuilder builder = new IntervalQueryBuilder(STRING_FIELD_NAME_2, provider);
IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json);
builder.doToQuery(createShardContext());
});
assertThat(e.getMessage(), equalTo("Cannot create IntervalQuery over field ["
+ STRING_FIELD_NAME_2 + "] with no indexed positions"));
assertThat(e.getMessage(), equalTo("Cannot create intervals over field ["
+ NO_POSITIONS_FIELD + "] with no positions indexed"));
}

public void testMultipleProviders() {
Expand Down