Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add second level of field collapsing #31808

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 102 additions & 0 deletions docs/reference/search/request/collapse.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,105 @@ The default is based on the number of data nodes and the default search thread p

WARNING: `collapse` cannot be used in conjunction with <<search-request-scroll, scroll>>,
<<search-request-rescore, rescore>> or <<search-request-search-after, search after>>.

==== Second level of collapsing

Second level of collapsing is also supported and is applied to `inner_hits`.
For example, the following request finds the top scored tweets for
each country, and within each country finds the top scored tweets
for each user.

[source,js]
--------------------------------------------------
GET /twitter/_search
{
"query": {
"match": {
"message": "elasticsearch"
}
},
"collapse" : {
"field" : "country",
"inner_hits" : {
"name": "by_location",
"collapse" : {"field" : "user"},
"size": 3
}
}
}
--------------------------------------------------
// NOTCONSOLE


Response:
[source,js]
--------------------------------------------------
{
...
"hits": [
{
"_index": "twitter",
"_type": "_doc",
"_id": "9",
"_score": ...,
"_source": {...},
"fields": {"country": ["UK"]},
"inner_hits":{
"by_location": {
"hits": {
...,
"hits": [
{
...
"fields": {"user" : ["user124"]}
},
{
...
"fields": {"user" : ["user589"]}
},
{
...
"fields": {"user" : ["user001"]}
}
]
}
}
}
},
{
"_index": "twitter",
"_type": "_doc",
"_id": "1",
"_score": ...,
"_source": {...},
"fields": {"country": ["Canada"]},
"inner_hits":{
"by_location": {
"hits": {
...,
"hits": [
{
...
"fields": {"user" : ["user444"]}
},
{
...
"fields": {"user" : ["user1111"]}
},
{
...
"fields": {"user" : ["user999"]}
}
]
}
}
}

},
...
]
}
--------------------------------------------------
// NOTCONSOLE

NOTE: Second level of collapsing doesn't allow `inner_hits`.
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
---
"two levels fields collapsing":
- skip:
version: " - 6.99.99"
reason: using multiple field collapsing from 7.0 on
- do:
indices.create:
index: addresses
body:
settings:
number_of_shards: 1
number_of_replicas: 1
mappings:
_doc:
properties:
country: {"type": "keyword"}
city: {"type": "keyword"}
address: {"type": "text"}

- do:
bulk:
refresh: true
body:
- '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "1" } }'
- '{"country" : "Canada", "city" : "Saskatoon", "address" : "701 Victoria Avenue" }'
- '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "2" } }'
- '{"country" : "Canada", "city" : "Toronto", "address" : "74 Victoria Street, Suite, 74 Victoria Street, Suite 300" }'
- '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "3" } }'
- '{"country" : "Canada", "city" : "Toronto", "address" : "350 Victoria St" }'
- '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "4" } }'
- '{"country" : "Canada", "city" : "Toronto", "address" : "20 Victoria Street" }'
- '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "5" } }'
- '{"country" : "UK", "city" : "London", "address" : "58 Victoria Street" }'
- '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "6" } }'
- '{"country" : "UK", "city" : "London", "address" : "Victoria Street Victoria Palace Theatre" }'
- '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "7" } }'
- '{"country" : "UK", "city" : "Manchester", "address" : "75 Victoria street Westminster" }'
- '{ "index" : { "_index" : "addresses", "_type" : "_doc", "_id" : "8" } }'
- '{"country" : "UK", "city" : "London", "address" : "Victoria Station Victoria Arcade" }'


# ************* error if internal collapse contains inner_hits
- do:
catch: /parse_exception/
search:
index: addresses
body:
query: { "match" : { "address" : "victoria" }}
collapse:
field: country
inner_hits:
collapse:
field : city
inner_hits: {}


# ************* error if internal collapse contains another collapse
- do:
catch: /parse_exception/
search:
index: addresses
body:
query: { "match" : { "address" : "victoria" }}
collapse:
field: country
inner_hits:
collapse:
field : city
collapse: { field: city }



# ************* top scored
- do:
search:
index: addresses
body:
query: { "match" : { "address" : "victoria" }}
collapse:
field: country
inner_hits:
name: by_location
size: 3
collapse:
field : city

- match: { hits.total: 8 }
- length: { hits.hits: 2 }
- match: { hits.hits.0.fields.country: ["UK"] }
- match: { hits.hits.0.inner_hits.by_location.hits.total: 4 }
# 2 inner hits returned instead of requested 3 as they are collapsed by city
- length: { hits.hits.0.inner_hits.by_location.hits.hits : 2}
- match: { hits.hits.0.inner_hits.by_location.hits.hits.0._id: "8" }
- match: { hits.hits.0.inner_hits.by_location.hits.hits.0.fields.city: ["London"] }
- match: { hits.hits.0.inner_hits.by_location.hits.hits.1._id: "7" }
- match: { hits.hits.0.inner_hits.by_location.hits.hits.1.fields.city: ["Manchester"] }

- match: { hits.hits.1.fields.country: ["Canada"] }
- match: { hits.hits.1.inner_hits.by_location.hits.total: 4 }
# 2 inner hits returned instead of requested 3 as they are collapsed by city
- length: { hits.hits.1.inner_hits.by_location.hits.hits : 2 }
- match: { hits.hits.1.inner_hits.by_location.hits.hits.0._id: "1" }
- match: { hits.hits.1.inner_hits.by_location.hits.hits.0.fields.city: ["Saskatoon"] }
- match: { hits.hits.1.inner_hits.by_location.hits.hits.1._id: "3" }
- match: { hits.hits.1.inner_hits.by_location.hits.hits.1.fields.city: ["Toronto"] }


# ************* sorted
- do:
search:
index: addresses
body:
query: { "match" : { "address" : "victoria" }}
collapse:
field: country
inner_hits:
name: by_location
size: 3
sort: [{ "city": "desc" }]
collapse:
field : city

- match: { hits.total: 8 }
- length: { hits.hits: 2 }
- match: { hits.hits.0.fields.country: ["UK"] }
- match: { hits.hits.0.inner_hits.by_location.hits.total: 4 }
# 2 inner hits returned instead of requested 3 as they are collapsed by city
- length: { hits.hits.0.inner_hits.by_location.hits.hits : 2}
- match: { hits.hits.0.inner_hits.by_location.hits.hits.0._id: "7" }
- match: { hits.hits.0.inner_hits.by_location.hits.hits.0.fields.city: ["Manchester"] }
- match: { hits.hits.0.inner_hits.by_location.hits.hits.1._id: "5" }
- match: { hits.hits.0.inner_hits.by_location.hits.hits.1.fields.city: ["London"] }

- match: { hits.hits.1.fields.country: ["Canada"] }
- match: { hits.hits.1.inner_hits.by_location.hits.total: 4 }
# 2 inner hits returned instead of requested 3 as they are collapsed by city
- length: { hits.hits.1.inner_hits.by_location.hits.hits : 2 }
- match: { hits.hits.1.inner_hits.by_location.hits.hits.0._id: "2" }
- match: { hits.hits.1.inner_hits.by_location.hits.hits.0.fields.city: ["Toronto"] }
- match: { hits.hits.1.inner_hits.by_location.hits.hits.1._id: "1" }
- match: { hits.hits.1.inner_hits.by_location.hits.hits.1.fields.city: ["Saskatoon"] }
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ public void run() throws IOException {
groupQuery.must(origQuery);
}
for (InnerHitBuilder innerHitBuilder : innerHitBuilders) {
SearchSourceBuilder sourceBuilder = buildExpandSearchSourceBuilder(innerHitBuilder)
CollapseBuilder innerCollapseBuilder = innerHitBuilder.getInnerCollapseBuilder();
SearchSourceBuilder sourceBuilder = buildExpandSearchSourceBuilder(innerHitBuilder, innerCollapseBuilder)
.query(groupQuery)
.postFilter(searchRequest.source().postFilter());
SearchRequest groupRequest = buildExpandSearchRequest(searchRequest, sourceBuilder);
Expand Down Expand Up @@ -135,7 +136,7 @@ private SearchRequest buildExpandSearchRequest(SearchRequest orig, SearchSourceB
return groupRequest;
}

private SearchSourceBuilder buildExpandSearchSourceBuilder(InnerHitBuilder options) {
private SearchSourceBuilder buildExpandSearchSourceBuilder(InnerHitBuilder options, CollapseBuilder innerCollapseBuilder) {
SearchSourceBuilder groupSource = new SearchSourceBuilder();
groupSource.from(options.getFrom());
groupSource.size(options.getSize());
Expand Down Expand Up @@ -167,6 +168,9 @@ private SearchSourceBuilder buildExpandSearchSourceBuilder(InnerHitBuilder optio
groupSource.explain(options.isExplain());
groupSource.trackScores(options.isTrackScores());
groupSource.version(options.isVersion());
if (innerCollapseBuilder != null) {
groupSource.collapse(innerCollapseBuilder);
}
return groupSource;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import org.elasticsearch.search.fetch.subphase.FetchSourceContext;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.sort.SortBuilder;
import org.elasticsearch.search.collapse.CollapseBuilder;

import java.io.IOException;
import java.util.ArrayList;
Expand All @@ -55,6 +56,8 @@ public final class InnerHitBuilder implements Writeable, ToXContentObject {
public static final ParseField NAME_FIELD = new ParseField("name");
public static final ParseField IGNORE_UNMAPPED = new ParseField("ignore_unmapped");
public static final QueryBuilder DEFAULT_INNER_HIT_QUERY = new MatchAllQueryBuilder();
public static final ParseField COLLAPSE_FIELD = new ParseField("collapse");
public static final ParseField FIELD_FIELD = new ParseField("field");

private static final ObjectParser<InnerHitBuilder, Void> PARSER = new ObjectParser<>("inner_hits", InnerHitBuilder::new);

Expand Down Expand Up @@ -91,6 +94,28 @@ public final class InnerHitBuilder implements Writeable, ToXContentObject {
}, SearchSourceBuilder._SOURCE_FIELD, ObjectParser.ValueType.OBJECT_ARRAY_BOOLEAN_OR_STRING);
PARSER.declareObject(InnerHitBuilder::setHighlightBuilder, (p, c) -> HighlightBuilder.fromXContent(p),
SearchSourceBuilder.HIGHLIGHT_FIELD);
// Registers the parser for the second-level `collapse` object nested inside `inner_hits`.
// Only the exact shape {"field": "<name>"} is accepted: any other key, a non-string value,
// or any additional content after the field value is rejected with a ParsingException.
PARSER.declareField((parser, builder, context) -> {
    // The chain short-circuits, so the parser is advanced only as far as the first
    // unexpected token; the error location reported below points at that token.
    boolean isParsedCorrectly = parser.currentToken() == XContentParser.Token.START_OBJECT
        && parser.nextToken() == XContentParser.Token.FIELD_NAME
        && FIELD_FIELD.match(parser.currentName(), parser.getDeprecationHandler())
        && parser.nextToken() == XContentParser.Token.VALUE_STRING;
    String field = null;
    if (isParsedCorrectly) {
        field = parser.text();
        // The object must close immediately after the field value.
        isParsedCorrectly = parser.nextToken() == XContentParser.Token.END_OBJECT;
    }
    if (isParsedCorrectly == false) {
        throw new ParsingException(parser.getTokenLocation(), "Invalid token in the inner collapse");
    }
    builder.setInnerCollapse(new CollapseBuilder(field));
}, COLLAPSE_FIELD, ObjectParser.ValueType.OBJECT);
}

private String name;
Expand All @@ -109,6 +134,7 @@ public final class InnerHitBuilder implements Writeable, ToXContentObject {
private Set<ScriptField> scriptFields;
private HighlightBuilder highlightBuilder;
private FetchSourceContext fetchSourceContext;
private CollapseBuilder innerCollapseBuilder = null;

public InnerHitBuilder() {
this.name = null;
Expand Down Expand Up @@ -173,6 +199,9 @@ public InnerHitBuilder(StreamInput in) throws IOException {
boolean hasChildren = in.readBoolean();
assert hasChildren == false;
}
if (in.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
this.innerCollapseBuilder = in.readOptionalWriteable(CollapseBuilder::new);
}
}

@Override
Expand Down Expand Up @@ -218,6 +247,9 @@ public void writeTo(StreamOutput out) throws IOException {
}
}
out.writeOptionalWriteable(highlightBuilder);
if (out.getVersion().onOrAfter(Version.V_7_0_0_alpha1)) {
out.writeOptionalWriteable(innerCollapseBuilder);
}
}

/**
Expand Down Expand Up @@ -501,6 +533,15 @@ QueryBuilder getQuery() {
return query;
}

/**
 * Sets the collapse definition to apply to the hits returned for this inner hit
 * (the second level of field collapsing).
 *
 * @param innerCollapseBuilder the collapse builder to store; stored as given, without validation
 * @return this builder, to allow call chaining
 */
public InnerHitBuilder setInnerCollapse(CollapseBuilder innerCollapseBuilder) {
this.innerCollapseBuilder = innerCollapseBuilder;
return this;
}

/**
 * Returns the collapse definition configured for this inner hit.
 *
 * @return the stored collapse builder, or {@code null} if none has been set
 */
public CollapseBuilder getInnerCollapseBuilder() {
return innerCollapseBuilder;
}

@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
Expand Down Expand Up @@ -550,6 +591,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
if (highlightBuilder != null) {
builder.field(SearchSourceBuilder.HIGHLIGHT_FIELD.getPreferredName(), highlightBuilder, params);
}
if (innerCollapseBuilder != null) {
builder.field(COLLAPSE_FIELD.getPreferredName(), innerCollapseBuilder);
}
builder.endObject();
return builder;
}
Expand All @@ -572,13 +616,14 @@ public boolean equals(Object o) {
Objects.equals(scriptFields, that.scriptFields) &&
Objects.equals(fetchSourceContext, that.fetchSourceContext) &&
Objects.equals(sorts, that.sorts) &&
Objects.equals(highlightBuilder, that.highlightBuilder);
Objects.equals(highlightBuilder, that.highlightBuilder) &&
Objects.equals(innerCollapseBuilder, that.innerCollapseBuilder);
}

@Override
public int hashCode() {
return Objects.hash(name, ignoreUnmapped, from, size, explain, version, trackScores,
storedFieldsContext, docValueFields, scriptFields, fetchSourceContext, sorts, highlightBuilder);
storedFieldsContext, docValueFields, scriptFields, fetchSourceContext, sorts, highlightBuilder, innerCollapseBuilder);
}

public static InnerHitBuilder fromXContent(XContentParser parser) throws IOException {
Expand Down