From 92fbf2f2456a63cc008cde4f94bfe4536019aa42 Mon Sep 17 00:00:00 2001 From: Martijn Laarman Date: Wed, 17 Oct 2018 06:38:04 +0200 Subject: [PATCH] Support 2nd level field collapsing (#3444) as per https://github.com/elastic/elasticsearch/pull/31808 (cherry picked from commit 7d787672972a6c39d20f43faf10ed2efe0cc9c0a) --- .../Search/Search/Collapsing/FieldCollapse.cs | 22 ++++- src/Nest/Search/Search/InnerHits/InnerHits.cs | 14 +++ src/Nest/Search/Search/SearchRequest.cs | 1 + .../Collapsing/FieldCollapseUsageTests.cs | 94 ++++++++++++++++++- 4 files changed, 123 insertions(+), 8 deletions(-) diff --git a/src/Nest/Search/Search/Collapsing/FieldCollapse.cs b/src/Nest/Search/Search/Collapsing/FieldCollapse.cs index 9f857b94d37..378c1fd726d 100644 --- a/src/Nest/Search/Search/Collapsing/FieldCollapse.cs +++ b/src/Nest/Search/Search/Collapsing/FieldCollapse.cs @@ -27,38 +27,52 @@ public interface IFieldCollapse [JsonProperty("inner_hits")] IInnerHits InnerHits { get; set; } + /// + /// The expansion of the group is done by sending an additional query for each inner_hit request for each collapsed hit returned + /// in the response. This can significantly slow things down if you have too many groups and/or inner_hit requests. + /// The max_concurrent_group_searches request parameter can be used to control the maximum number of + /// concurrent searches allowed in this phase. The default is based on the number of data nodes and the + /// default search thread pool size. + /// [JsonProperty("max_concurrent_group_searches")] int? MaxConcurrentGroupSearches { get; set; } } - /// + /// public class FieldCollapse : IFieldCollapse { - /// + /// public Field Field { get; set; } + /// public IInnerHits InnerHits { get; set; } + /// public int? MaxConcurrentGroupSearches { get; set; } + } - /// + /// public class FieldCollapseDescriptor : DescriptorBase, IFieldCollapse>, IFieldCollapse where T : class { - /// Field IFieldCollapse.Field { get; set; } IInnerHits IFieldCollapse.InnerHits { get; set; } int? IFieldCollapse.MaxConcurrentGroupSearches { get; set; } + /// public FieldCollapseDescriptor MaxConcurrentGroupSearches(int? maxConcurrentGroupSearches) => Assign(a => a.MaxConcurrentGroupSearches = maxConcurrentGroupSearches); + /// public FieldCollapseDescriptor Field(Field field) => Assign(a => a.Field = field); + /// public FieldCollapseDescriptor Field(Expression> objectPath) => Assign(a => a.Field = objectPath); + /// public FieldCollapseDescriptor InnerHits(Func, IInnerHits> selector = null) => Assign(a => a.InnerHits = selector.InvokeOrDefault(new InnerHitsDescriptor())); + } } diff --git a/src/Nest/Search/Search/InnerHits/InnerHits.cs b/src/Nest/Search/Search/InnerHits/InnerHits.cs index fc27b41219d..527894aedbf 100644 --- a/src/Nest/Search/Search/InnerHits/InnerHits.cs +++ b/src/Nest/Search/Search/InnerHits/InnerHits.cs @@ -41,6 +41,12 @@ public interface IInnerHits [JsonProperty("ignore_unmapped")] bool? IgnoreUnmapped { get; set; } + + /// + /// Provides a second level of collapsing, NOTE: Elasticsearch only supports collapsing up to two levels. + /// + [JsonProperty("collapse")] + IFieldCollapse Collapse { get; set; } } public class InnerHits : IInnerHits @@ -66,6 +72,9 @@ public class InnerHits : IInnerHits public Fields DocValueFields { get; set; } public bool? IgnoreUnmapped { get; set; } + + /// + public IFieldCollapse Collapse { get; set; } } [JsonObject(MemberSerialization = MemberSerialization.OptIn)] @@ -82,6 +91,7 @@ public class InnerHitsDescriptor : DescriptorBase, IIn IScriptFields IInnerHits.ScriptFields { get; set; } Fields IInnerHits.DocValueFields { get; set; } bool? IInnerHits.IgnoreUnmapped { get; set; } + IFieldCollapse IInnerHits.Collapse { get; set; } public InnerHitsDescriptor From(int? from) => Assign(a => a.From = from); @@ -115,5 +125,9 @@ public InnerHitsDescriptor DocValueFields(Func, IPromise< public InnerHitsDescriptor DocValueFields(Fields fields) => Assign(a => a.DocValueFields = fields); public InnerHitsDescriptor IgnoreUnmapped(bool? ignoreUnmapped = true) => Assign(a => a.IgnoreUnmapped = ignoreUnmapped); + + /// + public InnerHitsDescriptor Collapse(Func, IFieldCollapse> collapseSelector) => + Assign(a => a.Collapse = collapseSelector?.Invoke(new FieldCollapseDescriptor())); } } diff --git a/src/Nest/Search/Search/SearchRequest.cs b/src/Nest/Search/Search/SearchRequest.cs index 3cb5b9a95c6..a573cfe4cbe 100644 --- a/src/Nest/Search/Search/SearchRequest.cs +++ b/src/Nest/Search/Search/SearchRequest.cs @@ -399,6 +399,7 @@ public SearchDescriptor Highlight(Func, IHighlight> hi /// For instance the query below retrieves the best tweet for each user and sorts them by number of likes. /// /// NOTE: The collapsing is applied to the top hits only and does not affect aggregations. + /// You can only collapse to a depth of 2. /// /// public SearchDescriptor Collapse(Func, IFieldCollapse> collapseSelector) => diff --git a/src/Tests/Tests/Search/Search/Collapsing/FieldCollapseUsageTests.cs b/src/Tests/Tests/Search/Search/Collapsing/FieldCollapseUsageTests.cs index ff101ef5835..76e48bf1818 100644 --- a/src/Tests/Tests/Search/Search/Collapsing/FieldCollapseUsageTests.cs +++ b/src/Tests/Tests/Search/Search/Collapsing/FieldCollapseUsageTests.cs @@ -1,4 +1,5 @@ using System; +using Elastic.Xunit.XunitPlumbing; using FluentAssertions; using Nest; using Tests.Core.Extensions; @@ -6,14 +7,10 @@ using Tests.Core.ManagedElasticsearch.NodeSeeders; using Tests.Domain; using Tests.Framework.Integration; -using Tests.Framework.ManagedElasticsearch.Clusters; -using Tests.Framework.ManagedElasticsearch.NodeSeeders; using static Nest.Infer; namespace Tests.Search.Search.Collapsing { - /** - */ public class FieldCollapseUsageTests : SearchUsageTestBase { protected override string UrlPath => $"/{DefaultSeeder.ProjectsAliasFilter}/doc/_search"; @@ -77,4 +74,93 @@ protected override void ExpectResponse(ISearchResponse response) } } } + + [SkipVersion("<6.4.0", "2nd level collapsing is a new feature in 6.4.0")] + public class FieldCollapseSecondLevelUsageTests : SearchUsageTestBase + { + protected override string UrlPath => $"/{DefaultSeeder.ProjectsAliasFilter}/doc/_search"; + + public FieldCollapseSecondLevelUsageTests(ReadOnlyCluster cluster, EndpointUsage usage) : base(cluster, usage) { } + + protected override object ExpectJson => new + { + _source = new { excludes = new [] { "*" } }, + collapse = new { + field = "state", + inner_hits = new { + _source = new { + excludes = new [] { "*" } + }, + collapse = new { + field = "name" + }, + from = 1, + name = "stateofbeing", + size = 5 + }, + max_concurrent_group_searches = 1000 + } + }; + + protected override Func, ISearchRequest> Fluent => s => s + .Source(source=>source.ExcludeAll()) + .Index(DefaultSeeder.ProjectsAliasFilter) + .Collapse(c => c + .Field(f => f.State) + .MaxConcurrentGroupSearches(1000) + .InnerHits(i => i + .Source(source=>source.ExcludeAll()) + .Name(nameof(StateOfBeing).ToLowerInvariant()) + .Size(5) + .From(1) + .Collapse(c2 => c2 + .Field(p=>p.Name) + ) + ) + ); + + protected override SearchRequest Initializer => new SearchRequest(DefaultSeeder.ProjectsAliasFilter) + { + Source = SourceFilter.ExcludeAll, + Collapse = new FieldCollapse + { + Field = Field(p => p.State), + MaxConcurrentGroupSearches = 1000, + InnerHits = new InnerHits + { + Source = SourceFilter.ExcludeAll, + Name = nameof(StateOfBeing).ToLowerInvariant(), + Size = 5, + From = 1, + Collapse = new FieldCollapse + { + Field = Field(p=>p.Name) + } + } + } + }; + + protected override void ExpectResponse(ISearchResponse response) + { + var numberOfStates = Enum.GetValues(typeof(StateOfBeing)).Length; + response.HitsMetadata.Total.Should().BeGreaterThan(numberOfStates); + response.Hits.Count.Should().Be(numberOfStates); + foreach (var hit in response.Hits) + { + var name = nameof(StateOfBeing).ToLowerInvariant(); + hit.InnerHits.Should().NotBeNull().And.ContainKey(name); + var innerHits = hit.InnerHits[name]; + innerHits.Hits.Total.Should().BeGreaterThan(0); + var i = 0; + foreach (var innerHit in innerHits.Hits.Hits) + { + i++; + innerHit.Fields.Should().NotBeEmpty() + .And.ContainKey("name"); + } + + i.Should().NotBe(0, "we expect to inspect 2nd level collapsed fields"); + } + } + } }