diff --git a/docs/aggregations.asciidoc b/docs/aggregations.asciidoc index 560d5cbcce6..74edf088ee3 100644 --- a/docs/aggregations.asciidoc +++ b/docs/aggregations.asciidoc @@ -68,6 +68,8 @@ The values are typically extracted from the fields of the document (using the fi * <> +* <> + * <> * <> @@ -110,6 +112,8 @@ include::aggregations/metric/top-hits/top-hits-aggregation-usage.asciidoc[] include::aggregations/metric/top-metrics/top-metrics-aggregation-usage.asciidoc[] +include::aggregations/metric/t-test/t-test-aggregation-usage.asciidoc[] + include::aggregations/metric/value-count/value-count-aggregation-usage.asciidoc[] include::aggregations/metric/weighted-average/weighted-average-aggregation-usage.asciidoc[] diff --git a/docs/aggregations/metric/t-test/t-test-aggregation-usage.asciidoc b/docs/aggregations/metric/t-test/t-test-aggregation-usage.asciidoc new file mode 100644 index 00000000000..9d5c6d2cb5a --- /dev/null +++ b/docs/aggregations/metric/t-test/t-test-aggregation-usage.asciidoc @@ -0,0 +1,118 @@ +:ref_current: https://www.elastic.co/guide/en/elasticsearch/reference/master + +:github: https://github.com/elastic/elasticsearch-net + +:nuget: https://www.nuget.org/packages + +//// +IMPORTANT NOTE +============== +This file has been generated from https://github.com/elastic/elasticsearch-net/tree/master/src/Tests/Tests/Aggregations/Metric/TTest/TTestAggregationUsageTests.cs. +If you wish to submit a PR for any spelling mistakes, typos or grammatical errors for this file, +please modify the original csharp file found at the link and submit the PR with that change. Thanks! +//// + +[[t-test-aggregation-usage]] +=== T Test Aggregation Usage + +A t_test metrics aggregation that performs a statistical hypothesis test in which the test statistic follows a +Student’s t-distribution under the null hypothesis on numeric values extracted from the aggregated documents or +generated by provided scripts. 
In practice, this will tell you if the difference between two population means +are statistically significant and did not occur by chance alone. + +NOTE: Available in Elasticsearch 7.8.0+ with at least basic license level + +Be sure to read the Elasticsearch documentation on {ref_current}/search-aggregations-metrics-ttest-aggregation.html[T-Test Aggregation]. + +==== Fluent DSL example + +[source,csharp] +---- +a => a +.TTest("commits_visibility", c => c + .A(t => t + .Field(f => f.NumberOfCommits) + .Filter(f => f + .Term(ff => ff.Visibility, Visibility.Public) + ) + ) + .B(t => t + .Field(f => f.NumberOfCommits) + .Filter(f => f + .Term(ff => ff.Visibility, Visibility.Private) + ) + ) + .Type(TTestType.Heteroscedastic) +) +---- + +==== Object Initializer syntax example + +[source,csharp] +---- +new TTestAggregation("commits_visibility") +{ + A = new TTestPopulation + { + Field = Field(f => f.NumberOfCommits), + Filter = new TermQuery + { + Field = Field(f => f.Visibility), + Value = Visibility.Public + } + }, + B = new TTestPopulation + { + Field = Field(f => f.NumberOfCommits), + Filter = new TermQuery + { + Field = Field(f => f.Visibility), + Value = Visibility.Private + } + }, + Type = TTestType.Heteroscedastic +} +---- + +[source,javascript] +.Example json output +---- +{ + "commits_visibility": { + "t_test": { + "a": { + "field": "numberOfCommits", + "filter": { + "term": { + "visibility": { + "value": "Public" + } + } + } + }, + "b": { + "field": "numberOfCommits", + "filter": { + "term": { + "visibility": { + "value": "Private" + } + } + } + }, + "type": "heteroscedastic" + } + } +} +---- + +==== Handling Responses + +[source,csharp] +---- +response.ShouldBeValid(); +var tTest = response.Aggregations.TTest("commits_visibility"); +tTest.Should().NotBeNull(); +tTest.Value.Should().BeGreaterThan(0); +---- + diff --git a/src/Nest/Aggregations/AggregateDictionary.cs b/src/Nest/Aggregations/AggregateDictionary.cs index 698b49ee3fb..a3b18e3424c 100644 --- 
a/src/Nest/Aggregations/AggregateDictionary.cs +++ b/src/Nest/Aggregations/AggregateDictionary.cs @@ -244,6 +244,8 @@ public CompositeBucketAggregate Composite(string key) public BoxplotAggregate Boxplot(string key) => TryGet(key); + public ValueAggregate TTest(string key) => TryGet(key); + private TAggregate TryGet(string key) where TAggregate : class, IAggregate => BackingDictionary.TryGetValue(key, out var agg) ? agg as TAggregate : null; diff --git a/src/Nest/Aggregations/AggregationContainer.cs b/src/Nest/Aggregations/AggregationContainer.cs index ba0f05b63db..eae4420b7be 100644 --- a/src/Nest/Aggregations/AggregationContainer.cs +++ b/src/Nest/Aggregations/AggregationContainer.cs @@ -257,6 +257,10 @@ public interface IAggregationContainer [DataMember(Name = "top_hits")] ITopHitsAggregation TopHits { get; set; } + /// + [DataMember(Name = "t_test")] + ITTestAggregation TTest { get; set; } + [DataMember(Name = "value_count")] IValueCountAggregation ValueCount { get; set; } @@ -387,6 +391,9 @@ public class AggregationContainer : IAggregationContainer public ITermsAggregation Terms { get; set; } public ITopHitsAggregation TopHits { get; set; } + + public ITTestAggregation TTest { get; set; } + public IValueCountAggregation ValueCount { get; set; } public IWeightedAverageAggregation WeightedAverage { get; set; } @@ -542,6 +549,8 @@ public class AggregationContainerDescriptor : DescriptorBase, ITopHitsAggregation> selector ) => _SetInnerAggregation(name, selector, (a, d) => a.TopHits = d); + /// + public AggregationContainerDescriptor TTest(string name, + Func, ITTestAggregation> selector + ) => + _SetInnerAggregation(name, selector, (a, d) => a.TTest = d); + public AggregationContainerDescriptor Children(string name, Func, IChildrenAggregation> selector ) where TChild : class => diff --git a/src/Nest/Aggregations/Metric/TTest/TTestAggregation.cs b/src/Nest/Aggregations/Metric/TTest/TTestAggregation.cs new file mode 100644 index 00000000000..e44c1d7bf3e --- 
/dev/null +++ b/src/Nest/Aggregations/Metric/TTest/TTestAggregation.cs @@ -0,0 +1,120 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +using System; +using System.Collections.Generic; +using System.Runtime.Serialization; +using Elasticsearch.Net; +using Elasticsearch.Net.Utf8Json; + +namespace Nest +{ + /// + /// A metrics aggregation that performs a statistical hypothesis test in which the test statistic follows a + /// Student’s t-distribution under the null hypothesis on numeric values extracted from the aggregated documents or + /// generated by provided scripts. In practice, this will tell you if the difference between two population means + /// are statistically significant and did not occur by chance alone. + /// + /// Available in Elasticsearch 7.8.0+ with at least basic license level + /// + [InterfaceDataContract] + [ReadAs(typeof(TTestAggregation))] + public interface ITTestAggregation : IAggregation + { + /// + /// T-test population A + /// + [DataMember(Name= "a")] + public ITTestPopulation A { get; set; } + + /// + /// T-test population B + /// + [DataMember(Name= "b")] + public ITTestPopulation B { get; set; } + + /// + /// T-test type + /// + [DataMember(Name = "type")] + public TTestType? Type { get; set; } + } + + /// + public class TTestAggregation : AggregationBase, ITTestAggregation + { + internal TTestAggregation() { } + + public TTestAggregation(string name) : base(name) { } + + internal override void WrapInContainer(AggregationContainer c) => c.TTest = this; + + /// + public ITTestPopulation A { get; set; } + /// + public ITTestPopulation B { get; set; } + /// + public TTestType? 
Type { get; set; } + } + + /// + public class TTestAggregationDescriptor + : DescriptorBase, ITTestAggregation>, ITTestAggregation + where T : class + { + IDictionary IAggregation.Meta { get; set; } + string IAggregation.Name { get; set; } + ITTestPopulation ITTestAggregation.A { get; set; } + ITTestPopulation ITTestAggregation.B { get; set; } + TTestType? ITTestAggregation.Type { get; set; } + + /// + public TTestAggregationDescriptor A(Func, ITTestPopulation> selector) => + Assign(selector, (a, v) => a.A = v?.Invoke(new TTestPopulationDescriptor())); + + /// + public TTestAggregationDescriptor A(Func, ITTestPopulation> selector) where TOther : class => + Assign(selector, (a, v) => a.A = v?.Invoke(new TTestPopulationDescriptor())); + + /// + public TTestAggregationDescriptor B(Func, ITTestPopulation> selector) => + Assign(selector, (a, v) => a.B = v?.Invoke(new TTestPopulationDescriptor())); + + /// + public TTestAggregationDescriptor B(Func, ITTestPopulation> selector) where TOther : class => + Assign(selector, (a, v) => a.B = v?.Invoke(new TTestPopulationDescriptor())); + + /// + public TTestAggregationDescriptor Type(TTestType? 
type) => Assign(type, (a, v) => a.Type = v); + + /// + public TTestAggregationDescriptor Meta(Func, FluentDictionary> selector) => + Assign(selector, (a, v) => a.Meta = v?.Invoke(new FluentDictionary())); + } + + /// + /// The type of t-test + /// + [StringEnum] + public enum TTestType + { + /// + /// performs paired t-test + /// + [EnumMember(Value = "paired")] + Paired, + + /// + /// performs two-sample equal variance test + /// + [EnumMember(Value = "homoscedastic")] + Homoscedastic, + + /// + /// performs two-sample unequal variance test (this is default) + /// + [EnumMember(Value = "heteroscedastic")] + Heteroscedastic, + } +} diff --git a/src/Nest/Aggregations/Metric/TTest/TTestGroup.cs b/src/Nest/Aggregations/Metric/TTest/TTestGroup.cs new file mode 100644 index 00000000000..48a8ad31df8 --- /dev/null +++ b/src/Nest/Aggregations/Metric/TTest/TTestGroup.cs @@ -0,0 +1,74 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +using System; +using System.Linq.Expressions; +using System.Runtime.Serialization; +using Elasticsearch.Net.Utf8Json; + +namespace Nest +{ + /// + /// A population for a + /// + [InterfaceDataContract] + [ReadAs(typeof(TTestPopulation))] + public interface ITTestPopulation + { + /// + /// The field to use for the population values. Must be a numeric field. + /// + [DataMember(Name = "field")] + Field Field { get; set; } + + /// + /// A script to use to calculate population values. + /// + [DataMember(Name = "script")] + IScript Script { get; set; } + + /// + /// A filter to apply to target field to filter population values. Useful + /// when two populations use the same field for values, to filter the values. 
+ /// + [DataMember(Name = "filter")] + QueryContainer Filter { get; set; } + } + + /// + public class TTestPopulation : ITTestPopulation + { + /// + public Field Field { get; set; } + /// + public IScript Script { get; set; } + /// + public QueryContainer Filter { get; set; } + } + + /// + public class TTestPopulationDescriptor : DescriptorBase, ITTestPopulation>, ITTestPopulation where T : class + { + Field ITTestPopulation.Field { get; set; } + IScript ITTestPopulation.Script { get; set; } + QueryContainer ITTestPopulation.Filter { get; set; } + + /// + public TTestPopulationDescriptor Field(Field field) => Assign(field, (a, v) => a.Field = v); + + /// + public TTestPopulationDescriptor Field(Expression> field) => Assign(field, (a, v) => a.Field = v); + + /// + public TTestPopulationDescriptor Script(string script) => Assign((InlineScript)script, (a, v) => a.Script = v); + + /// + public TTestPopulationDescriptor Script(Func scriptSelector) => + Assign(scriptSelector, (a, v) => a.Script = v?.Invoke(new ScriptDescriptor())); + + /// + public TTestPopulationDescriptor Filter(Func, QueryContainer> filter) => + Assign(filter, (a, v) => a.Filter = v?.Invoke(new QueryContainerDescriptor())); + } +} diff --git a/src/Nest/Aggregations/Metric/ValueAggregate.cs b/src/Nest/Aggregations/Metric/ValueAggregate.cs index ff48ddf532e..221c6fc298e 100644 --- a/src/Nest/Aggregations/Metric/ValueAggregate.cs +++ b/src/Nest/Aggregations/Metric/ValueAggregate.cs @@ -2,7 +2,7 @@ // Elasticsearch B.V licenses this file to you under the Apache 2.0 License. 
// See the LICENSE file in the project root for more information -namespace Nest +namespace Nest { public class ValueAggregate : MetricAggregateBase { diff --git a/src/Nest/Aggregations/Visitor/AggregationVisitor.cs b/src/Nest/Aggregations/Visitor/AggregationVisitor.cs index 64572c55e95..b4572bdba9d 100644 --- a/src/Nest/Aggregations/Visitor/AggregationVisitor.cs +++ b/src/Nest/Aggregations/Visitor/AggregationVisitor.cs @@ -147,6 +147,8 @@ public interface IAggregationVisitor void Visit(IBoxplotAggregation aggregation); void Visit(ITopMetricsAggregation aggregation); + + void Visit(ITTestAggregation aggregation); } public class AggregationVisitor : IAggregationVisitor @@ -275,6 +277,8 @@ public virtual void Visit(IBoxplotAggregation aggregation) { } public virtual void Visit(ITopMetricsAggregation aggregation) { } + public virtual void Visit(ITTestAggregation aggregation) { } + public virtual void Visit(IAggregation aggregation) { } public virtual void Visit(IAggregationContainer aggregationContainer) { } diff --git a/src/Nest/Aggregations/Visitor/AggregationWalker.cs b/src/Nest/Aggregations/Visitor/AggregationWalker.cs index d173abe7264..2aa24ab3ce1 100644 --- a/src/Nest/Aggregations/Visitor/AggregationWalker.cs +++ b/src/Nest/Aggregations/Visitor/AggregationWalker.cs @@ -1,7 +1,7 @@ -// Licensed to Elasticsearch B.V under one or more agreements. -// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. -// See the LICENSE file in the project root for more information - +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. 
+// See the LICENSE file in the project root for more information + using System; namespace Nest @@ -177,6 +177,7 @@ public void Walk(IAggregationContainer aggregation, IAggregationVisitor visitor) AcceptAggregation(aggregation.GeoCentroid, visitor, (v, d) => v.Visit(d)); AcceptAggregation(aggregation.Composite, visitor, (v, d) => v.Visit(d)); AcceptAggregation(aggregation.MedianAbsoluteDeviation, visitor, (v, d) => v.Visit(d)); + AcceptAggregation(aggregation.TTest, visitor, (v, d) => v.Visit(d)); } } } diff --git a/tests/Tests/Aggregations/Metric/TTest/TTestAggregationUsageTests.cs b/tests/Tests/Aggregations/Metric/TTest/TTestAggregationUsageTests.cs new file mode 100644 index 00000000000..7063be124a8 --- /dev/null +++ b/tests/Tests/Aggregations/Metric/TTest/TTestAggregationUsageTests.cs @@ -0,0 +1,120 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +using System; +using Elastic.Elasticsearch.Xunit.XunitPlumbing; +using FluentAssertions; +using Nest; +using Tests.Core.Extensions; +using Tests.Core.ManagedElasticsearch.Clusters; +using Tests.Domain; +using Tests.Framework.EndpointTests.TestState; +using static Nest.Infer; + +namespace Tests.Aggregations.Metric.TTest +{ + /** + * A t_test metrics aggregation that performs a statistical hypothesis test in which the test statistic follows a + * Student’s t-distribution under the null hypothesis on numeric values extracted from the aggregated documents or + * generated by provided scripts. In practice, this will tell you if the difference between two population means + * are statistically significant and did not occur by chance alone. 
+ * + * NOTE: Available in Elasticsearch 7.8.0+ with at least basic license level + * + * Be sure to read the Elasticsearch documentation on {ref_current}/search-aggregations-metrics-ttest-aggregation.html[T-Test Aggregation]. + */ + [SkipVersion("<7.8.0", "Introduced in 7.8.0")] + public class TTestAggregationUsageTests : AggregationUsageTestBase + { + public TTestAggregationUsageTests(ReadOnlyCluster i, EndpointUsage usage) : base(i, usage) { } + + protected override object AggregationJson => new + { + commits_visibility = new + { + t_test = new + { + a = new + { + field = "numberOfCommits", + filter = new + { + term = new + { + visibility = new + { + value = "Public" + } + } + } + }, + b = new + { + field = "numberOfCommits", + filter = new + { + term = new + { + visibility = new + { + value = "Private" + } + } + } + }, + type = "heteroscedastic" + } + } + }; + + protected override Func, IAggregationContainer> FluentAggs => a => a + .TTest("commits_visibility", c => c + .A(t => t + .Field(f => f.NumberOfCommits) + .Filter(f => f + .Term(ff => ff.Visibility, Visibility.Public) + ) + ) + .B(t => t + .Field(f => f.NumberOfCommits) + .Filter(f => f + .Term(ff => ff.Visibility, Visibility.Private) + ) + ) + .Type(TTestType.Heteroscedastic) + ); + + protected override AggregationDictionary InitializerAggs => + new TTestAggregation("commits_visibility") + { + A = new TTestPopulation + { + Field = Field(f => f.NumberOfCommits), + Filter = new TermQuery + { + Field = Field(f => f.Visibility), + Value = Visibility.Public + } + }, + B = new TTestPopulation + { + Field = Field(f => f.NumberOfCommits), + Filter = new TermQuery + { + Field = Field(f => f.Visibility), + Value = Visibility.Private + } + }, + Type = TTestType.Heteroscedastic + }; + + protected override void ExpectResponse(ISearchResponse response) + { + response.ShouldBeValid(); + var tTest = response.Aggregations.TTest("commits_visibility"); + tTest.Should().NotBeNull(); + tTest.Value.Should().BeGreaterThan(0); 
+ } + } +}