Skip to content

Commit

Permalink
Add support for T-Test aggregations (#4732) (#4765)
Browse files Browse the repository at this point in the history
* Add support for T-Test aggregations

Relates: #4718

Co-authored-by: Russ Cam <[email protected]>
Co-authored-by: Martijn Laarman <[email protected]>
  • Loading branch information
3 people authored Jun 10, 2020
1 parent eb5545c commit 7428f27
Show file tree
Hide file tree
Showing 10 changed files with 463 additions and 5 deletions.
4 changes: 4 additions & 0 deletions docs/aggregations.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ The values are typically extracted from the fields of the document (using the fi

* <<top-metrics-aggregation-usage,Top Metrics Aggregation Usage>>

* <<t-test-aggregation-usage,T Test Aggregation Usage>>

* <<value-count-aggregation-usage,Value Count Aggregation Usage>>

* <<weighted-average-aggregation-usage,Weighted Average Aggregation Usage>>
Expand Down Expand Up @@ -110,6 +112,8 @@ include::aggregations/metric/top-hits/top-hits-aggregation-usage.asciidoc[]

include::aggregations/metric/top-metrics/top-metrics-aggregation-usage.asciidoc[]

include::aggregations/metric/t-test/t-test-aggregation-usage.asciidoc[]

include::aggregations/metric/value-count/value-count-aggregation-usage.asciidoc[]

include::aggregations/metric/weighted-average/weighted-average-aggregation-usage.asciidoc[]
Expand Down
118 changes: 118 additions & 0 deletions docs/aggregations/metric/t-test/t-test-aggregation-usage.asciidoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
:ref_current: https://www.elastic.co/guide/en/elasticsearch/reference/master

:github: https://github.com/elastic/elasticsearch-net

:nuget: https://www.nuget.org/packages

////
IMPORTANT NOTE
==============
This file has been generated from https://github.com/elastic/elasticsearch-net/tree/master/src/Tests/Tests/Aggregations/Metric/TTest/TTestAggregationUsageTests.cs.
If you wish to submit a PR for any spelling mistakes, typos or grammatical errors for this file,
please modify the original csharp file found at the link and submit the PR with that change. Thanks!
////

[[t-test-aggregation-usage]]
=== T Test Aggregation Usage

A t_test metrics aggregation performs a statistical hypothesis test in which the test statistic follows a
Student’s t-distribution under the null hypothesis, on numeric values extracted from the aggregated documents or
generated by provided scripts. In practice, this will tell you if the difference between two population means
is statistically significant and did not occur by chance alone.

NOTE: Available in Elasticsearch 7.8.0+ with at least basic license level

Be sure to read the Elasticsearch documentation on {ref_current}/search-aggregations-metrics-ttest-aggregation.html[T-Test Aggregation].

==== Fluent DSL example

[source,csharp]
----
a => a
.TTest("commits_visibility", c => c
.A(t => t
.Field(f => f.NumberOfCommits)
.Filter(f => f
.Term(ff => ff.Visibility, Visibility.Public)
)
)
.B(t => t
.Field(f => f.NumberOfCommits)
.Filter(f => f
.Term(ff => ff.Visibility, Visibility.Private)
)
)
.Type(TTestType.Heteroscedastic)
)
----

==== Object Initializer syntax example

[source,csharp]
----
new TTestAggregation("commits_visibility")
{
A = new TTestPopulation
{
Field = Field<Project>(f => f.NumberOfCommits),
Filter = new TermQuery
{
Field = Field<Project>(f => f.Visibility),
Value = Visibility.Public
}
},
B = new TTestPopulation
{
Field = Field<Project>(f => f.NumberOfCommits),
Filter = new TermQuery
{
Field = Field<Project>(f => f.Visibility),
Value = Visibility.Private
}
},
Type = TTestType.Heteroscedastic
}
----

[source,javascript]
.Example json output
----
{
"commits_visibility": {
"t_test": {
"a": {
"field": "numberOfCommits",
"filter": {
"term": {
"visibility": {
"value": "Public"
}
}
}
},
"b": {
"field": "numberOfCommits",
"filter": {
"term": {
"visibility": {
"value": "Private"
}
}
}
},
"type": "heteroscedastic"
}
}
}
----

==== Handling Responses

[source,csharp]
----
response.ShouldBeValid();
var tTest = response.Aggregations.TTest("commits_visibility");
tTest.Should().NotBeNull();
tTest.Value.Should().BeGreaterThan(0);
----

2 changes: 2 additions & 0 deletions src/Nest/Aggregations/AggregateDictionary.cs
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,8 @@ public CompositeBucketAggregate Composite(string key)

public BoxplotAggregate Boxplot(string key) => TryGet<BoxplotAggregate>(key);

/// <summary>
/// Gets the T-test aggregate stored under <paramref name="key"/> as a <see cref="ValueAggregate"/>.
/// </summary>
/// <param name="key">The key of the aggregate to look up.</param>
/// <returns>
/// The aggregate, or <c>null</c> when no entry exists for <paramref name="key"/> or the stored
/// aggregate is not a <see cref="ValueAggregate"/>.
/// </returns>
public ValueAggregate TTest(string key) => TryGet<ValueAggregate>(key);

// Looks up an aggregate by key and returns it as the requested aggregate type.
// Yields null both when the key is missing and when the stored aggregate is of a different type.
private TAggregate TryGet<TAggregate>(string key) where TAggregate : class, IAggregate
{
	if (!BackingDictionary.TryGetValue(key, out var found))
	{
		return null;
	}

	return found as TAggregate;
}

Expand Down
15 changes: 15 additions & 0 deletions src/Nest/Aggregations/AggregationContainer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,10 @@ public interface IAggregationContainer
[DataMember(Name = "top_hits")]
ITopHitsAggregation TopHits { get; set; }

/// <inheritdoc cref="ITTestAggregation"/>
[DataMember(Name = "t_test")]
ITTestAggregation TTest { get; set; }

[DataMember(Name = "value_count")]
IValueCountAggregation ValueCount { get; set; }

Expand Down Expand Up @@ -387,6 +391,9 @@ public class AggregationContainer : IAggregationContainer
public ITermsAggregation Terms { get; set; }

public ITopHitsAggregation TopHits { get; set; }

public ITTestAggregation TTest { get; set; }

public IValueCountAggregation ValueCount { get; set; }

public IWeightedAverageAggregation WeightedAverage { get; set; }
Expand Down Expand Up @@ -542,6 +549,8 @@ public class AggregationContainerDescriptor<T> : DescriptorBase<AggregationConta

ITopHitsAggregation IAggregationContainer.TopHits { get; set; }

ITTestAggregation IAggregationContainer.TTest { get; set; }

IValueCountAggregation IAggregationContainer.ValueCount { get; set; }

IWeightedAverageAggregation IAggregationContainer.WeightedAverage { get; set; }
Expand Down Expand Up @@ -719,6 +728,12 @@ Func<TopHitsAggregationDescriptor<T>, ITopHitsAggregation> selector
) =>
_SetInnerAggregation(name, selector, (a, d) => a.TopHits = d);

/// <inheritdoc cref="ITTestAggregation"/>
public AggregationContainerDescriptor<T> TTest(
	string name,
	Func<TTestAggregationDescriptor<T>, ITTestAggregation> selector
)
{
	// Store the built aggregation in the container's TTest slot under the given name.
	return _SetInnerAggregation(name, selector, (container, aggregation) => container.TTest = aggregation);
}

public AggregationContainerDescriptor<T> Children<TChild>(string name,
Func<ChildrenAggregationDescriptor<TChild>, IChildrenAggregation> selector
) where TChild : class =>
Expand Down
120 changes: 120 additions & 0 deletions src/Nest/Aggregations/Metric/TTest/TTestAggregation.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
// Licensed to Elasticsearch B.V under one or more agreements.
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
// See the LICENSE file in the project root for more information

using System;
using System.Collections.Generic;
using System.Runtime.Serialization;
using Elasticsearch.Net;
using Elasticsearch.Net.Utf8Json;

namespace Nest
{
/// <summary>
/// A metrics aggregation that performs a statistical hypothesis test in which the test statistic follows a
/// Student’s t-distribution under the null hypothesis on numeric values extracted from the aggregated documents or
/// generated by provided scripts. In practice, this will tell you if the difference between two population means
/// is statistically significant and did not occur by chance alone.
/// <para />
/// Available in Elasticsearch 7.8.0+ with at least basic license level
/// </summary>
[InterfaceDataContract]
[ReadAs(typeof(TTestAggregation))]
public interface ITTestAggregation : IAggregation
{
	// Interface members are implicitly public; no access modifier is written so the declaration
	// matches ITTestPopulation and compiles on language versions earlier than C# 8.

	/// <summary>
	/// T-test population A
	/// </summary>
	[DataMember(Name = "a")]
	ITTestPopulation A { get; set; }

	/// <summary>
	/// T-test population B
	/// </summary>
	[DataMember(Name = "b")]
	ITTestPopulation B { get; set; }

	/// <summary>
	/// T-test type. See <see cref="TTestType"/> for the available tests.
	/// </summary>
	[DataMember(Name = "type")]
	TTestType? Type { get; set; }
}

/// <inheritdoc cref="ITTestAggregation" />
public class TTestAggregation : AggregationBase, ITTestAggregation
{
	// Parameterless constructor, visible only inside the assembly.
	// NOTE(review): presumably needed so the type named in [ReadAs] can be instantiated — confirm.
	internal TTestAggregation()
	{
	}

	/// <summary>
	/// Creates a T-test aggregation with the given name.
	/// </summary>
	/// <param name="name">The aggregation name, forwarded to the base class constructor.</param>
	public TTestAggregation(string name)
		: base(name)
	{
	}

	/// <inheritdoc />
	public ITTestPopulation A { get; set; }

	/// <inheritdoc />
	public ITTestPopulation B { get; set; }

	/// <inheritdoc />
	public TTestType? Type { get; set; }

	// Places this instance into the container's TTest slot.
	internal override void WrapInContainer(AggregationContainer c)
	{
		c.TTest = this;
	}
}

/// <summary>
/// Fluent descriptor for building an <see cref="ITTestAggregation"/>.
/// </summary>
/// <typeparam name="T">The document type used when describing populations without an explicit type argument.</typeparam>
public class TTestAggregationDescriptor<T>
	: DescriptorBase<TTestAggregationDescriptor<T>, ITTestAggregation>, ITTestAggregation
	where T : class
{
	IDictionary<string, object> IAggregation.Meta { get; set; }

	string IAggregation.Name { get; set; }

	ITTestPopulation ITTestAggregation.A { get; set; }

	ITTestPopulation ITTestAggregation.B { get; set; }

	TTestType? ITTestAggregation.Type { get; set; }

	/// <summary>
	/// Describes T-test population A against documents of type <typeparamref name="T"/>.
	/// </summary>
	/// <param name="selector">Builds the population; when <c>null</c>, <c>null</c> is the value handed to the assignment.</param>
	public TTestAggregationDescriptor<T> A(Func<TTestPopulationDescriptor<T>, ITTestPopulation> selector)
	{
		return Assign(selector, (aggregation, build) => aggregation.A = build?.Invoke(new TTestPopulationDescriptor<T>()));
	}

	/// <summary>
	/// Describes T-test population A against documents of type <typeparamref name="TOther"/>.
	/// </summary>
	/// <typeparam name="TOther">The document type the population descriptor is typed to.</typeparam>
	/// <param name="selector">Builds the population; when <c>null</c>, <c>null</c> is the value handed to the assignment.</param>
	public TTestAggregationDescriptor<T> A<TOther>(Func<TTestPopulationDescriptor<TOther>, ITTestPopulation> selector)
		where TOther : class
	{
		return Assign(selector, (aggregation, build) => aggregation.A = build?.Invoke(new TTestPopulationDescriptor<TOther>()));
	}

	/// <summary>
	/// Describes T-test population B against documents of type <typeparamref name="T"/>.
	/// </summary>
	/// <param name="selector">Builds the population; when <c>null</c>, <c>null</c> is the value handed to the assignment.</param>
	public TTestAggregationDescriptor<T> B(Func<TTestPopulationDescriptor<T>, ITTestPopulation> selector)
	{
		return Assign(selector, (aggregation, build) => aggregation.B = build?.Invoke(new TTestPopulationDescriptor<T>()));
	}

	/// <summary>
	/// Describes T-test population B against documents of type <typeparamref name="TOther"/>.
	/// </summary>
	/// <typeparam name="TOther">The document type the population descriptor is typed to.</typeparam>
	/// <param name="selector">Builds the population; when <c>null</c>, <c>null</c> is the value handed to the assignment.</param>
	public TTestAggregationDescriptor<T> B<TOther>(Func<TTestPopulationDescriptor<TOther>, ITTestPopulation> selector)
		where TOther : class
	{
		return Assign(selector, (aggregation, build) => aggregation.B = build?.Invoke(new TTestPopulationDescriptor<TOther>()));
	}

	/// <inheritdoc cref="ITTestAggregation.Type"/>
	public TTestAggregationDescriptor<T> Type(TTestType? type)
	{
		return Assign(type, (aggregation, value) => aggregation.Type = value);
	}

	/// <inheritdoc cref="IAggregation.Meta"/>
	public TTestAggregationDescriptor<T> Meta(Func<FluentDictionary<string, object>, FluentDictionary<string, object>> selector)
	{
		return Assign(selector, (aggregation, build) => aggregation.Meta = build?.Invoke(new FluentDictionary<string, object>()));
	}
}

/// <summary>
/// The type of t-test to perform
/// </summary>
[StringEnum]
public enum TTestType
{
/// <summary>
/// Performs a paired t-test
/// </summary>
[EnumMember(Value = "paired")]
Paired,

/// <summary>
/// Performs a two-sample equal variance test
/// </summary>
[EnumMember(Value = "homoscedastic")]
Homoscedastic,

/// <summary>
/// Performs a two-sample unequal variance test (this is the default)
/// </summary>
[EnumMember(Value = "heteroscedastic")]
Heteroscedastic,
}
}
74 changes: 74 additions & 0 deletions src/Nest/Aggregations/Metric/TTest/TTestGroup.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// Licensed to Elasticsearch B.V under one or more agreements.
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
// See the LICENSE file in the project root for more information

using System;
using System.Linq.Expressions;
using System.Runtime.Serialization;
using Elasticsearch.Net.Utf8Json;

namespace Nest
{
/// <summary>
/// A population for a <see cref="TTestAggregation"/>
/// </summary>
[InterfaceDataContract]
[ReadAs(typeof(TTestPopulation))]
public interface ITTestPopulation
{
/// <summary>
/// The field to use for the population values. Must be a numeric field.
/// </summary>
[DataMember(Name = "field")]
Field Field { get; set; }

/// <summary>
/// A script to use to calculate population values.
/// </summary>
[DataMember(Name = "script")]
IScript Script { get; set; }

/// <summary>
/// A filter to apply to the target field to filter population values. Useful
/// when two populations use the same field for values, to filter the values.
/// </summary>
[DataMember(Name = "filter")]
QueryContainer Filter { get; set; }
}

/// <inheritdoc cref="ITTestPopulation" />
public class TTestPopulation : ITTestPopulation
{
/// <inheritdoc />
public Field Field { get; set; }
/// <inheritdoc />
public IScript Script { get; set; }
/// <inheritdoc />
public QueryContainer Filter { get; set; }
}

/// <summary>
/// Fluent descriptor for building an <see cref="ITTestPopulation"/>.
/// </summary>
/// <typeparam name="T">The document type used for field expressions and filter queries.</typeparam>
public class TTestPopulationDescriptor<T>
	: DescriptorBase<TTestPopulationDescriptor<T>, ITTestPopulation>, ITTestPopulation
	where T : class
{
	Field ITTestPopulation.Field { get; set; }

	IScript ITTestPopulation.Script { get; set; }

	QueryContainer ITTestPopulation.Filter { get; set; }

	/// <inheritdoc cref="ITTestPopulation.Field"/>
	public TTestPopulationDescriptor<T> Field(Field field)
	{
		return Assign(field, (population, value) => population.Field = value);
	}

	/// <inheritdoc cref="ITTestPopulation.Field"/>
	public TTestPopulationDescriptor<T> Field<TValue>(Expression<Func<T, TValue>> field)
	{
		return Assign(field, (population, value) => population.Field = value);
	}

	/// <inheritdoc cref="ITTestPopulation.Script"/>
	public TTestPopulationDescriptor<T> Script(string script)
	{
		// The string is converted to an InlineScript before it is assigned.
		return Assign((InlineScript)script, (population, value) => population.Script = value);
	}

	/// <inheritdoc cref="ITTestPopulation.Script"/>
	public TTestPopulationDescriptor<T> Script(Func<ScriptDescriptor, IScript> scriptSelector)
	{
		return Assign(scriptSelector, (population, build) => population.Script = build?.Invoke(new ScriptDescriptor()));
	}

	/// <inheritdoc cref="ITTestPopulation.Filter"/>
	public TTestPopulationDescriptor<T> Filter(Func<QueryContainerDescriptor<T>, QueryContainer> filter)
	{
		return Assign(filter, (population, build) => population.Filter = build?.Invoke(new QueryContainerDescriptor<T>()));
	}
}
}
2 changes: 1 addition & 1 deletion src/Nest/Aggregations/Metric/ValueAggregate.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
// See the LICENSE file in the project root for more information

namespace Nest
namespace Nest
{
public class ValueAggregate : MetricAggregateBase
{
Expand Down
Loading

0 comments on commit 7428f27

Please sign in to comment.