-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add support for T-Test aggregations (#4732)
* Add support for T-Test aggregations Relates: #4718 Co-authored-by: Martijn Laarman <[email protected]>
- Loading branch information
1 parent
eb5545c
commit bb46691
Showing
10 changed files
with
463 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
118 changes: 118 additions & 0 deletions
118
docs/aggregations/metric/t-test/t-test-aggregation-usage.asciidoc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
:ref_current: https://www.elastic.co/guide/en/elasticsearch/reference/master | ||
|
||
:github: https://github.com/elastic/elasticsearch-net | ||
|
||
:nuget: https://www.nuget.org/packages | ||
|
||
//// | ||
IMPORTANT NOTE | ||
============== | ||
This file has been generated from https://github.com/elastic/elasticsearch-net/tree/master/src/Tests/Tests/Aggregations/Metric/TTest/TTestAggregationUsageTests.cs. | ||
If you wish to submit a PR for any spelling mistakes, typos or grammatical errors for this file, | ||
please modify the original csharp file found at the link and submit the PR with that change. Thanks! | ||
//// | ||
|
||
[[t-test-aggregation-usage]] | ||
=== T Test Aggregation Usage | ||
|
||
A t_test metrics aggregation that performs a statistical hypothesis test in which the test statistic follows a | ||
Student’s t-distribution under the null hypothesis on numeric values extracted from the aggregated documents or | ||
generated by provided scripts. In practice, this will tell you if the difference between two population means | ||
is statistically significant and did not occur by chance alone. | ||
|
||
NOTE: Available in Elasticsearch 7.8.0+ with at least basic license level | ||
|
||
Be sure to read the Elasticsearch documentation on {ref_current}/search-aggregations-metrics-ttest-aggregation.html[T-Test Aggregation]. | ||
|
||
==== Fluent DSL example | ||
|
||
[source,csharp] | ||
---- | ||
a => a | ||
.TTest("commits_visibility", c => c | ||
.A(t => t | ||
.Field(f => f.NumberOfCommits) | ||
.Filter(f => f | ||
.Term(ff => ff.Visibility, Visibility.Public) | ||
) | ||
) | ||
.B(t => t | ||
.Field(f => f.NumberOfCommits) | ||
.Filter(f => f | ||
.Term(ff => ff.Visibility, Visibility.Private) | ||
) | ||
) | ||
.Type(TTestType.Heteroscedastic) | ||
) | ||
---- | ||
|
||
==== Object Initializer syntax example | ||
|
||
[source,csharp] | ||
---- | ||
new TTestAggregation("commits_visibility") | ||
{ | ||
A = new TTestPopulation | ||
{ | ||
Field = Field<Project>(f => f.NumberOfCommits), | ||
Filter = new TermQuery | ||
{ | ||
Field = Field<Project>(f => f.Visibility), | ||
Value = Visibility.Public | ||
} | ||
}, | ||
B = new TTestPopulation | ||
{ | ||
Field = Field<Project>(f => f.NumberOfCommits), | ||
Filter = new TermQuery | ||
{ | ||
Field = Field<Project>(f => f.Visibility), | ||
Value = Visibility.Private | ||
} | ||
}, | ||
Type = TTestType.Heteroscedastic | ||
} | ||
---- | ||
|
||
[source,javascript] | ||
.Example json output | ||
---- | ||
{ | ||
"commits_visibility": { | ||
"t_test": { | ||
"a": { | ||
"field": "numberOfCommits", | ||
"filter": { | ||
"term": { | ||
"visibility": { | ||
"value": "Public" | ||
} | ||
} | ||
} | ||
}, | ||
"b": { | ||
"field": "numberOfCommits", | ||
"filter": { | ||
"term": { | ||
"visibility": { | ||
"value": "Private" | ||
} | ||
} | ||
} | ||
}, | ||
"type": "heteroscedastic" | ||
} | ||
} | ||
} | ||
---- | ||
|
||
==== Handling Responses | ||
|
||
[source,csharp] | ||
---- | ||
response.ShouldBeValid(); | ||
var tTest = response.Aggregations.TTest("commits_visibility"); | ||
tTest.Should().NotBeNull(); | ||
tTest.Value.Should().BeGreaterThan(0); | ||
---- | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
// Licensed to Elasticsearch B.V under one or more agreements. | ||
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. | ||
// See the LICENSE file in the project root for more information | ||
|
||
using System; | ||
using System.Collections.Generic; | ||
using System.Runtime.Serialization; | ||
using Elasticsearch.Net; | ||
using Elasticsearch.Net.Utf8Json; | ||
|
||
namespace Nest | ||
{ | ||
/// <summary> | ||
/// A metrics aggregation that performs a statistical hypothesis test in which the test statistic follows a | ||
/// Student’s t-distribution under the null hypothesis on numeric values extracted from the aggregated documents or | ||
/// generated by provided scripts. In practice, this will tell you if the difference between two population means | ||
/// is statistically significant and did not occur by chance alone. | ||
/// <para /> | ||
/// Available in Elasticsearch 7.8.0+ with at least basic license level | ||
/// </summary> | ||
[InterfaceDataContract] | ||
[ReadAs(typeof(TTestAggregation))] | ||
public interface ITTestAggregation : IAggregation | ||
{ | ||
// Interface members are implicitly public; no explicit access modifier is written, | ||
// matching ITTestPopulation and keeping the declaration valid for pre-C# 8 compilers. | ||
|
||
/// <summary> | ||
/// T-test population A. Serialized under the name <c>a</c>. | ||
/// </summary> | ||
[DataMember(Name = "a")] | ||
ITTestPopulation A { get; set; } | ||
|
||
/// <summary> | ||
/// T-test population B. Serialized under the name <c>b</c>. | ||
/// </summary> | ||
[DataMember(Name = "b")] | ||
ITTestPopulation B { get; set; } | ||
|
||
/// <summary> | ||
/// T-test type. Serialized under the name <c>type</c>; nullable, so it may be left unset. | ||
/// </summary> | ||
[DataMember(Name = "type")] | ||
TTestType? Type { get; set; } | ||
} | ||
|
||
/// <inheritdoc cref="ITTestAggregation" /> | ||
/// <remarks> | ||
/// Object initializer form of the aggregation: set <see cref="A" />, <see cref="B" /> and optionally | ||
/// <see cref="Type" /> directly on the instance. | ||
/// </remarks> | ||
public class TTestAggregation : AggregationBase, ITTestAggregation | ||
{ | ||
// Nameless construction is restricted to this assembly. | ||
internal TTestAggregation() { } | ||
|
||
/// <summary> | ||
/// Creates a t-test aggregation with the given name. | ||
/// </summary> | ||
/// <param name="name">The aggregation name, forwarded to the base class constructor.</param> | ||
public TTestAggregation(string name) : base(name) { } | ||
|
||
// Registers this instance on the container's TTest slot. | ||
internal override void WrapInContainer(AggregationContainer c) | ||
{ | ||
c.TTest = this; | ||
} | ||
|
||
/// <inheritdoc /> | ||
public ITTestPopulation A { get; set; } | ||
/// <inheritdoc /> | ||
public ITTestPopulation B { get; set; } | ||
/// <inheritdoc /> | ||
public TTestType? Type { get; set; } | ||
} | ||
|
||
/// <inheritdoc cref="ITTestAggregation" /> | ||
/// <typeparam name="T">The document type used by the population descriptors of the non-generic overloads.</typeparam> | ||
public class TTestAggregationDescriptor<T> | ||
: DescriptorBase<TTestAggregationDescriptor<T>, ITTestAggregation>, ITTestAggregation | ||
where T : class | ||
{ | ||
// Explicit interface state; the fluent methods below write to these through Assign. | ||
IDictionary<string, object> IAggregation.Meta { get; set; } | ||
string IAggregation.Name { get; set; } | ||
ITTestPopulation ITTestAggregation.A { get; set; } | ||
ITTestPopulation ITTestAggregation.B { get; set; } | ||
TTestType? ITTestAggregation.Type { get; set; } | ||
|
||
/// <summary> | ||
/// Sets population A by running <paramref name="selector" /> against a new population descriptor | ||
/// for <typeparamref name="T" />. A null selector assigns null. | ||
/// </summary> | ||
public TTestAggregationDescriptor<T> A(Func<TTestPopulationDescriptor<T>, ITTestPopulation> selector) | ||
{ | ||
return Assign(selector, (aggregation, build) => aggregation.A = build?.Invoke(new TTestPopulationDescriptor<T>())); | ||
} | ||
|
||
/// <summary> | ||
/// Sets population A by running <paramref name="selector" /> against a new population descriptor | ||
/// for <typeparamref name="TOther" />. A null selector assigns null. | ||
/// </summary> | ||
public TTestAggregationDescriptor<T> A<TOther>(Func<TTestPopulationDescriptor<TOther>, ITTestPopulation> selector) where TOther : class | ||
{ | ||
return Assign(selector, (aggregation, build) => aggregation.A = build?.Invoke(new TTestPopulationDescriptor<TOther>())); | ||
} | ||
|
||
/// <summary> | ||
/// Sets population B by running <paramref name="selector" /> against a new population descriptor | ||
/// for <typeparamref name="T" />. A null selector assigns null. | ||
/// </summary> | ||
public TTestAggregationDescriptor<T> B(Func<TTestPopulationDescriptor<T>, ITTestPopulation> selector) | ||
{ | ||
return Assign(selector, (aggregation, build) => aggregation.B = build?.Invoke(new TTestPopulationDescriptor<T>())); | ||
} | ||
|
||
/// <summary> | ||
/// Sets population B by running <paramref name="selector" /> against a new population descriptor | ||
/// for <typeparamref name="TOther" />. A null selector assigns null. | ||
/// </summary> | ||
public TTestAggregationDescriptor<T> B<TOther>(Func<TTestPopulationDescriptor<TOther>, ITTestPopulation> selector) where TOther : class | ||
{ | ||
return Assign(selector, (aggregation, build) => aggregation.B = build?.Invoke(new TTestPopulationDescriptor<TOther>())); | ||
} | ||
|
||
/// <inheritdoc cref="ITTestAggregation.Type"/> | ||
public TTestAggregationDescriptor<T> Type(TTestType? type) | ||
{ | ||
return Assign(type, (aggregation, testType) => aggregation.Type = testType); | ||
} | ||
|
||
/// <summary> | ||
/// Sets the aggregation metadata by running <paramref name="selector" /> against a new | ||
/// <see cref="FluentDictionary{TKey,TValue}" />. A null selector assigns null. | ||
/// </summary> | ||
public TTestAggregationDescriptor<T> Meta(Func<FluentDictionary<string, object>, FluentDictionary<string, object>> selector) | ||
{ | ||
return Assign(selector, (aggregation, build) => aggregation.Meta = build?.Invoke(new FluentDictionary<string, object>())); | ||
} | ||
} | ||
|
||
/// <summary> | ||
/// The type of t-test to perform. Each member is annotated with the string value | ||
/// given in its <c>EnumMember</c> attribute. | ||
/// </summary> | ||
[StringEnum] | ||
public enum TTestType | ||
{ | ||
/// <summary> | ||
/// Performs a paired t-test (<c>paired</c>). | ||
/// </summary> | ||
[EnumMember(Value = "paired")] | ||
Paired, | ||
|
||
/// <summary> | ||
/// Performs a two-sample equal variance test (<c>homoscedastic</c>). | ||
/// </summary> | ||
[EnumMember(Value = "homoscedastic")] | ||
Homoscedastic, | ||
|
||
/// <summary> | ||
/// Performs a two-sample unequal variance test (<c>heteroscedastic</c>). This is the default. | ||
/// </summary> | ||
[EnumMember(Value = "heteroscedastic")] | ||
Heteroscedastic, | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
// Licensed to Elasticsearch B.V under one or more agreements. | ||
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. | ||
// See the LICENSE file in the project root for more information | ||
|
||
using System; | ||
using System.Linq.Expressions; | ||
using System.Runtime.Serialization; | ||
using Elasticsearch.Net.Utf8Json; | ||
|
||
namespace Nest | ||
{ | ||
/// <summary> | ||
/// A population for a <see cref="TTestAggregation"/>. | ||
/// </summary> | ||
[InterfaceDataContract] | ||
[ReadAs(typeof(TTestPopulation))] | ||
public interface ITTestPopulation | ||
{ | ||
/// <summary> | ||
/// The field to use for the population values. Must be a numeric field. | ||
/// </summary> | ||
[DataMember(Name = "field")] | ||
Field Field { get; set; } | ||
|
||
/// <summary> | ||
/// A script to use to calculate population values. | ||
/// </summary> | ||
[DataMember(Name = "script")] | ||
IScript Script { get; set; } | ||
|
||
/// <summary> | ||
/// A filter to apply to the target field to filter population values. Useful | ||
/// when two populations use the same field for values and need to be told apart. | ||
/// </summary> | ||
[DataMember(Name = "filter")] | ||
QueryContainer Filter { get; set; } | ||
} | ||
|
||
/// <inheritdoc cref="ITTestPopulation" /> | ||
/// <remarks> | ||
/// Plain implementation of <see cref="ITTestPopulation" /> for use with object initializer syntax. | ||
/// </remarks> | ||
public class TTestPopulation : ITTestPopulation | ||
{ | ||
/// <inheritdoc /> | ||
public Field Field { get; set; } | ||
/// <inheritdoc /> | ||
public IScript Script { get; set; } | ||
/// <inheritdoc /> | ||
public QueryContainer Filter { get; set; } | ||
} | ||
|
||
/// <inheritdoc cref="ITTestPopulation"/> | ||
/// <typeparam name="T">The document type used for field expressions and for the filter query descriptor.</typeparam> | ||
public class TTestPopulationDescriptor<T> : DescriptorBase<TTestPopulationDescriptor<T>, ITTestPopulation>, ITTestPopulation where T : class | ||
{ | ||
// Explicit interface state; the fluent methods below write to these through Assign. | ||
Field ITTestPopulation.Field { get; set; } | ||
IScript ITTestPopulation.Script { get; set; } | ||
QueryContainer ITTestPopulation.Filter { get; set; } | ||
|
||
/// <inheritdoc cref="ITTestPopulation.Field"/> | ||
public TTestPopulationDescriptor<T> Field(Field field) | ||
{ | ||
return Assign(field, (population, value) => population.Field = value); | ||
} | ||
|
||
/// <summary> | ||
/// Sets the population field from a member expression on <typeparamref name="T" />. | ||
/// </summary> | ||
public TTestPopulationDescriptor<T> Field<TValue>(Expression<Func<T, TValue>> field) | ||
{ | ||
return Assign(field, (population, expression) => population.Field = expression); | ||
} | ||
|
||
/// <summary> | ||
/// Sets the population script from a string, converted to an <see cref="InlineScript" />. | ||
/// </summary> | ||
public TTestPopulationDescriptor<T> Script(string script) | ||
{ | ||
var inlineScript = (InlineScript)script; | ||
return Assign(inlineScript, (population, value) => population.Script = value); | ||
} | ||
|
||
/// <summary> | ||
/// Sets the population script by running <paramref name="scriptSelector" /> against a new | ||
/// <see cref="ScriptDescriptor" />. A null selector assigns null. | ||
/// </summary> | ||
public TTestPopulationDescriptor<T> Script(Func<ScriptDescriptor, IScript> scriptSelector) | ||
{ | ||
return Assign(scriptSelector, (population, build) => population.Script = build?.Invoke(new ScriptDescriptor())); | ||
} | ||
|
||
/// <summary> | ||
/// Sets the population filter by running <paramref name="filter" /> against a new query container | ||
/// descriptor for <typeparamref name="T" />. A null selector assigns null. | ||
/// </summary> | ||
public TTestPopulationDescriptor<T> Filter(Func<QueryContainerDescriptor<T>, QueryContainer> filter) | ||
{ | ||
return Assign(filter, (population, build) => population.Filter = build?.Invoke(new QueryContainerDescriptor<T>())); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.