Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for T-Test aggregations #4732

Merged
merged 2 commits into from
Jun 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/aggregations.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ The values are typically extracted from the fields of the document (using the fi

* <<top-metrics-aggregation-usage,Top Metrics Aggregation Usage>>

* <<t-test-aggregation-usage,T Test Aggregation Usage>>

* <<value-count-aggregation-usage,Value Count Aggregation Usage>>

* <<weighted-average-aggregation-usage,Weighted Average Aggregation Usage>>
Expand Down Expand Up @@ -110,6 +112,8 @@ include::aggregations/metric/top-hits/top-hits-aggregation-usage.asciidoc[]

include::aggregations/metric/top-metrics/top-metrics-aggregation-usage.asciidoc[]

include::aggregations/metric/t-test/t-test-aggregation-usage.asciidoc[]

include::aggregations/metric/value-count/value-count-aggregation-usage.asciidoc[]

include::aggregations/metric/weighted-average/weighted-average-aggregation-usage.asciidoc[]
Expand Down
118 changes: 118 additions & 0 deletions docs/aggregations/metric/t-test/t-test-aggregation-usage.asciidoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
:ref_current: https://www.elastic.co/guide/en/elasticsearch/reference/master

:github: https://github.com/elastic/elasticsearch-net

:nuget: https://www.nuget.org/packages

////
IMPORTANT NOTE
==============
This file has been generated from https://github.com/elastic/elasticsearch-net/tree/master/src/Tests/Tests/Aggregations/Metric/TTest/TTestAggregationUsageTests.cs.
If you wish to submit a PR for any spelling mistakes, typos or grammatical errors for this file,
please modify the original csharp file found at the link and submit the PR with that change. Thanks!
////

[[t-test-aggregation-usage]]
=== T Test Aggregation Usage

A `t_test` metrics aggregation performs a statistical hypothesis test in which the test statistic follows a
Student’s t-distribution under the null hypothesis on numeric values extracted from the aggregated documents or
generated by provided scripts. In practice, this will tell you if the difference between two population means
is statistically significant and did not occur by chance alone.

NOTE: Available in Elasticsearch 7.8.0+ with at least basic license level

Be sure to read the Elasticsearch documentation on {ref_current}/search-aggregations-metrics-ttest-aggregation.html[T-Test Aggregation].

==== Fluent DSL example

[source,csharp]
----
a => a
.TTest("commits_visibility", c => c
.A(t => t
.Field(f => f.NumberOfCommits)
.Filter(f => f
.Term(ff => ff.Visibility, Visibility.Public)
)
)
.B(t => t
.Field(f => f.NumberOfCommits)
.Filter(f => f
.Term(ff => ff.Visibility, Visibility.Private)
)
)
.Type(TTestType.Heteroscedastic)
)
----

==== Object Initializer syntax example

[source,csharp]
----
new TTestAggregation("commits_visibility")
{
A = new TTestPopulation
{
Field = Field<Project>(f => f.NumberOfCommits),
Filter = new TermQuery
{
Field = Field<Project>(f => f.Visibility),
Value = Visibility.Public
}
},
B = new TTestPopulation
{
Field = Field<Project>(f => f.NumberOfCommits),
Filter = new TermQuery
{
Field = Field<Project>(f => f.Visibility),
Value = Visibility.Private
}
},
Type = TTestType.Heteroscedastic
}
----

[source,javascript]
.Example json output
----
{
"commits_visibility": {
"t_test": {
"a": {
"field": "numberOfCommits",
"filter": {
"term": {
"visibility": {
"value": "Public"
}
}
}
},
"b": {
"field": "numberOfCommits",
"filter": {
"term": {
"visibility": {
"value": "Private"
}
}
}
},
"type": "heteroscedastic"
}
}
}
----

==== Handling Responses

[source,csharp]
----
response.ShouldBeValid();
var tTest = response.Aggregations.TTest("commits_visibility");
tTest.Should().NotBeNull();
tTest.Value.Should().BeGreaterThan(0);
----

2 changes: 2 additions & 0 deletions src/Nest/Aggregations/AggregateDictionary.cs
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,8 @@ public CompositeBucketAggregate Composite(string key)

public BoxplotAggregate Boxplot(string key) => TryGet<BoxplotAggregate>(key);

/// <summary>
/// Gets the aggregate stored under <paramref name="key"/> as a <see cref="ValueAggregate"/>.
/// </summary>
/// <param name="key">The name the aggregation was registered under.</param>
/// <returns>
/// The aggregate as a <see cref="ValueAggregate"/>, or <c>null</c> when no aggregate is stored under
/// <paramref name="key"/> or the stored aggregate is not a <see cref="ValueAggregate"/>.
/// </returns>
public ValueAggregate TTest(string key)
{
	return TryGet<ValueAggregate>(key);
}

/// <summary>
/// Looks up <paramref name="key"/> in the backing dictionary and casts the match to
/// <typeparamref name="TAggregate"/>.
/// </summary>
/// <returns>
/// The stored aggregate as <typeparamref name="TAggregate"/>; <c>null</c> if the key is absent
/// or the stored aggregate is of a different type.
/// </returns>
private TAggregate TryGet<TAggregate>(string key) where TAggregate : class, IAggregate
{
	if (!BackingDictionary.TryGetValue(key, out var found))
		return null;

	// A soft cast keeps a type mismatch from throwing; the caller simply sees null.
	return found as TAggregate;
}

Expand Down
15 changes: 15 additions & 0 deletions src/Nest/Aggregations/AggregationContainer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,10 @@ public interface IAggregationContainer
[DataMember(Name = "top_hits")]
ITopHitsAggregation TopHits { get; set; }

/// <inheritdoc cref="ITTestAggregation"/>
[DataMember(Name = "t_test")]
ITTestAggregation TTest { get; set; }

[DataMember(Name = "value_count")]
IValueCountAggregation ValueCount { get; set; }

Expand Down Expand Up @@ -387,6 +391,9 @@ public class AggregationContainer : IAggregationContainer
public ITermsAggregation Terms { get; set; }

public ITopHitsAggregation TopHits { get; set; }

public ITTestAggregation TTest { get; set; }

public IValueCountAggregation ValueCount { get; set; }

public IWeightedAverageAggregation WeightedAverage { get; set; }
Expand Down Expand Up @@ -542,6 +549,8 @@ public class AggregationContainerDescriptor<T> : DescriptorBase<AggregationConta

ITopHitsAggregation IAggregationContainer.TopHits { get; set; }

ITTestAggregation IAggregationContainer.TTest { get; set; }

IValueCountAggregation IAggregationContainer.ValueCount { get; set; }

IWeightedAverageAggregation IAggregationContainer.WeightedAverage { get; set; }
Expand Down Expand Up @@ -719,6 +728,12 @@ Func<TopHitsAggregationDescriptor<T>, ITopHitsAggregation> selector
) =>
_SetInnerAggregation(name, selector, (a, d) => a.TopHits = d);

/// <inheritdoc cref="ITTestAggregation"/>
/// <param name="name">The name to register the aggregation under.</param>
/// <param name="selector">Configures the t-test aggregation through its fluent descriptor.</param>
public AggregationContainerDescriptor<T> TTest(string name,
	Func<TTestAggregationDescriptor<T>, ITTestAggregation> selector
)
{
	return _SetInnerAggregation(name, selector, (container, tTest) => container.TTest = tTest);
}

public AggregationContainerDescriptor<T> Children<TChild>(string name,
Func<ChildrenAggregationDescriptor<TChild>, IChildrenAggregation> selector
) where TChild : class =>
Expand Down
120 changes: 120 additions & 0 deletions src/Nest/Aggregations/Metric/TTest/TTestAggregation.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
// Licensed to Elasticsearch B.V under one or more agreements.
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
// See the LICENSE file in the project root for more information

using System;
using System.Collections.Generic;
using System.Runtime.Serialization;
using Elasticsearch.Net;
using Elasticsearch.Net.Utf8Json;

namespace Nest
{
/// <summary>
/// A metrics aggregation that performs a statistical hypothesis test in which the test statistic follows a
/// Student’s t-distribution under the null hypothesis on numeric values extracted from the aggregated documents or
/// generated by provided scripts. In practice, this will tell you if the difference between two population means
/// is statistically significant and did not occur by chance alone.
/// <para />
/// Available in Elasticsearch 7.8.0+ with at least basic license level
/// </summary>
[InterfaceDataContract]
[ReadAs(typeof(TTestAggregation))]
public interface ITTestAggregation : IAggregation
{
	// Interface members are implicitly public. The explicit modifier is dropped so the
	// declarations compile on pre-C# 8 compilers and match the other interfaces in the project.

	/// <summary>
	/// T-test population A
	/// </summary>
	[DataMember(Name = "a")]
	ITTestPopulation A { get; set; }

	/// <summary>
	/// T-test population B
	/// </summary>
	[DataMember(Name = "b")]
	ITTestPopulation B { get; set; }

	/// <summary>
	/// T-test type. See <see cref="TTestType"/> for the available tests.
	/// </summary>
	[DataMember(Name = "type")]
	TTestType? Type { get; set; }
}

/// <inheritdoc cref="ITTestAggregation" />
public class TTestAggregation : AggregationBase, ITTestAggregation
{
	// Parameterless constructor; not visible outside the assembly.
	internal TTestAggregation()
	{
	}

	/// <summary>
	/// Creates a t-test aggregation registered under <paramref name="name"/>.
	/// </summary>
	/// <param name="name">The aggregation name, passed through to the base class.</param>
	public TTestAggregation(string name)
		: base(name)
	{
	}

	// Places this instance in the container's t-test slot.
	internal override void WrapInContainer(AggregationContainer c)
	{
		c.TTest = this;
	}

	/// <inheritdoc />
	public ITTestPopulation A { get; set; }

	/// <inheritdoc />
	public ITTestPopulation B { get; set; }

	/// <inheritdoc />
	public TTestType? Type { get; set; }
}

/// <inheritdoc cref="ITTestAggregation" />
/// <typeparam name="T">The document type used by the strongly typed population selectors.</typeparam>
public class TTestAggregationDescriptor<T>
	: DescriptorBase<TTestAggregationDescriptor<T>, ITTestAggregation>, ITTestAggregation
	where T : class
{
	// Explicit interface state; populated only through the fluent methods below.
	IDictionary<string, object> IAggregation.Meta { get; set; }
	string IAggregation.Name { get; set; }
	ITTestPopulation ITTestAggregation.A { get; set; }
	ITTestPopulation ITTestAggregation.B { get; set; }
	TTestType? ITTestAggregation.Type { get; set; }

	/// <inheritdoc cref="ITTestAggregation.A"/>
	public TTestAggregationDescriptor<T> A(Func<TTestPopulationDescriptor<T>, ITTestPopulation> selector)
	{
		return Assign(selector, (aggregation, build) =>
			aggregation.A = build?.Invoke(new TTestPopulationDescriptor<T>()));
	}

	/// <inheritdoc cref="ITTestAggregation.A"/>
	/// <typeparam name="TOther">The document type to describe population A against, instead of <typeparamref name="T"/>.</typeparam>
	public TTestAggregationDescriptor<T> A<TOther>(Func<TTestPopulationDescriptor<TOther>, ITTestPopulation> selector)
		where TOther : class
	{
		return Assign(selector, (aggregation, build) =>
			aggregation.A = build?.Invoke(new TTestPopulationDescriptor<TOther>()));
	}

	/// <inheritdoc cref="ITTestAggregation.B"/>
	public TTestAggregationDescriptor<T> B(Func<TTestPopulationDescriptor<T>, ITTestPopulation> selector)
	{
		return Assign(selector, (aggregation, build) =>
			aggregation.B = build?.Invoke(new TTestPopulationDescriptor<T>()));
	}

	/// <inheritdoc cref="ITTestAggregation.B"/>
	/// <typeparam name="TOther">The document type to describe population B against, instead of <typeparamref name="T"/>.</typeparam>
	public TTestAggregationDescriptor<T> B<TOther>(Func<TTestPopulationDescriptor<TOther>, ITTestPopulation> selector)
		where TOther : class
	{
		return Assign(selector, (aggregation, build) =>
			aggregation.B = build?.Invoke(new TTestPopulationDescriptor<TOther>()));
	}

	/// <inheritdoc cref="ITTestAggregation.Type"/>
	public TTestAggregationDescriptor<T> Type(TTestType? type)
	{
		return Assign(type, (aggregation, value) => aggregation.Type = value);
	}

	/// <inheritdoc cref="IAggregation.Meta"/>
	public TTestAggregationDescriptor<T> Meta(Func<FluentDictionary<string, object>, FluentDictionary<string, object>> selector)
	{
		return Assign(selector, (aggregation, build) =>
			aggregation.Meta = build?.Invoke(new FluentDictionary<string, object>()));
	}
}

/// <summary>
/// The type of t-test performed by an <see cref="ITTestAggregation"/>.
/// </summary>
[StringEnum]
public enum TTestType
{
/// <summary>
/// Performs a paired t-test. Maps to the string value <c>paired</c>.
/// </summary>
[EnumMember(Value = "paired")]
Paired,

/// <summary>
/// Performs a two-sample equal variance test. Maps to the string value <c>homoscedastic</c>.
/// </summary>
[EnumMember(Value = "homoscedastic")]
Homoscedastic,

/// <summary>
/// Performs a two-sample unequal variance test (this is the default).
/// Maps to the string value <c>heteroscedastic</c>.
/// </summary>
[EnumMember(Value = "heteroscedastic")]
Heteroscedastic,
}
}
74 changes: 74 additions & 0 deletions src/Nest/Aggregations/Metric/TTest/TTestGroup.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// Licensed to Elasticsearch B.V under one or more agreements.
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
// See the LICENSE file in the project root for more information

using System;
using System.Linq.Expressions;
using System.Runtime.Serialization;
using Elasticsearch.Net.Utf8Json;

namespace Nest
{
/// <summary>
/// A population for a <see cref="TTestAggregation"/>
/// </summary>
[InterfaceDataContract]
[ReadAs(typeof(TTestPopulation))]
public interface ITTestPopulation
{
/// <summary>
/// The field to use for the population values. Must be a numeric field.
/// </summary>
[DataMember(Name = "field")]
Field Field { get; set; }

/// <summary>
/// A script to use to calculate population values.
/// </summary>
[DataMember(Name = "script")]
IScript Script { get; set; }

/// <summary>
/// A filter to apply to the target field to filter population values. Useful
/// when two populations use the same field for values, to filter the values.
/// </summary>
[DataMember(Name = "filter")]
QueryContainer Filter { get; set; }
}

/// <inheritdoc cref="ITTestPopulation" />
public class TTestPopulation : ITTestPopulation
{
/// <inheritdoc />
public Field Field { get; set; }
/// <inheritdoc />
public IScript Script { get; set; }
/// <inheritdoc />
public QueryContainer Filter { get; set; }
}

/// <inheritdoc cref="ITTestPopulation"/>
/// <typeparam name="T">The document type used for strongly typed field and filter expressions.</typeparam>
public class TTestPopulationDescriptor<T>
	: DescriptorBase<TTestPopulationDescriptor<T>, ITTestPopulation>, ITTestPopulation
	where T : class
{
	// Explicit interface state; populated only through the fluent methods below.
	Field ITTestPopulation.Field { get; set; }
	IScript ITTestPopulation.Script { get; set; }
	QueryContainer ITTestPopulation.Filter { get; set; }

	/// <inheritdoc cref="ITTestPopulation.Field"/>
	public TTestPopulationDescriptor<T> Field(Field field)
	{
		return Assign(field, (population, value) => population.Field = value);
	}

	/// <inheritdoc cref="ITTestPopulation.Field"/>
	public TTestPopulationDescriptor<T> Field<TValue>(Expression<Func<T, TValue>> field)
	{
		return Assign(field, (population, value) => population.Field = value);
	}

	/// <inheritdoc cref="ITTestPopulation.Script"/>
	public TTestPopulationDescriptor<T> Script(string script)
	{
		// Convert the raw source to an inline script before it is assigned.
		var inline = (InlineScript)script;
		return Assign(inline, (population, value) => population.Script = value);
	}

	/// <inheritdoc cref="ITTestPopulation.Script"/>
	public TTestPopulationDescriptor<T> Script(Func<ScriptDescriptor, IScript> scriptSelector)
	{
		return Assign(scriptSelector, (population, build) =>
			population.Script = build?.Invoke(new ScriptDescriptor()));
	}

	/// <inheritdoc cref="ITTestPopulation.Filter"/>
	public TTestPopulationDescriptor<T> Filter(Func<QueryContainerDescriptor<T>, QueryContainer> filter)
	{
		return Assign(filter, (population, build) =>
			population.Filter = build?.Invoke(new QueryContainerDescriptor<T>()));
	}
}
}
2 changes: 1 addition & 1 deletion src/Nest/Aggregations/Metric/ValueAggregate.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
// See the LICENSE file in the project root for more information

namespace Nest
namespace Nest
{
public class ValueAggregate : MetricAggregateBase
{
Expand Down
Loading