Skip to content

Commit

Permalink
Port / Add Documentation for VarianceSample and VariancePopulation
Browse files Browse the repository at this point in the history
  • Loading branch information
alamb committed Oct 3, 2024
1 parent 501b065 commit c61c09c
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 45 deletions.
13 changes: 13 additions & 0 deletions datafusion/expr/src/udf_docs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ impl DocumentationBuilder {
self
}

/// Adds an argument to the documentation.
pub fn with_argument(
mut self,
arg_name: impl Into<String>,
Expand All @@ -142,6 +143,18 @@ impl DocumentationBuilder {
self
}

/// Adds the standard `expression` argument to the documentation.
///
/// Convenience wrapper around [`Self::with_argument`] for functions that
/// operate on a single expression. The argument is registered under the
/// name `expression` with the description shown below:
///
/// ```text
/// expression:
///   Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.
/// ```
pub fn with_expression_argument(self) -> Self {
    // The description is a single literal so that every function using this
    // helper documents its `expression` argument with identical wording.
    self.with_argument("expression", "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators.")
}

pub fn with_related_udf(mut self, related_udf: impl Into<String>) -> Self {
let mut related = self.related_udfs.unwrap_or_default();
related.push(related_udf.into());
Expand Down
47 changes: 44 additions & 3 deletions datafusion/functions-aggregate/src/variance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,22 +18,24 @@
//! [`VarianceSample`]: variance sample aggregations.
//! [`VariancePopulation`]: variance population aggregations.
use std::{fmt::Debug, sync::Arc};

use arrow::{
array::{Array, ArrayRef, BooleanArray, Float64Array, UInt64Array},
buffer::NullBuffer,
compute::kernels::cast,
datatypes::{DataType, Field},
};
use std::sync::OnceLock;
use std::{fmt::Debug, sync::Arc};

use datafusion_common::{
downcast_value, not_impl_err, plan_err, DataFusionError, Result, ScalarValue,
};
use datafusion_expr::aggregate_doc_sections::DOC_SECTION_GENERAL;
use datafusion_expr::{
function::{AccumulatorArgs, StateFieldsArgs},
utils::format_state_name,
Accumulator, AggregateUDFImpl, GroupsAccumulator, Signature, Volatility,
Accumulator, AggregateUDFImpl, Documentation, GroupsAccumulator, Signature,
Volatility,
};
use datafusion_functions_aggregate_common::{
aggregate::groups_accumulator::accumulate::accumulate, stats::StatsType,
Expand Down Expand Up @@ -135,6 +137,26 @@ impl AggregateUDFImpl for VarianceSample {
) -> Result<Box<dyn GroupsAccumulator>> {
Ok(Box::new(VarianceGroupsAccumulator::new(StatsType::Sample)))
}

/// Returns the documentation for the sample variance aggregate.
///
/// Always yields `Some`; the value is the shared instance handed out by
/// `get_variance_sample_doc`.
fn documentation(&self) -> Option<&Documentation> {
    let doc = get_variance_sample_doc();
    Some(doc)
}
}

/// Storage for the sample variance documentation, filled in on first access.
static VARIANCE_SAMPLE_DOC: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared [`Documentation`] for the sample variance aggregate,
/// building it the first time it is requested.
///
/// # Panics
///
/// Panics if the documentation builder returns an error from `build()`.
fn get_variance_sample_doc() -> &'static Documentation {
    // Initializer kept as a named function so the builder chain reads
    // top-to-bottom, separate from the `OnceLock` plumbing.
    fn build_doc() -> Documentation {
        let builder = Documentation::builder()
            .with_doc_section(DOC_SECTION_GENERAL)
            .with_description(
                "Returns the statistical sample variance of a set of numbers.",
            )
            .with_syntax_example("var(expression)")
            .with_expression_argument();
        builder.build().unwrap()
    }

    VARIANCE_SAMPLE_DOC.get_or_init(build_doc)
}

pub struct VariancePopulation {
Expand Down Expand Up @@ -222,6 +244,25 @@ impl AggregateUDFImpl for VariancePopulation {
StatsType::Population,
)))
}
/// Returns the documentation for the population variance aggregate.
///
/// Always yields `Some`; the value is the shared instance handed out by
/// `get_variance_population_doc`.
fn documentation(&self) -> Option<&Documentation> {
    let doc = get_variance_population_doc();
    Some(doc)
}
}

/// Storage for the population variance documentation, filled in on first access.
static VARIANCE_POPULATION_DOC: OnceLock<Documentation> = OnceLock::new();

/// Returns the shared [`Documentation`] for the population variance
/// aggregate, building it the first time it is requested.
///
/// # Panics
///
/// Panics if the documentation builder returns an error from `build()`.
fn get_variance_population_doc() -> &'static Documentation {
    // Initializer kept as a named function so the builder chain reads
    // top-to-bottom, separate from the `OnceLock` plumbing.
    fn build_doc() -> Documentation {
        let builder = Documentation::builder()
            .with_doc_section(DOC_SECTION_GENERAL)
            .with_description(
                "Returns the statistical population variance of a set of numbers.",
            )
            .with_syntax_example("var_pop(expression)")
            .with_expression_argument();
        builder.build().unwrap()
    }

    VARIANCE_POPULATION_DOC.get_or_init(build_doc)
}

/// An accumulator to compute variance
Expand Down
42 changes: 0 additions & 42 deletions docs/source/user-guide/sql/aggregate_functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -240,9 +240,6 @@ last_value(expression [ORDER BY expression])
- [stddev](#stddev)
- [stddev_pop](#stddev_pop)
- [stddev_samp](#stddev_samp)
- [var](#var)
- [var_pop](#var_pop)
- [var_samp](#var_samp)
- [regr_avgx](#regr_avgx)
- [regr_avgy](#regr_avgy)
- [regr_count](#regr_count)
Expand Down Expand Up @@ -349,45 +346,6 @@ stddev_samp(expression)

#### Arguments

- **expression**: Expression to operate on.
Can be a constant, column, or function, and any combination of arithmetic operators.

### `var`

Returns the statistical variance of a set of numbers.

```
var(expression)
```

#### Arguments

- **expression**: Expression to operate on.
Can be a constant, column, or function, and any combination of arithmetic operators.

### `var_pop`

Returns the statistical population variance of a set of numbers.

```
var_pop(expression)
```

#### Arguments

- **expression**: Expression to operate on.
Can be a constant, column, or function, and any combination of arithmetic operators.

### `var_samp`

Returns the statistical sample variance of a set of numbers.

```
var_samp(expression)
```

#### Arguments

- **expression**: Expression to operate on.
Can be a constant, column, or function, and any combination of arithmetic operators.

Expand Down

0 comments on commit c61c09c

Please sign in to comment.