diff --git a/lib/explorer/backend/lazy_series.ex b/lib/explorer/backend/lazy_series.ex index a72634b07..404e487b5 100644 --- a/lib/explorer/backend/lazy_series.ex +++ b/lib/explorer/backend/lazy_series.ex @@ -73,6 +73,8 @@ defmodule Explorer.Backend.LazySeries do window_sum: 5, window_standard_deviation: 5, ewm_mean: 5, + ewm_standard_deviation: 6, + ewm_variance: 6, # Transformation column: 1, reverse: 1, @@ -662,6 +664,28 @@ defmodule Explorer.Backend.LazySeries do Backend.Series.new(data, {:f, 64}) end + @impl true + def ewm_standard_deviation(%Series{} = series, alpha, adjust, bias, min_periods, ignore_nils) do + args = [lazy_series!(series), alpha, adjust, bias, min_periods, ignore_nils] + + if aggregations?(args), do: raise_agg_inside_window(:ewm_standard_deviation) + + data = new(:ewm_standard_deviation, args, {:f, 64}, false) + + Backend.Series.new(data, {:f, 64}) + end + + @impl true + def ewm_variance(%Series{} = series, alpha, adjust, bias, min_periods, ignore_nils) do + args = [lazy_series!(series), alpha, adjust, bias, min_periods, ignore_nils] + + if aggregations?(args), do: raise_agg_inside_window(:ewm_variance) + + data = new(:ewm_variance, args, {:f, 64}, false) + + Backend.Series.new(data, {:f, 64}) + end + defp dtype_for_agg_operation(op, _) when op in [:count, :nil_count, :n_distinct], do: :integer defp dtype_for_agg_operation(op, series) diff --git a/lib/explorer/backend/series.ex b/lib/explorer/backend/series.ex index 04f71480d..7f2cd1a48 100644 --- a/lib/explorer/backend/series.ex +++ b/lib/explorer/backend/series.ex @@ -243,6 +243,24 @@ defmodule Explorer.Backend.Series do ignore_nils :: boolean() ) :: s + @callback ewm_standard_deviation( + s, + alpha :: float(), + adjust :: boolean(), + bias :: boolean(), + min_periods :: integer(), + ignore_nils :: boolean() + ) :: s + + @callback ewm_variance( + s, + alpha :: float(), + adjust :: boolean(), + bias :: boolean(), + min_periods :: integer(), + ignore_nils :: boolean() + ) :: s + # Nulls 
@callback fill_missing_with_strategy(s, :backward | :forward | :min | :max | :mean) :: s diff --git a/lib/explorer/polars_backend/expression.ex b/lib/explorer/polars_backend/expression.ex index d099cc97d..29a69fdd6 100644 --- a/lib/explorer/polars_backend/expression.ex +++ b/lib/explorer/polars_backend/expression.ex @@ -118,6 +118,8 @@ defmodule Explorer.PolarsBackend.Expression do window_sum: 5, window_standard_deviation: 5, ewm_mean: 5, + ewm_standard_deviation: 6, + ewm_variance: 6, # Conversions strptime: 2, diff --git a/lib/explorer/polars_backend/native.ex b/lib/explorer/polars_backend/native.ex index 1fa67243c..418b56213 100644 --- a/lib/explorer/polars_backend/native.ex +++ b/lib/explorer/polars_backend/native.ex @@ -418,6 +418,8 @@ defmodule Explorer.PolarsBackend.Native do do: err() def s_ewm_mean(_s, _alpha, _adjust, _min_periods, _ignore_nils), do: err() + def s_ewm_standard_deviation(_s, _alpha, _adjust, _bias, _min_periods, _ignore_nils), do: err() + def s_ewm_variance(_s, _alpha, _adjust, _bias, _min_periods, _ignore_nils), do: err() def s_in(_s, _other), do: err() def s_day_of_week(_s), do: err() def s_day_of_year(_s), do: err() diff --git a/lib/explorer/polars_backend/series.ex b/lib/explorer/polars_backend/series.ex index c21adc8ce..5d5cdfc59 100644 --- a/lib/explorer/polars_backend/series.ex +++ b/lib/explorer/polars_backend/series.ex @@ -533,6 +533,20 @@ defmodule Explorer.PolarsBackend.Series do Shared.apply_series(series, :s_ewm_mean, [alpha, adjust, min_periods, ignore_nils]) end + @impl true + def ewm_standard_deviation(series, alpha, adjust, bias, min_periods, ignore_nils) do + Shared.apply_series( + series, + :s_ewm_standard_deviation, + [alpha, adjust, bias, min_periods, ignore_nils] + ) + end + + @impl true + def ewm_variance(series, alpha, adjust, bias, min_periods, ignore_nils) do + Shared.apply_series(series, :s_ewm_variance, [alpha, adjust, bias, min_periods, ignore_nils]) + end + # Missing values @impl true diff --git 
a/lib/explorer/series.ex b/lib/explorer/series.ex index 63a33cecf..b5ac618da 100644 --- a/lib/explorer/series.ex +++ b/lib/explorer/series.ex @@ -4848,6 +4848,116 @@ defmodule Explorer.Series do ]) end + @doc """ + Calculate the exponentially weighted moving standard deviation, given smoothing factor alpha. + + ## Options + + * `:alpha` - Optional smoothing factor which specifies the importance given + to most recent observations. It is a value such that, 0 < alpha <= 1. Defaults to 0.5. + + * `:adjust` - If set to true, it corrects the bias introduced by the smoothing process. + Defaults to `true`. + + * `:bias` - If set to false, it corrects the estimate to be statistically unbiased. + Defaults to `false`. + + * `:min_periods` - The number of values in the window that should be non-nil + before computing a result. Defaults to `1`. + + * `:ignore_nils` - If set to true, it ignores nils in the calculation. Defaults to `true`. + + ## Examples + + iex> s = 1..5 |> Enum.to_list() |> Explorer.Series.from_list() + iex> Explorer.Series.ewm_standard_deviation(s) + #Explorer.Series< + Polars[5] + f64 [0.0, 0.7071067811865476, 0.9636241116594314, 1.1771636613972951, 1.3452425132127066] + > + + iex> s = 1..5 |> Enum.to_list() |> Explorer.Series.from_list() + iex> Explorer.Series.ewm_standard_deviation(s, alpha: 0.1) + #Explorer.Series< + Polars[5] + f64 [0.0, 0.7071067811865476, 0.9990770648702808, 1.2879021599718157, 1.5741638698820746] + > + """ + @doc type: :window + def ewm_standard_deviation(series, opts \\ []) do + opts = + Keyword.validate!(opts, + alpha: 0.5, + adjust: true, + bias: false, + min_periods: 1, + ignore_nils: true + ) + + apply_series(series, :ewm_standard_deviation, [ + opts[:alpha], + opts[:adjust], + opts[:bias], + opts[:min_periods], + opts[:ignore_nils] + ]) + end + + @doc """ + Calculate the exponentially weighted moving variance, given smoothing factor alpha.
+ + ## Options + + * `:alpha` - Optional smoothing factor which specifies the importance given + to most recent observations. It is a value such that, 0 < alpha <= 1. Defaults to 0.5. + + * `:adjust` - If set to true, it corrects the bias introduced by the smoothing process. + Defaults to `true`. + + * `:bias` - If set to false, it corrects the estimate to be statistically unbiased. + Defaults to `false`. + + * `:min_periods` - The number of values in the window that should be non-nil + before computing a result. Defaults to `1`. + + * `:ignore_nils` - If set to true, it ignores nils in the calculation. Defaults to `true`. + + ## Examples + + iex> s = 1..5 |> Enum.to_list() |> Explorer.Series.from_list() + iex> Explorer.Series.ewm_variance(s) + #Explorer.Series< + Polars[5] + f64 [0.0, 0.5, 0.9285714285714284, 1.385714285714286, 1.8096774193548393] + > + + iex> s = 1..5 |> Enum.to_list() |> Explorer.Series.from_list() + iex> Explorer.Series.ewm_variance(s, alpha: 0.1) + #Explorer.Series< + Polars[5] + f64 [0.0, 0.5, 0.9981549815498153, 1.6586919736600685, 2.4779918892421087] + > + """ + @doc type: :window + def ewm_variance(series, opts \\ []) do + opts = + Keyword.validate!(opts, + alpha: 0.5, + adjust: true, + bias: false, + min_periods: 1, + ignore_nils: true + ) + + apply_series(series, :ewm_variance, [ + opts[:alpha], + opts[:adjust], + opts[:bias], + opts[:min_periods], + opts[:ignore_nils] + ]) + end + # Missing values @doc """ diff --git a/native/explorer/src/expressions.rs b/native/explorer/src/expressions.rs index 26c1a5157..636617d0a 100644 --- a/native/explorer/src/expressions.rs +++ b/native/explorer/src/expressions.rs @@ -8,7 +8,7 @@ use polars::prelude::{ col, concat_str, cov, pearson_corr, spearman_rank_corr, when, IntoLazy, LiteralValue, SortOptions, }; -use polars::prelude::{DataType, Expr, Literal, StrptimeOptions, TimeUnit}; +use polars::prelude::{DataType, Expr, EWMOptions, Literal, StrptimeOptions, TimeUnit}; use crate::datatypes::{
ExCorrelationMethod, ExDate, ExDateTime, ExDuration, ExRankMethod, ExSeriesDtype, ExValidValue, @@ -699,6 +699,34 @@ pub fn expr_ewm_mean( ExExpr::new(expr.ewm_mean(opts)) } +#[rustler::nif] +pub fn expr_ewm_standard_deviation( + data: ExExpr, + alpha: f64, + adjust: bool, + bias: bool, + min_periods: usize, + ignore_nulls: bool, +) -> ExExpr { + let expr = data.clone_inner(); + let opts = EWMOptions { alpha, adjust, bias, min_periods, ignore_nulls, ..Default::default() }; + ExExpr::new(expr.ewm_std(opts)) +} + +#[rustler::nif] +pub fn expr_ewm_variance( + data: ExExpr, + alpha: f64, + adjust: bool, + bias: bool, + min_periods: usize, + ignore_nulls: bool, +) -> ExExpr { + let expr = data.clone_inner(); + let opts = EWMOptions { alpha, adjust, bias, min_periods, ignore_nulls, ..Default::default() }; + ExExpr::new(expr.ewm_var(opts)) +} + #[rustler::nif] pub fn expr_reverse(expr: ExExpr) -> ExExpr { let expr = expr.clone_inner(); diff --git a/native/explorer/src/lib.rs b/native/explorer/src/lib.rs index 8b0da8a5b..32db7e0ff 100644 --- a/native/explorer/src/lib.rs +++ b/native/explorer/src/lib.rs @@ -256,6 +256,8 @@ rustler::init!( expr_window_sum, expr_window_standard_deviation, expr_ewm_mean, + expr_ewm_standard_deviation, + expr_ewm_variance, // inspect expressions expr_describe_filter_plan, // string expressions @@ -461,6 +463,8 @@ rustler::init!( s_window_sum, s_window_standard_deviation, s_ewm_mean, + s_ewm_standard_deviation, + s_ewm_variance, s_in, s_round, s_floor, diff --git a/native/explorer/src/series.rs b/native/explorer/src/series.rs index cba8955b4..1f074d48c 100644 --- a/native/explorer/src/series.rs +++ b/native/explorer/src/series.rs @@ -872,6 +872,34 @@ pub fn s_ewm_mean( Ok(ExSeries::new(s1)) } +#[rustler::nif(schedule = "DirtyCpu")] +pub fn s_ewm_standard_deviation( + series: ExSeries, + alpha: f64, + adjust: bool, + bias: bool, + min_periods: usize, + ignore_nulls: bool, +) -> Result { + let opts = EWMOptions { alpha, adjust, bias, min_periods, 
ignore_nulls, ..Default::default() }; + let s1 = polars_ops::prelude::ewm_std(&series, opts)?; + Ok(ExSeries::new(s1)) +} + +#[rustler::nif(schedule = "DirtyCpu")] +pub fn s_ewm_variance( + series: ExSeries, + alpha: f64, + adjust: bool, + bias: bool, + min_periods: usize, + ignore_nulls: bool, +) -> Result { + let opts = EWMOptions { alpha, adjust, bias, min_periods, ignore_nulls, ..Default::default() }; + let s1 = polars_ops::prelude::ewm_var(&series, opts)?; + Ok(ExSeries::new(s1)) +} + pub fn ewm_opts(alpha: f64, adjust: bool, min_periods: usize, ignore_nulls: bool) -> EWMOptions { EWMOptions { alpha, diff --git a/test/explorer/series_test.exs b/test/explorer/series_test.exs index cb053c6d9..31c245f33 100644 --- a/test/explorer/series_test.exs +++ b/test/explorer/series_test.exs @@ -3751,6 +3751,262 @@ defmodule Explorer.SeriesTest do end end + describe "ewm_standard_deviation/2" do + test "returns calculated ewm std values with default options used for calculation" do + s1 = 1..10 |> Enum.to_list() |> Series.from_list() + s2 = Series.ewm_standard_deviation(s1) + + assert Series.to_list(s2) == [ + 0.0, + 0.7071067811865476, + 0.9636241116594314, + 1.1771636613972951, + 1.3452425132127066, + 1.4709162008918397, + 1.5607315639222439, + 1.6224598916602895, + 1.6634845490537977, + 1.689976601128564 + ] + end + + test "returns calculated ewm std with different smoothing factor if different alpha is passed" do + s1 = 1..10 |> Enum.to_list() |> Series.from_list() + s2 = Series.ewm_standard_deviation(s1, alpha: 0.8) + + assert Series.to_list(s2) == [ + 0.0, + 0.7071067811865476, + 0.8613567692141088, + 0.930593876392466, + 0.9563763729664396, + 0.9647929424175131, + 0.9672984330369606, + 0.9679969383076764, + 0.9681825776281606, + 0.9682301709724406 + ] + end + + test "returns calculated ewm std with nils for index less than min period size, if min_periods is set" do + s1 = 1..10 |> Enum.to_list() |> Series.from_list() + s2 = Series.ewm_standard_deviation(s1, 
min_periods: 5) + + assert Series.to_list(s2) == [ + nil, + nil, + nil, + nil, + 1.3452425132127066, + 1.4709162008918397, + 1.5607315639222439, + 1.6224598916602895, + 1.6634845490537977, + 1.689976601128564 + ] + end + + test "ignores nil by default and calculates ewm std" do + s1 = Series.from_list([1, nil, 2, nil, 3, 4, 5, 6, 7, 8]) + s2 = Series.ewm_standard_deviation(s1, ignore_nils: true) + + assert Series.to_list(s2) == [ + 0.0, + 0.0, + 0.7071067811865476, + 0.7071067811865476, + 0.9636241116594314, + 1.1771636613972951, + 1.3452425132127066, + 1.4709162008918397, + 1.5607315639222439, + 1.6224598916602895 + ] + end + + test "does not ignore nil if set ignore_nils option to false and calculates ewm std" do + s1 = Series.from_list([1, nil, 2, nil, 3, 4, 5, 6, 7, 8]) + s2 = Series.ewm_standard_deviation(s1, ignore_nils: false) + + assert Series.to_list(s2) == [ + 0.0, + 0.0, + 0.7071067811865476, + 0.7071067811865476, + 0.8864052604279183, + 0.9772545497599153, + 1.1470897308102692, + 1.3067888637766594, + 1.4363395171897309, + 1.5336045526865307 + ] + end + + test "returns calculated ewm std without adjustment if adjust option is set to false" do + s1 = 1..10 |> Enum.to_list() |> Series.from_list() + s2 = Series.ewm_standard_deviation(s1, adjust: false) + + assert Series.to_list(s2) == [ + 0.0, + 0.7071067811865476, + 1.0488088481701516, + 1.300183137283433, + 1.46929354773366, + 1.5764952405261994, + 1.641829587869702, + 1.6805652557493016, + 1.7030595977801866, + 1.7159083446458816 + ] + end + + test "returns calculated ewm std with bias if bias option is set to true" do + s1 = 1..10 |> Enum.to_list() |> Series.from_list() + s2 = Series.ewm_standard_deviation(s1, bias: true) + + assert Series.to_list(s2) == [ + 0.0, + 0.4714045207910317, + 0.7284313590846835, + 0.9285592184789413, + 1.0805247886738212, + 1.191428190780648, + 1.2693050154594225, + 1.3221328870469677, + 1.3568998042691014, + 1.3791855333404945 + ] + end + end + + describe "ewm_variance/2" 
do + test "returns calculated ewm var values with default options used for calculation" do + s1 = 1..10 |> Enum.to_list() |> Series.from_list() + s2 = Series.ewm_variance(s1) + + assert Series.to_list(s2) == [ + 0.0, + 0.5, + 0.9285714285714284, + 1.385714285714286, + 1.8096774193548393, + 2.163594470046083, + 2.435883014623173, + 2.632376100046318, + 2.7671808449407167, + 2.8560209123620535 + ] + end + + test "returns calculated ewm var with different smoothing factor if different alpha is passed" do + s1 = 1..10 |> Enum.to_list() |> Series.from_list() + s2 = Series.ewm_variance(s1, alpha: 0.8) + + assert Series.to_list(s2) == [ + 0.0, + 0.5, + 0.7419354838709674, + 0.8660049627791564, + 0.9146557667684424, + 0.9308254217386428, + 0.9356662585557595, + 0.9370180725730355, + 0.9373775036227093, + 0.9374696639813216 + ] + end + + test "returns calculated ewm var with nils for index less than min period size, if min_periods is set" do + s1 = 1..10 |> Enum.to_list() |> Series.from_list() + s2 = Series.ewm_variance(s1, min_periods: 5) + + assert Series.to_list(s2) == [ + nil, + nil, + nil, + nil, + 1.8096774193548393, + 2.163594470046083, + 2.435883014623173, + 2.632376100046318, + 2.7671808449407167, + 2.8560209123620535 + ] + end + + test "ignores nil by default and calculates ewm var" do + s1 = Series.from_list([1, nil, 2, nil, 3, 4, 5, 6, 7, 8]) + s2 = Series.ewm_variance(s1, ignore_nils: true) + + assert Series.to_list(s2) == [ + 0.0, + 0.0, + 0.5, + 0.5, + 0.9285714285714284, + 1.385714285714286, + 1.8096774193548393, + 2.163594470046083, + 2.435883014623173, + 2.632376100046318 + ] + end + + test "does not ignore nil if set ignore_nils option to false and calculates ewm var" do + s1 = Series.from_list([1, nil, 2, nil, 3, 4, 5, 6, 7, 8]) + s2 = Series.ewm_variance(s1, ignore_nils: false) + + assert Series.to_list(s2) == [ + 0.0, + 0.0, + 0.5, + 0.5, + 0.7857142857142857, + 0.9550264550264549, + 1.315814850530376, + 1.7076971344906926, + 2.0630712086408294, + 
2.3519429240208543 + ] + end + + test "returns calculated ewm var without adjustment if adjust option is set to false" do + s1 = 1..10 |> Enum.to_list() |> Series.from_list() + s2 = Series.ewm_variance(s1, adjust: false) + + assert Series.to_list(s2) == [ + 0.0, + 0.5, + 1.1, + 1.6904761904761905, + 2.1588235294117646, + 2.4853372434017595, + 2.695604395604396, + 2.824299578831716, + 2.9004119935912107, + 2.9443414472253693 + ] + end + + test "returns calculated ewm var with bias if bias option is set to true" do + s1 = 1..10 |> Enum.to_list() |> Series.from_list() + s2 = Series.ewm_variance(s1, bias: true) + + assert Series.to_list(s2) == [ + 0.0, + 0.2222222222222222, + 0.5306122448979591, + 0.8622222222222223, + 1.167533818938606, + 1.4195011337868484, + 1.6111352222704451, + 1.7480353710111498, + 1.8411770788255257, + 1.9021527353757046 + ] + end + end + describe "mean/1" do test "returns the mean of an integer series" do s = Series.from_list([1, 2, nil, 3])