From ea79679e488798841bc252395bc2665ea342e168 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 12 Jan 2024 15:31:05 +0100 Subject: [PATCH 01/13] Bump jinja2 from 3.0.3 to 3.1.3 in /requirements (#2155) Bumps [jinja2](https://github.com/pallets/jinja) from 3.0.3 to 3.1.3. - [Release notes](https://github.com/pallets/jinja/releases) - [Changelog](https://github.com/pallets/jinja/blob/main/CHANGES.rst) - [Commits](https://github.com/pallets/jinja/compare/3.0.3...3.1.3) --- updated-dependencies: - dependency-name: jinja2 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements/release.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/release.txt b/requirements/release.txt index d83cefcdc8..fb5b13e8f1 100644 --- a/requirements/release.txt +++ b/requirements/release.txt @@ -5,7 +5,7 @@ ipykernel==5.3.4 ipywidgets==7.5.1 jupyterlab==4.0.3 ipython_genutils==0.2.0 -jinja2==3.0.3 +jinja2==3.1.3 m2r2==0.3.2 nbsphinx==0.8.7 numpydoc==1.1.0 From cb724d15e84b718543580b35f24443419354331c Mon Sep 17 00:00:00 2001 From: FourierMourier <91980559+FourierMourier@users.noreply.github.com> Date: Sat, 13 Jan 2024 14:58:42 +0300 Subject: [PATCH 02/13] Fix: removed input re-normalization by rin inside `io_processor` (#2160) * prevented input re-normalization by rin using .clone() inside `io_processor` * Update CHANGELOG.md --------- Co-authored-by: Dennis Bader --- CHANGELOG.md | 2 +- darts/models/forecasting/pl_forecasting_module.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 95f52f1a76..45fa2eb617 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ but cannot always guarantee backwards compatibility. 
Changes that may **break co **Improved** **Fixed** - +- Fixed a bug when using a `TorchForecastingModel` with `use_reversible_instance_norm=True` and predicting with `n > output_chunk_length`. The input normalized multiple times. [#2160](https://github.com/unit8co/darts/pull/2160) by [FourierMourier](https://github.com/FourierMourier). ### For developers of the library: ## [0.27.1](https://github.com/unit8co/darts/tree/0.27.1) (2023-12-10) diff --git a/darts/models/forecasting/pl_forecasting_module.py b/darts/models/forecasting/pl_forecasting_module.py index 7ade7eaac9..ab98ee59c2 100644 --- a/darts/models/forecasting/pl_forecasting_module.py +++ b/darts/models/forecasting/pl_forecasting_module.py @@ -50,7 +50,8 @@ def forward_wrapper(self, *args, **kwargs): # x is input batch tuple which by definition has the past features in the first element starting with the # first n target features - x: Tuple = args[0][0] + # assuming `args[0][0]` is torch.Tensor we could clone it to prevent target re-normalization + x: Tuple = args[0][0].clone() # apply reversible instance normalization x[:, :, : self.n_targets] = self.rin(x[:, :, : self.n_targets]) # run the forward pass From de4afd1290933907f7f1d29482507cf88d4ce69b Mon Sep 17 00:00:00 2001 From: Dennis Bader Date: Sun, 14 Jan 2024 13:33:43 +0100 Subject: [PATCH 03/13] sphinx sphinx release workflow issue (#2165) --- .../test_torch_forecasting_model.py | 19 +++++++++++-------- requirements/release.txt | 2 +- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/darts/tests/models/forecasting/test_torch_forecasting_model.py b/darts/tests/models/forecasting/test_torch_forecasting_model.py index 12729d0519..24b8fd501e 100644 --- a/darts/tests/models/forecasting/test_torch_forecasting_model.py +++ b/darts/tests/models/forecasting/test_torch_forecasting_model.py @@ -707,17 +707,14 @@ def test_load_weights_params_check(self, tmpdir_fn): ckpt_path = os.path.join(tmpdir_fn, f"{model_name}.pt") # barebone model model = 
DLinearModel( - input_chunk_length=4, - output_chunk_length=1, - n_epochs=1, + input_chunk_length=4, output_chunk_length=1, n_epochs=1, **tfm_kwargs ) model.fit(self.series[:10]) model.save(ckpt_path) # identical model loading_model = DLinearModel( - input_chunk_length=4, - output_chunk_length=1, + input_chunk_length=4, output_chunk_length=1, **tfm_kwargs ) loading_model.load_weights(ckpt_path) @@ -726,21 +723,26 @@ def test_load_weights_params_check(self, tmpdir_fn): input_chunk_length=4, output_chunk_length=1, optimizer_cls=torch.optim.AdamW, + **tfm_kwargs, ) loading_model.load_weights(ckpt_path) + model_summary_kwargs = { + "pl_trainer_kwargs": dict( + {"enable_model_sumamry": False}, **tfm_kwargs["pl_trainer_kwargs"] + ) + } # different pl_trainer_kwargs loading_model = DLinearModel( input_chunk_length=4, output_chunk_length=1, - pl_trainer_kwargs={"enable_model_summary": False}, + **model_summary_kwargs, ) loading_model.load_weights(ckpt_path) # different input_chunk_length (tfm parameter) loading_model = DLinearModel( - input_chunk_length=4 + 1, - output_chunk_length=1, + input_chunk_length=4 + 1, output_chunk_length=1, **tfm_kwargs ) with pytest.raises(ValueError) as error_msg: loading_model.load_weights(ckpt_path) @@ -754,6 +756,7 @@ def test_load_weights_params_check(self, tmpdir_fn): input_chunk_length=4, output_chunk_length=1, kernel_size=10, + **tfm_kwargs, ) with pytest.raises(ValueError) as error_msg: loading_model.load_weights(ckpt_path) diff --git a/requirements/release.txt b/requirements/release.txt index fb5b13e8f1..fadc093e1c 100644 --- a/requirements/release.txt +++ b/requirements/release.txt @@ -12,7 +12,7 @@ numpydoc==1.1.0 papermill==2.2.2 pydata-sphinx-theme==0.7.2 recommonmark==0.7.1 -sphinx==4.3.2 +sphinx==5.0.0 sphinx-automodapi==0.14.0 sphinx_autodoc_typehints==1.12.0 twine==3.3.0 From 962fd78cb526887c47bddc33bea4b731adf72a87 Mon Sep 17 00:00:00 2001 From: DavidKleindienst <71123708+DavidKleindienst@users.noreply.github.com> Date: Mon, 
15 Jan 2024 17:06:32 +0100 Subject: [PATCH 04/13] Fix: Improve TimeSeries.__getitem__ frequency inference (#2152) * Improve TimeSeries.__getitem__ frequency inference * adapt getitem handling * update changelog --------- Co-authored-by: David Kleindienst Co-authored-by: dennisbader --- CHANGELOG.md | 2 ++ darts/tests/test_timeseries.py | 55 +++++++++++++++++++++++++---- darts/timeseries.py | 64 +++++++++++++++++++++++++--------- 3 files changed, 99 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 45fa2eb617..fa602543ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ but cannot always guarantee backwards compatibility. Changes that may **break co ### For users of the library: **Improved** +- Improvements to `TimeSeries`: + - Improved the time series frequency inference when using slices or pandas DatetimeIndex as keys for `__getitem__`. [#2152](https://github.com/unit8co/darts/pull/2152) by [DavidKleindienst](https://github.com/DavidKleindienst). **Fixed** - Fixed a bug when using a `TorchForecastingModel` with `use_reversible_instance_norm=True` and predicting with `n > output_chunk_length`. The input normalized multiple times. [#2160](https://github.com/unit8co/darts/pull/2160) by [FourierMourier](https://github.com/FourierMourier). 
diff --git a/darts/tests/test_timeseries.py b/darts/tests/test_timeseries.py index db5459fee6..2a2b7c2788 100644 --- a/darts/tests/test_timeseries.py +++ b/darts/tests/test_timeseries.py @@ -17,7 +17,6 @@ class TestTimeSeries: - times = pd.date_range("20130101", "20130110", freq="D") pd_series1 = pd.Series(range(10), index=times) pd_series2 = pd.Series(range(5, 15), index=times) @@ -933,6 +932,55 @@ def test_getitem_integer_index(self): with pytest.raises(KeyError): _ = series[pd.RangeIndex(start, stop=end + 2 * freq, step=freq)] + def test_getitem_frequency_inferrence(self): + ts = self.series1 + assert ts.freq == "D" + ts_got = ts[1::2] + assert ts_got.freq == "2D" + ts_got = ts[pd.Timestamp("20130103") :: 2] + assert ts_got.freq == "2D" + + idx = pd.DatetimeIndex(["20130102", "20130105", "20130108"]) + ts_idx = ts[idx] + assert ts_idx.freq == "3D" + + # With BusinessDay frequency + offset = pd.offsets.BusinessDay() # Closed on Saturdays & Sundays + dates1 = pd.date_range("20231101", "20231126", freq=offset) + values1 = np.ones(len(dates1)) + ts = TimeSeries.from_times_and_values(dates1, values1) + assert ts.freq == ts[-4:].freq + + # Using a step parameter + assert ts[1::3].freq == 3 * ts.freq + assert ts[pd.Timestamp("20231102") :: 4].freq == 4 * ts.freq + + # Indexing with datetime index + idx = pd.date_range("20231101", "20231126", freq=offset) + assert ts[idx].freq == idx.freq + + def test_getitem_frequency_inferrence_integer_index(self): + start = 2 + freq = 3 + ts = TimeSeries.from_times_and_values( + times=pd.RangeIndex( + start=start, stop=start + freq * len(self.series1), step=freq + ), + values=self.series1.values(), + ) + + assert ts.freq == freq + ts_got = ts[1::2] + assert ts_got.start_time() == start + freq + assert ts_got.freq == 2 * freq + + idx = pd.RangeIndex( + start=start + 2 * freq, stop=start + 4 * freq, step=2 * freq + ) + ts_idx = ts[idx] + assert ts_idx.start_time() == idx[0] + assert ts_idx.freq == 2 * freq + def 
test_fill_missing_dates(self): with pytest.raises(ValueError): # Series cannot have date holes without automatic filling @@ -1050,7 +1098,6 @@ def test_fill_missing_dates(self): series_target = TimeSeries.from_dataframe(df_full, time_col="date") for df, df_name in zip([df_full, df_holes], ["full", "holes"]): - # fill_missing_dates will find multiple inferred frequencies (i.e. for 'B' it finds {'B', 'D'}) if offset_alias in offset_aliases_raise: with pytest.raises(ValueError): @@ -1519,7 +1566,6 @@ def test_to_csv_stochastic(self, pddf_mock): class TestTimeSeriesConcatenate: - # # COMPONENT AXIS TESTS # @@ -1735,7 +1781,6 @@ def test_concatenate_timeseries_method(self): class TestTimeSeriesHierarchy: - components = ["total", "a", "b", "x", "y", "ax", "ay", "bx", "by"] hierarchy = { @@ -1912,7 +1957,6 @@ def test_with_string_items(self): class TestTimeSeriesHeadTail: - ts = TimeSeries( xr.DataArray( np.random.rand(10, 10, 10), @@ -2185,7 +2229,6 @@ def test_df_named_columns_index(self): class TestSimpleStatistics: - times = pd.date_range("20130101", "20130110", freq="D") values = np.random.rand(10, 2, 100) ar = xr.DataArray( diff --git a/darts/timeseries.py b/darts/timeseries.py index 1792dfe345..0a715d10b8 100644 --- a/darts/timeseries.py +++ b/darts/timeseries.py @@ -4899,12 +4899,13 @@ def _check_range(): logger, ) - def _set_freq_in_xa(xa_: xr.DataArray): + def _set_freq_in_xa(xa_: xr.DataArray, freq=None): # mutates the DataArray to make sure it contains the freq if isinstance(xa_.get_index(self._time_dim), pd.DatetimeIndex): - inferred_freq = xa_.get_index(self._time_dim).inferred_freq - if inferred_freq is not None: - xa_.get_index(self._time_dim).freq = to_offset(inferred_freq) + if freq is None: + freq = xa_.get_index(self._time_dim).inferred_freq + if freq is not None: + xa_.get_index(self._time_dim).freq = to_offset(freq) else: xa_.get_index(self._time_dim).freq = self._freq @@ -4920,8 +4921,9 @@ def _set_freq_in_xa(xa_: xr.DataArray): xa_ = 
self._xa.sel({self._time_dim: key}) # indexing may discard the freq so we restore it... - # TODO: unit-test this - _set_freq_in_xa(xa_) + # if the DateTimeIndex already has an associated freq, use it + # otherwise key.freq is None and the freq will be inferred + _set_freq_in_xa(xa_, key.freq) return self.__class__(xa_) elif isinstance(key, pd.RangeIndex): @@ -4951,18 +4953,43 @@ def _set_freq_in_xa(xa_: xr.DataArray): key.stop, (int, np.int64) ): xa_ = self._xa.isel({self._time_dim: key}) - _set_freq_in_xa( - xa_ - ) # indexing may discard the freq so we restore it... + if isinstance(key.step, (int, np.int64)): + # new frequency is multiple of original + new_freq = key.step * self.freq + elif key.step is None: + new_freq = self.freq + else: + new_freq = None + raise_log( + ValueError( + f"Invalid slice step={key.step}. Only supports integer steps or `None`." + ), + logger=logger, + ) + # indexing may discard the freq so we restore it... + _set_freq_in_xa(xa_, new_freq) return self.__class__(xa_) elif isinstance(key.start, pd.Timestamp) or isinstance( key.stop, pd.Timestamp ): _check_dt() + if isinstance(key.step, (int, np.int64)): + # new frequency is multiple of original + new_freq = key.step * self.freq + elif key.step is None: + new_freq = self.freq + else: + new_freq = None + raise_log( + ValueError( + f"Invalid slice step={key.step}. Only supports integer steps or `None`." + ), + logger=logger, + ) # indexing may discard the freq so we restore it... xa_ = self._xa.sel({self._time_dim: key}) - _set_freq_in_xa(xa_) + _set_freq_in_xa(xa_, new_freq) return self.__class__(xa_) # handle simple types: @@ -5030,13 +5057,18 @@ def _set_freq_in_xa(xa_: xr.DataArray): # We have to restore a RangeIndex. But first we need to # check the list is corresponding to a RangeIndex. 
min_idx, max_idx = min(key), max(key) - raise_if_not( - key[0] == min_idx + if ( + not key[0] == min_idx and key[-1] == max_idx - and max_idx + 1 - min_idx == len(key), - "Indexing a TimeSeries with a list requires the list to contain monotically " - + "increasing integers with no gap.", - ) + and max_idx + 1 - min_idx == len(key) + ): + raise_log( + ValueError( + "Indexing a TimeSeries with a list requires the list to " + "contain monotonically increasing integers with no gap." + ), + logger=logger, + ) new_idx = orig_idx[min_idx : max_idx + 1] xa_ = xa_.assign_coords({self._time_dim: new_idx}) From 68f72a76e6462431b2c33809176df19717d6513d Mon Sep 17 00:00:00 2001 From: Dennis Bader Date: Fri, 19 Jan 2024 12:32:12 +0100 Subject: [PATCH 05/13] fix deprecated method import from sklearn==1.4.0 (#2170) --- darts/utils/multioutput.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/darts/utils/multioutput.py b/darts/utils/multioutput.py index d20d8fd32a..84e4f04523 100644 --- a/darts/utils/multioutput.py +++ b/darts/utils/multioutput.py @@ -1,13 +1,20 @@ +from sklearn import __version__ as sklearn_version from sklearn.base import is_classifier from sklearn.multioutput import MultiOutputRegressor as sk_MultiOutputRegressor from sklearn.multioutput import _fit_estimator from sklearn.utils.multiclass import check_classification_targets -from sklearn.utils.validation import _check_fit_params, has_fit_parameter +from sklearn.utils.validation import has_fit_parameter -try: +if sklearn_version >= "1.4": + # sklearn renamed `_check_fit_params` to `_check_method_params` in v1.4 + from sklearn.utils.validation import _check_method_params +else: + from sklearn.utils.validation import _check_fit_params as _check_method_params + +if sklearn_version >= "1.3": # delayed was moved from sklearn.utils.fixes to sklearn.utils.parallel in v1.3 from sklearn.utils.parallel import Parallel, delayed -except ImportError: +else: from joblib import Parallel from 
sklearn.utils.fixes import delayed @@ -65,7 +72,7 @@ def fit(self, X, y, sample_weight=None, **fit_params): ): raise ValueError("Underlying estimator does not support sample weights.") - fit_params_validated = _check_fit_params(X, fit_params) + fit_params_validated = _check_method_params(X, fit_params) if "eval_set" in fit_params_validated.keys(): # with validation set From 7fe7128a9e241906c36f87cbd2c35be4dd4be558 Mon Sep 17 00:00:00 2001 From: Felix Divo <4403130+felixdivo@users.noreply.github.com> Date: Fri, 19 Jan 2024 12:33:00 +0100 Subject: [PATCH 06/13] Update .gitignore to ignore intermediate coverage files (#2158) * Update .gitignore to ignore intermediate coverage files Ignores files such as the following, which get created by coverage measurement tools: .coverage.68a13e4de0d0.43514.XMNYGdrx .coverage.68a13e4de0d0.43515.XjbvzNqx * Update .gitignore Get rid of one line --------- Co-authored-by: Dennis Bader --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 2440df06b6..453913f0b7 100644 --- a/.gitignore +++ b/.gitignore @@ -12,7 +12,7 @@ build/ dist/ examples/.ipynb_checkpoints/ runs/ -.coverage +.coverage* htmlcov coverage.xml .darts From 8b77a69b961b3a83ce45fb198d05acf50453c2d3 Mon Sep 17 00:00:00 2001 From: Dennis Bader Date: Fri, 19 Jan 2024 12:40:37 +0100 Subject: [PATCH 07/13] Feat/ccf (#2122) * added statistics function plot_ccf * add unit test * update changelog * add reference to ccf --- CHANGELOG.md | 1 + darts/tests/utils/test_statistics.py | 4 + darts/utils/statistics.py | 136 ++++++++++++++++++++++++++- 3 files changed, 136 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fa602543ed..b0a4c4e66d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ but cannot always guarantee backwards compatibility. 
Changes that may **break co ### For users of the library: **Improved** +- Added `darts.utils.statistics.plot_ccf` that can be used to plot the cross correlation between a time series (e.g. target series) and the lagged values of another time series (e.g. covariates series). [#2122](https://github.com/unit8co/darts/pull/2122) by [Dennis Bader](https://github.com/dennisbader). - Improvements to `TimeSeries`: - Improved the time series frequency inference when using slices or pandas DatetimeIndex as keys for `__getitem__`. [#2152](https://github.com/unit8co/darts/pull/2152) by [DavidKleindienst](https://github.com/DavidKleindienst). diff --git a/darts/tests/utils/test_statistics.py b/darts/tests/utils/test_statistics.py index 6b3ace96e3..e7775645c9 100644 --- a/darts/tests/utils/test_statistics.py +++ b/darts/tests/utils/test_statistics.py @@ -9,6 +9,8 @@ check_seasonality, extract_trend_and_seasonality, granger_causality_tests, + plot_acf, + plot_ccf, plot_pacf, plot_residuals_analysis, remove_seasonality, @@ -235,5 +237,7 @@ def test_statistics_plot(self): plt.close() plot_residuals_analysis(self.series[:10]) plt.close() + plot_acf(self.series) plot_pacf(self.series) + plot_ccf(self.series, self.series) plt.close() diff --git a/darts/utils/statistics.py b/darts/utils/statistics.py index 7b1fbdf601..faf4d1304c 100644 --- a/darts/utils/statistics.py +++ b/darts/utils/statistics.py @@ -10,8 +10,16 @@ import numpy as np from scipy.signal import argrelmax from scipy.stats import norm +from statsmodels.compat.python import lzip from statsmodels.tsa.seasonal import MSTL, STL, seasonal_decompose -from statsmodels.tsa.stattools import acf, adfuller, grangercausalitytests, kpss, pacf +from statsmodels.tsa.stattools import ( + acf, + adfuller, + ccovf, + grangercausalitytests, + kpss, + pacf, +) from darts import TimeSeries from darts.logging import get_logger, raise_if, raise_if_not, raise_log @@ -599,8 +607,8 @@ def plot_acf( default_formatting: bool = True, ) -> None: """ - 
Plots the ACF of `ts`, highlighting it at lag `m`, with corresponding significance interval. - Uses :func:`statsmodels.tsa.stattools.acf` [1]_ + Plots the Autocorrelation Function (ACF) of `ts`, highlighting it at lag `m`, with corresponding significance + interval. Uses :func:`statsmodels.tsa.stattools.acf` [1]_ Parameters ---------- @@ -695,8 +703,8 @@ def plot_pacf( default_formatting: bool = True, ) -> None: """ - Plots the Partial ACF of `ts`, highlighting it at lag `m`, with corresponding significance interval. - Uses :func:`statsmodels.tsa.stattools.pacf` [1]_ + Plots the Partial Autocorrelation Function (PACF) of `ts`, highlighting it at lag `m`, with corresponding + significance interval. Uses :func:`statsmodels.tsa.stattools.pacf` [1]_ Parameters ---------- @@ -785,6 +793,124 @@ def plot_pacf( axis.plot((0, max_lag + 1), (0, 0), color="black" if default_formatting else None) +def plot_ccf( + ts: TimeSeries, + ts_other: TimeSeries, + m: Optional[int] = None, + max_lag: int = 24, + alpha: float = 0.05, + bartlett_confint: bool = True, + fig_size: Tuple[int, int] = (10, 5), + axis: Optional[plt.axis] = None, + default_formatting: bool = True, +) -> None: + """ + Plots the Cross Correlation Function (CCF) between `ts` and `ts_other`, highlighting it at lag `m`, with + corresponding significance interval. Uses :func:`statsmodels.tsa.stattools.ccf` [1]_ + + This can be used to find the cross correlation between the target and different covariates lags. + If `ts_other` is identical `ts`, it corresponds to `plot_acf()`. + + Parameters + ---------- + ts + The TimeSeries whose CCF with `ts_other` should be plotted. + ts_other + The TimeSeries which to compare against `ts` in the CCF. E.g. check the cross correlation of different + covariate lags with the target. + m + Optionally, a time lag to highlight on the plot. + max_lag + The maximal lag order to consider. + alpha + The confidence interval to display. 
+ bartlett_confint + The boolean value indicating whether the confidence interval should be + calculated using Bartlett's formula. + fig_size + The size of the figure to be displayed. + axis + Optionally, an axis object to plot the CCF on. + default_formatting + Whether to use the darts default scheme. + + References + ---------- + .. [1] https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.ccf.html + """ + + ts._assert_univariate() + ts_other._assert_univariate() + raise_if( + max_lag is None or not (1 <= max_lag < len(ts)), + "max_lag must be greater than or equal to 1 and less than len(ts).", + ) + raise_if( + m is not None and not (0 <= m <= max_lag), + "m must be greater than or equal to 0 and less than or equal to max_lag.", + ) + raise_if( + alpha is None or not (0 < alpha < 1), + "alpha must be greater than 0 and less than 1.", + ) + ts_other = ts_other.slice_intersect(ts) + if len(ts_other) != len(ts): + raise_log( + ValueError("`ts_other` must contain at least the full time index of `ts`."), + logger=logger, + ) + + x = ts.values() + y = ts_other.values() + cvf = ccovf(x=x, y=y, adjusted=True, demean=True, fft=False) + + ccf = cvf / (np.std(x) * np.std(y)) + ccf = ccf[: max_lag + 1] + + n_obs = len(x) + if bartlett_confint: + varccf = np.ones_like(ccf) / n_obs + varccf[0] = 0 + varccf[1] = 1.0 / n_obs + varccf[2:] *= 1 + 2 * np.cumsum(ccf[1:-1] ** 2) + else: + varccf = 1.0 / n_obs + + interval = norm.ppf(1.0 - alpha / 2.0) * np.sqrt(varccf) + confint = np.array(lzip(ccf - interval, ccf + interval)) + + if axis is None: + plt.figure(figsize=fig_size) + axis = plt + + for i in range(len(ccf)): + axis.plot( + (i, i), + (0, ccf[i]), + color=("#b512b8" if m is not None and i == m else "black") + if default_formatting + else None, + lw=(1 if m is not None and i == m else 0.5), + ) + + # Adjusts the upper band of the confidence interval to center it on the x axis. 
+ upp_band = [confint[lag][1] - ccf[lag] for lag in range(1, max_lag + 1)] + + # Setting color t0 None overrides custom settings + extra_arguments = {} + if default_formatting: + extra_arguments["color"] = "#003DFD" + + axis.fill_between( + np.arange(1, max_lag + 1), + upp_band, + [-x for x in upp_band], + alpha=0.25 if default_formatting else None, + **extra_arguments, + ) + axis.plot((0, max_lag + 1), (0, 0), color="black" if default_formatting else None) + + def plot_hist( data: Union[TimeSeries, List[float], np.ndarray], bins: Optional[Union[int, np.ndarray, List[float]]] = None, From 315cb6f8d601d2c292a8c830051d64b54bf4b0ff Mon Sep 17 00:00:00 2001 From: Dennis Bader Date: Fri, 19 Jan 2024 15:29:38 +0100 Subject: [PATCH 08/13] fix torch covariates support tables in user guide (#2172) --- docs/userguide/torch_forecasting_models.md | 30 +++++++++++----------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/docs/userguide/torch_forecasting_models.md b/docs/userguide/torch_forecasting_models.md index af5c7b92f4..0c2ba84fde 100644 --- a/docs/userguide/torch_forecasting_models.md +++ b/docs/userguide/torch_forecasting_models.md @@ -95,27 +95,27 @@ Under the hood, Darts has 5 types of `{X}CovariatesModel` classes implemented to | Class | past covariates | future past covariates | future covariates | historic future covariates | |-------------------------|:---------------:|:----------------------:|:-----------------:|:--------------------------:| -| `PastCovariatesModel` | ✅ | ✅ | | | -| `FutureCovariatesModel` | | | ✅ | | -| `DualCovariatesModel` | | | ✅ | ✅ | -| `MixedCovariatesModel` | ✅ | ✅ | ✅ | ✅ | -| `SplitCovariatesModel` | ✅ | ✅ | ✅ | | +| `PastCovariatesModel` | ✅ | ✅ | | | +| `FutureCovariatesModel` | | | ✅ | | +| `DualCovariatesModel` | | | ✅ | ✅ | +| `SplitCovariatesModel` | ✅ | ✅ | ✅ | | +| `MixedCovariatesModel` | ✅ | ✅ | ✅ | ✅ | **Table 1: Darts' "{X}CovariatesModels" covariate support** Each Torch Forecasting Model inherits from one 
`{X}CovariatesModel` (covariate class names are abbreviated by the `X`-part): -| TFM | `Past` | `Future` | `Dual` | `Mixed` | `Split` | +| TFM | `Past` | `Future` | `Dual` | `Split` | `Mixed` | |--------------------|:------:|:--------:|:------:|:-------:|:-------:| -| `RNNModel` | | | ✅ | | | -| `BlockRNNModel` | ✅ | | | | | -| `NBEATSModel` | ✅ | | | | | -| `TCNModel` | ✅ | | | | | -| `TransformerModel` | ✅ | | | | | -| `TFTModel` | | | | ✅ | | -| `NLinearModel` | | | | ✅ | | -| `DLinearModel` | | | | ✅ | | -| `TiDEModel` | | | | ✅ | | +| `RNNModel` | | | ✅ | | | +| `BlockRNNModel` | ✅ | | | | | +| `NBEATSModel` | ✅ | | | | | +| `TCNModel` | ✅ | | | | | +| `TransformerModel` | ✅ | | | | | +| `TFTModel` | | | | | ✅ | +| `NLinearModel` | | | | | ✅ | +| `DLinearModel` | | | | | ✅ | +| `TiDEModel` | | | | | ✅ | **Table 2: Darts' Torch Forecasting Model covariate support** From ccecc8a3f89fdb4108f46bd8684bdc4341eaac5b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 19 Jan 2024 23:17:11 +0100 Subject: [PATCH 09/13] Bump jupyterlab from 4.0.3 to 4.0.11 in /requirements (#2173) --- requirements/release.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/release.txt b/requirements/release.txt index fadc093e1c..5571b3c1b7 100644 --- a/requirements/release.txt +++ b/requirements/release.txt @@ -3,7 +3,7 @@ docutils==0.17.1 ipython==8.10.0 ipykernel==5.3.4 ipywidgets==7.5.1 -jupyterlab==4.0.3 +jupyterlab==4.0.11 ipython_genutils==0.2.0 jinja2==3.1.3 m2r2==0.3.2 From 9cb1a56d9bba149a6036665b961d28d1f0f01afa Mon Sep 17 00:00:00 2001 From: Dennis Bader Date: Sun, 21 Jan 2024 13:58:50 +0100 Subject: [PATCH 10/13] fixes for pandas >= 2.2.0 (#2177) --- darts/dataprocessing/transformers/midas.py | 8 ++++ darts/datasets/__init__.py | 10 ++++- darts/models/forecasting/prophet_model.py | 38 +++++++++++-------- .../dataprocessing/transformers/test_midas.py | 31 ++++++++++----- 
darts/tests/test_timeseries.py | 4 +- 5 files changed, 62 insertions(+), 29 deletions(-) diff --git a/darts/dataprocessing/transformers/midas.py b/darts/dataprocessing/transformers/midas.py index 62e05a274b..d54e86a851 100644 --- a/darts/dataprocessing/transformers/midas.py +++ b/darts/dataprocessing/transformers/midas.py @@ -93,6 +93,14 @@ def __init__( .. [1] https://en.wikipedia.org/wiki/Mixed-data_sampling .. [2] https://pandas.pydata.org/docs/user_guide/timeseries.html#dateoffset-objects """ + if pd.tseries.frequencies.get_period_alias(low_freq) is None: + raise_log( + ValueError( + f"Cannot infer period alias for `low_freq={low_freq}`. " + f"Is it a valid pandas offset/frequency alias?" + ), + logger=logger, + ) self._low_freq = low_freq self._strip = strip self._drop_static_covariates = drop_static_covariates diff --git a/darts/datasets/__init__.py b/darts/datasets/__init__.py index 5f2954727e..eb5dd9a6a2 100644 --- a/darts/datasets/__init__.py +++ b/darts/datasets/__init__.py @@ -18,6 +18,8 @@ from .dataset_loaders import DatasetLoaderCSV, DatasetLoaderMetadata +pd_above_v22 = pd.__version__ >= "2.2" + """ Overall usage of this package: from darts.datasets import AirPassengersDataset @@ -886,6 +888,12 @@ def pre_process_dataset(dataset_path): df.index.name = "Timestamp" df.to_csv(self._get_path_dataset()) + # pandas v2.2.0 introduced some changes + hash_expected = ( + "485d81e9902cc0ccb1f86d7e01fb37cd" + if pd_above_v22 + else "a019125b7f9c1afeacb0ae60ce7455ef" + ) # hash value for dataset with weather data super().__init__( metadata=DatasetLoaderMetadata( @@ -895,7 +903,7 @@ def pre_process_dataset(dataset_path): "ewz_stromabgabe_netzebenen_stadt_zuerich/" "download/ewz_stromabgabe_netzebenen_stadt_zuerich.csv" ), - hash="a019125b7f9c1afeacb0ae60ce7455ef", + hash=hash_expected, header_time="Timestamp", freq="15min", pre_process_csv_fn=pre_process_dataset, diff --git a/darts/models/forecasting/prophet_model.py b/darts/models/forecasting/prophet_model.py 
index cb0abaab4a..b6674463fa 100644 --- a/darts/models/forecasting/prophet_model.py +++ b/darts/models/forecasting/prophet_model.py @@ -594,6 +594,7 @@ def _freq_to_days(freq: str) -> float: freq = "".join(re.split("[^a-zA-Z-]*", freq)).split("-")[0] seconds_per_day = 86400 + days = 0 if freq in ["A", "BA", "Y", "BY", "RE"] or freq.startswith( ("A", "BA", "Y", "BY", "RE") ): # year @@ -612,23 +613,28 @@ def _freq_to_days(freq: str) -> float: days = 1 * 7 / 5 elif freq in ["D"]: # day days = 1.0 - elif freq in ["H", "BH", "CBH"]: # hour - days = 1 / 24 - elif freq in ["T", "min"]: # minute - days = 1 / (24 * 60) - elif freq in ["S"]: # second - days = 1 / seconds_per_day - elif freq in ["L", "ms"]: # millisecond - days = 1 / (seconds_per_day * 10**3) - elif freq in ["U", "us"]: # microsecond - days = 1 / (seconds_per_day * 10**6) - elif freq in ["N"]: # nanosecond - days = 1 / (seconds_per_day * 10**9) else: - raise ValueError( - "freq {} not understood. Please report if you think this is in error.".format( - freq - ) + # all freqs higher than "D" are lower case in pandas >= 2.2.0 + freq_lower = freq.lower() + if freq_lower in ["h", "bh", "cbh"]: # hour + days = 1 / 24 + elif freq_lower in ["t", "min"]: # minute + days = 1 / (24 * 60) + elif freq_lower in ["s"]: # second + days = 1 / seconds_per_day + elif freq_lower in ["l", "ms"]: # millisecond + days = 1 / (seconds_per_day * 10**3) + elif freq_lower in ["u", "us"]: # microsecond + days = 1 / (seconds_per_day * 10**6) + elif freq_lower in ["n"]: # nanosecond + days = 1 / (seconds_per_day * 10**9) + + if not days: + raise_log( + ValueError( + f"freq {freq} not understood. Please report if you think this is in error." 
+ ), + logger=logger, ) return freq_times * days diff --git a/darts/tests/dataprocessing/transformers/test_midas.py b/darts/tests/dataprocessing/transformers/test_midas.py index 4ec6090757..70b88eff9d 100644 --- a/darts/tests/dataprocessing/transformers/test_midas.py +++ b/darts/tests/dataprocessing/transformers/test_midas.py @@ -7,6 +7,12 @@ from darts.models import LinearRegressionModel from darts.utils.timeseries_generation import generate_index, linear_timeseries +# TODO: remove this once bumping min python version from 3.8 to 3.9 (pandas v2.2.0 not available for p38) +pd_above_v22 = pd.__version__ >= "2.2" +freq_quarter_end = "QE" if pd_above_v22 else "Q" +freq_month_end = "ME" if pd_above_v22 else "M" +freq_minute = "min" if pd_above_v22 else "T" + class TestMIDAS: monthly_ts = linear_timeseries( @@ -57,7 +63,7 @@ def test_complete_monthly_to_quarterly(self): assert self.monthly_ts == inversed_quarterly_ts_midas # to quarter end - midas_2 = MIDAS(low_freq="Q") + midas_2 = MIDAS(low_freq=freq_quarter_end) quarterly_ts_midas = midas_2.fit_transform(self.monthly_ts) assert quarterly_ts_midas == self.quarterly_with_quarter_end_index_ts @@ -332,23 +338,28 @@ def test_from_second_to_minute(self): columns=[f"values_midas_{i}" for i in range(60)], ) - midas = MIDAS(low_freq="T") + midas = MIDAS(low_freq=freq_minute) minute_ts_midas = midas.fit_transform(second_ts) assert minute_ts_midas == minute_ts second_ts_midas = midas.inverse_transform(minute_ts_midas) assert second_ts_midas == second_ts + def test_error_with_invalid_freq(self): + with pytest.raises(ValueError) as err: + _ = MIDAS(low_freq="MEE") + assert str(err.value).startswith("Cannot infer period alias for") + def test_error_when_from_low_to_high(self): """ Tests if the transformer raises an error when the user asks for a transform in the wrong direction. 
""" # wrong direction : low to high freq - midas_1 = MIDAS(low_freq="M") + midas_1 = MIDAS(low_freq=freq_month_end) with pytest.raises(ValueError): midas_1.fit_transform(self.quarterly_ts) # transform to same index requested - midas_2 = MIDAS(low_freq="Q") + midas_2 = MIDAS(low_freq=freq_quarter_end) with pytest.raises(ValueError): midas_2.fit_transform(self.quarterly_ts) @@ -365,7 +376,7 @@ def test_error_when_frequency_not_suitable_for_midas(self): times=daily_times, values=daily_values, columns=["values"] ) - midas = MIDAS(low_freq="M") + midas = MIDAS(low_freq=freq_month_end) with pytest.raises(ValueError) as msg: midas.fit_transform(daily_ts) assert str(msg.value).startswith( @@ -403,7 +414,7 @@ def test_inverse_transform_prediction(self): assert pred_monthly.time_index.equals(monthly_test_ts.time_index) # "Q" = QuarterEnd, the 2 "hidden" months must be retrieved - midas_quarterly = MIDAS(low_freq="Q") + midas_quarterly = MIDAS(low_freq=freq_quarter_end) quarterly_train_ts = midas_quarterly.fit_transform(monthly_train_ts) quarterly_test_ts = midas_quarterly.transform(monthly_test_ts) @@ -439,12 +450,12 @@ def test_multiple_ts(self): ts_to_transform = [self.monthly_ts, quarterly_univariate_ts] # ==> with stripping: not enough months, first series will be empty - midas_yearly = MIDAS(low_freq="AS", strip=True) + midas_yearly = MIDAS(low_freq="YS", strip=True) list_yearly_ts = midas_yearly.fit_transform(ts_to_transform) assert len(list_yearly_ts) == 2 assert len(list_yearly_ts[0]) == 0 - assert list_yearly_ts[0].freq == "AS" + assert list_yearly_ts[0].freq == "YS" assert list_yearly_ts[0].n_components == 12 # 4 quarters in a year @@ -456,13 +467,13 @@ def test_multiple_ts(self): inverse_transformed = midas_yearly.inverse_transform(list_yearly_ts) assert len(inverse_transformed) == 2 assert len(inverse_transformed[0]) == 0 - assert inverse_transformed[0].freq == "M" + assert inverse_transformed[0].freq == freq_month_end assert inverse_transformed[0].n_components 
== 1 assert ts_to_transform[1:] == inverse_transformed[1:] # ==> without stripping: first series will be partially empty - midas_yearly = MIDAS(low_freq="AS", strip=False) + midas_yearly = MIDAS(low_freq="YS", strip=False) list_yearly_ts = midas_yearly.fit_transform(ts_to_transform) # 12 months in a year, original ts contains only 9 values, the missing data are nan np.testing.assert_array_almost_equal( diff --git a/darts/tests/test_timeseries.py b/darts/tests/test_timeseries.py index 2a2b7c2788..31f2a5fa02 100644 --- a/darts/tests/test_timeseries.py +++ b/darts/tests/test_timeseries.py @@ -1155,8 +1155,8 @@ def test_resample_timeseries(self): pd_series = pd.Series(range(10), index=times) timeseries = TimeSeries.from_series(pd_series) - resampled_timeseries = timeseries.resample("H") - assert resampled_timeseries.freq_str == "H" + resampled_timeseries = timeseries.resample("h") + assert resampled_timeseries.freq_str.lower() == "h" assert resampled_timeseries.pd_series().at[pd.Timestamp("20130101020000")] == 0 assert resampled_timeseries.pd_series().at[pd.Timestamp("20130102020000")] == 1 assert resampled_timeseries.pd_series().at[pd.Timestamp("20130109090000")] == 8 From 0520c43e61aa5fd630f8dbddf03f0257b4e8045f Mon Sep 17 00:00:00 2001 From: Dennis Bader Date: Sun, 21 Jan 2024 15:11:39 +0100 Subject: [PATCH 11/13] fix wrong pandas freqstr for py38 (#2178) --- darts/dataprocessing/transformers/midas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/darts/dataprocessing/transformers/midas.py b/darts/dataprocessing/transformers/midas.py index d54e86a851..bba870a31e 100644 --- a/darts/dataprocessing/transformers/midas.py +++ b/darts/dataprocessing/transformers/midas.py @@ -101,7 +101,7 @@ def __init__( ), logger=logger, ) - self._low_freq = low_freq + self._low_freq = pd.tseries.frequencies.to_offset(low_freq).freqstr self._strip = strip self._drop_static_covariates = drop_static_covariates self._sep = "_midas_" From 
3ee3efd1b1d5acbbb461c7ecb810f95260402b18 Mon Sep 17 00:00:00 2001 From: Dennis Bader Date: Sun, 21 Jan 2024 15:23:58 +0100 Subject: [PATCH 12/13] bump u8darts 0.27.1 to 0.27.2 and update changelog (#2179) --- CHANGELOG.md | 14 +++++++++++--- setup_u8darts.py | 2 +- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b0a4c4e66d..d23107988e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,16 +6,24 @@ but cannot always guarantee backwards compatibility. Changes that may **break co ## [Unreleased](https://github.com/unit8co/darts/tree/master) -[Full Changelog](https://github.com/unit8co/darts/compare/0.27.1...master) +[Full Changelog](https://github.com/unit8co/darts/compare/0.27.2...master) +### For users of the library: +**Improved** + +**Fixed** + +### For developers of the library: + +## [0.27.2](https://github.com/unit8co/darts/tree/0.27.2) (2024-01-21) ### For users of the library: **Improved** - Added `darts.utils.statistics.plot_ccf` that can be used to plot the cross correlation between a time series (e.g. target series) and the lagged values of another time series (e.g. covariates series). [#2122](https://github.com/unit8co/darts/pull/2122) by [Dennis Bader](https://github.com/dennisbader). -- Improvements to `TimeSeries`: - - Improved the time series frequency inference when using slices or pandas DatetimeIndex as keys for `__getitem__`. [#2152](https://github.com/unit8co/darts/pull/2152) by [DavidKleindienst](https://github.com/DavidKleindienst). +- Improvements to `TimeSeries`: Improved the time series frequency inference when using slices or pandas DatetimeIndex as keys for `__getitem__`. [#2152](https://github.com/unit8co/darts/pull/2152) by [DavidKleindienst](https://github.com/DavidKleindienst). **Fixed** - Fixed a bug when using a `TorchForecastingModel` with `use_reversible_instance_norm=True` and predicting with `n > output_chunk_length`. The input normalized multiple times.
[#2160](https://github.com/unit8co/darts/pull/2160) by [FourierMourier](https://github.com/FourierMourier). + ### For developers of the library: ## [0.27.1](https://github.com/unit8co/darts/tree/0.27.1) (2023-12-10) diff --git a/setup_u8darts.py b/setup_u8darts.py index 415c9bff76..2b1ef21104 100644 --- a/setup_u8darts.py +++ b/setup_u8darts.py @@ -29,7 +29,7 @@ def read_requirements(path): setup( name="u8darts", - version="0.27.1", + version="0.27.2", description="A python library for easy manipulation and forecasting of time series.", long_description=LONG_DESCRIPTION, long_description_content_type="text/markdown", From 20ee5ece4e02ed7c1e84db07679c83ceeb1f8a13 Mon Sep 17 00:00:00 2001 From: dennisbader Date: Sun, 21 Jan 2024 16:05:18 +0000 Subject: [PATCH 13/13] Release 0.27.2 --- .bumpversion.cfg | 2 +- conda_recipe/darts/meta.yaml | 2 +- darts/__init__.py | 2 +- docs/source/conf.py | 2 +- setup.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 9725a8232e..30c3988b4d 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,6 +1,6 @@ [bumpversion] parse = (?P\d+)\.(?P\d+)\.(?P\d+)|dev -current_version = 0.27.1 +current_version = 0.27.2 [bumpversion:file:setup.py] diff --git a/conda_recipe/darts/meta.yaml b/conda_recipe/darts/meta.yaml index caeb508ea7..714887eb3f 100644 --- a/conda_recipe/darts/meta.yaml +++ b/conda_recipe/darts/meta.yaml @@ -2,7 +2,7 @@ package: name: "darts" - version: "0.27.1" + version: "0.27.2" source: # root folder, not the package diff --git a/darts/__init__.py b/darts/__init__.py index 0a66a0aa59..2f75c40cfe 100644 --- a/darts/__init__.py +++ b/darts/__init__.py @@ -10,7 +10,7 @@ from .timeseries import TimeSeries, concatenate -__version__ = "0.27.1" +__version__ = "0.27.2" colors = cycler( color=["black", "003DFD", "b512b8", "11a9ba", "0d780f", "f77f07", "ba0f0f"] diff --git a/docs/source/conf.py b/docs/source/conf.py index 2aa3d5ece3..b260072f18 100644 --- 
a/docs/source/conf.py +++ b/docs/source/conf.py @@ -22,7 +22,7 @@ project = "darts" copyright = f"2020 - {datetime.now().year}, Unit8 SA (Apache 2.0 License)" author = "Unit8 SA" -version = "0.27.1" +version = "0.27.2" # -- General configuration --------------------------------------------------- diff --git a/setup.py b/setup.py index c268f8fbbd..aefd06f660 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ def read_requirements(path): setup( name="darts", - version="0.27.1", + version="0.27.2", description="A python library for easy manipulation and forecasting of time series.", long_description=LONG_DESCRIPTION, long_description_content_type="text/markdown",