From 021c26eb8213f3bb31e931a39b4dd77bb78eb707 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Tue, 15 May 2018 10:01:43 +0100 Subject: [PATCH] [ML] Address instability associated with sudden changes in seasonal components (#91) --- docs/CHANGELOG.asciidoc | 1 + include/maths/CAdaptiveBucketing.h | 4 +- include/maths/CCalendarComponent.h | 4 - include/maths/CDecompositionComponent.h | 4 - include/maths/CSeasonalComponent.h | 4 - .../maths/CTimeSeriesDecompositionDetail.h | 193 ++++- lib/maths/CAdaptiveBucketing.cc | 64 +- lib/maths/CCalendarComponent.cc | 5 - lib/maths/CDecompositionComponent.cc | 17 - lib/maths/CSeasonalComponent.cc | 52 +- lib/maths/CTimeSeriesDecompositionDetail.cc | 796 +++++++++++------- lib/maths/CTimeSeriesModel.cc | 32 +- lib/maths/unittest/CForecastTest.cc | 142 ++-- lib/maths/unittest/CSeasonalComponentTest.cc | 6 +- .../unittest/CTimeSeriesDecompositionTest.cc | 599 ++++++------- .../unittest/CTimeSeriesDecompositionTest.h | 1 + lib/maths/unittest/CTimeSeriesModelTest.cc | 2 +- lib/model/unittest/CEventRateModelTest.cc | 2 +- 18 files changed, 1078 insertions(+), 850 deletions(-) diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 0ca651ad08..8a5dd9001f 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -17,6 +17,7 @@ Improve and use periodic boundary condition for seasonal component modeling ({pull}84[#84]) Improve robustness w.r.t. outliers of detection and initialisation of seasonal components ({pull}90[#90]) +Improve behavior when there are abrupt changes in the seasonal components present in a time series ({pull}91[#91]) Explicit change point detection and modelling ({pull}92[#92]) Forecasting of Machine Learning job time series is now supported for large jobs by temporarily storing diff --git a/include/maths/CAdaptiveBucketing.h b/include/maths/CAdaptiveBucketing.h index ab3ede13ec..8a081e1f0a 100644 --- a/include/maths/CAdaptiveBucketing.h +++ b/include/maths/CAdaptiveBucketing.h @@ -233,10 +233,10 @@ class MATHS_EXPORT CAdaptiveBucketing { //! An IIR low pass filter for the total desired end point displacement //! in refine. - TFloatMeanAccumulator m_LpForce; + TFloatMeanAccumulator m_MeanDesiredDisplacement; //! The total desired end point displacement in refine. - TFloatMeanAccumulator m_Force; + TFloatMeanAccumulator m_MeanAbsDesiredDisplacement; }; } } diff --git a/include/maths/CCalendarComponent.h b/include/maths/CCalendarComponent.h index d5ec0b5fc1..eee3cd0163 100644 --- a/include/maths/CCalendarComponent.h +++ b/include/maths/CCalendarComponent.h @@ -131,10 +131,6 @@ class MATHS_EXPORT CCalendarComponent : private CDecompositionComponent { //! Get the mean variance of the component residuals. double meanVariance() const; - //! Get the maximum ratio between a residual variance and the mean - //! residual variance. - double heteroscedasticity() const; - //! Get a checksum for this object. uint64_t checksum(uint64_t seed = 0) const; diff --git a/include/maths/CDecompositionComponent.h b/include/maths/CDecompositionComponent.h index 7ae89d29a8..ee7c1fbd92 100644 --- a/include/maths/CDecompositionComponent.h +++ b/include/maths/CDecompositionComponent.h @@ -168,10 +168,6 @@ class MATHS_EXPORT CDecompositionComponent { //! Get the mean variance of the function residuals. double meanVariance() const; - //! Get the maximum ratio between a residual variance and the mean - //! residual variance. - double heteroscedasticity() const; - //! Get the maximum size to use for the bucketing. 
std::size_t maxSize() const; diff --git a/include/maths/CSeasonalComponent.h b/include/maths/CSeasonalComponent.h index 8c84207b5a..acf1bab420 100644 --- a/include/maths/CSeasonalComponent.h +++ b/include/maths/CSeasonalComponent.h @@ -160,10 +160,6 @@ class MATHS_EXPORT CSeasonalComponent : private CDecompositionComponent { //! Get the mean variance of the component residuals. double meanVariance() const; - //! Get the maximum ratio between a residual variance and the mean - //! residual variance. - double heteroscedasticity() const; - //! Get the covariance matrix of the regression parameters' at \p time. //! //! \param[in] time The time of interest. diff --git a/include/maths/CTimeSeriesDecompositionDetail.h b/include/maths/CTimeSeriesDecompositionDetail.h index 7b99bd6711..dc1f62514b 100644 --- a/include/maths/CTimeSeriesDecompositionDetail.h +++ b/include/maths/CTimeSeriesDecompositionDetail.h @@ -370,7 +370,10 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail { virtual void handle(const SDetectedCalendar& message); //! Start using the trend for prediction. - void useTrendForPrediction(void); + void useTrendForPrediction(); + + //! Test to see if using the trend improves prediction accuracy. + bool shouldUseTrendForPrediction(); //! Apply \p shift to the level at \p time and \p value. void shiftLevel(core_t::TTime time, double value, double shift); @@ -440,6 +443,63 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail { using TCalendarComponentPtrVec = std::vector; using TFloatMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; + //! \brief Manages the setting of the error gain when updating + //! the components with a value. + //! + //! DESCRIPTION:\n + //! The gain is the scale applied to the error in the prediction + //! when updating the components with a new value. If we think it + //! is safe, we use a large gain since this improves prediction + //! accuracy. However, this can also lead to instability if, for + //! example, the seasonal components present in the time series + //! suddenly change. When instability occurs it manifests as the + //! amplitude of all the components growing. + //! + //! This object therefore monitors the sum of the absolute component + //! amplitudes and decreases the gain when it detects that this is + //! significantly increasing. + class MATHS_EXPORT CGainController { + public: + //! Initialize by reading state from \p traverser. + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + + //! Persist state by passing information to \p inserter. + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Clear all state. + void clear(); + + //! Get the gain to use when updating the components with a new value. + double gain() const; + + //! Add seed predictions \p predictions. + void seed(const TDoubleVec& predictions); + + //! Add the predictions \p predictions at \p time. + void add(core_t::TTime time, const TDoubleVec& predictions); + + //! Age by \p factor. + void age(double factor); + + //! Shift the mean prediction error regression model time origin + //! to \p time. + void shiftOrigin(core_t::TTime time); + + //! Get a checksum for this object. + uint64_t checksum(uint64_t seed) const; + + private: + using TRegression = CRegression::CLeastSquaresOnline<1>; + + private: + //! The origin for the mean prediction error regression model. + core_t::TTime m_RegressionOrigin = 0; + //! The sum of the absolute component predictions w.r.t. their means. 
+ TFloatMeanAccumulator m_MeanSumAmplitudes; + //! A regression model for the absolute component predictions. + TRegression m_MeanSumAmplitudesTrend; + }; + //! \brief Tracks prediction errors with and without components. //! //! DESCRIPTION:\n @@ -456,19 +516,23 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail { //! Update the errors. //! + //! \param[in] referenceError The reference error with no components. //! \param[in] error The prediction error. //! \param[in] prediction The prediction from the component. + //! \param[in] varianceIncrease The increase in predicted variance + //! due to the component. //! \param[in] weight The weight of \p error. - void add(double error, double prediction, double weight); + void add(double referenceError, + double error, + double prediction, + double varianceIncrease, + double weight); //! Clear the error statistics. void clear(); - //! Check if we should discard \p seasonal. - bool remove(core_t::TTime bucketLength, CSeasonalComponent& seasonal) const; - - //! Check if we should discard \p calendar. - bool remove(core_t::TTime bucketLength, CCalendarComponent& calendar) const; + //! Check if we should discard the component. + bool remove(core_t::TTime bucketLength, core_t::TTime period) const; //! Age the errors by \p factor. void age(double factor); @@ -477,22 +541,33 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail { uint64_t checksum(uint64_t seed) const; private: - //! Truncate large, i.e. more than 6 sigma, errors. - static double winsorise(double squareError, const TFloatMeanAccumulator& variance); + using TMaxAccumulator = CBasicStatistics::SMax::TAccumulator; + using TVector = CVectorNx1; + using TVectorMeanAccumulator = CBasicStatistics::SSampleMean::TAccumulator; private: - //! The mean prediction error in the window. - TFloatMeanAccumulator m_MeanErrorWithComponent; + //! Truncate large, i.e. more than 6 sigma, errors. + TVector winsorise(const TVector& squareError) const; - //! The mean prediction error in the window without the component. - TFloatMeanAccumulator m_MeanErrorWithoutComponent; + private: + //! The vector mean errors: + //!
+            //! | excluding all components from the prediction |
+            //! |  including the component in the prediction   |
+            //! | excluding the component from the prediction  |
+            //! 
+ TVectorMeanAccumulator m_MeanErrors; + + //! The maximum increase in variance due to the component. + TMaxAccumulator m_MaxVarianceIncrease; }; using TComponentErrorsVec = std::vector; using TComponentErrorsPtrVec = std::vector; //! \brief The seasonal components of the decomposition. - struct MATHS_EXPORT SSeasonal { + class MATHS_EXPORT CSeasonal { + public: //! Initialize by reading state from \p traverser. bool acceptRestoreTraverser(double decayRate, core_t::TTime bucketLength, @@ -508,15 +583,26 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail { //! - \p start elapsed time. void propagateForwards(core_t::TTime start, core_t::TTime end); + //! Clear the components' prediction errors. + void clearPredictionErrors(); + //! Get the combined size of the seasonal components. std::size_t size() const; + //! Get the components. + const maths_t::TSeasonalComponentVec& components() const; + //! Get the components. + maths_t::TSeasonalComponentVec& components(); + //! Get the state to update. void componentsErrorsAndDeltas(core_t::TTime time, TSeasonalComponentPtrVec& components, TComponentErrorsPtrVec& errors, TDoubleVec& deltas); + //! Append the predictions at \p time. + void appendPredictions(core_t::TTime time, TDoubleVec& predictions) const; + //! Check if we need to interpolate any of the components. bool shouldInterpolate(core_t::TTime time, core_t::TTime last) const; @@ -526,6 +612,23 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail { //! Check if any of the components has been initialized. bool initialized() const; + //! Add and initialize a new component. + void add(const CSeasonalTime& seasonalTime, + std::size_t size, + double decayRate, + double bucketLength, + CSplineTypes::EBoundaryCondition boundaryCondition, + core_t::TTime startTime, + core_t::TTime endTime, + const TFloatMeanAccumulatorVec& values); + + //! Refresh state after adding new components. + void refreshForNewComponents(); + + //! Remove all components excluded by adding the component corresponding + //! to \p time. + void removeExcludedComponents(const CSeasonalTime& time); + //! Remove low value components bool prune(core_t::TTime time, core_t::TTime bucketLength); @@ -544,17 +647,19 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail { //! Get the memory used by this object. std::size_t memoryUsage() const; - //! The seasonal components. - maths_t::TSeasonalComponentVec s_Components; + private: + //! The components. + maths_t::TSeasonalComponentVec m_Components; - //! The prediction errors relating to the component. - TComponentErrorsVec s_PredictionErrors; + //! The components' prediction errors. + TComponentErrorsVec m_PredictionErrors; }; - using TSeasonalPtr = std::shared_ptr; + using TSeasonalPtr = std::shared_ptr; //! \brief Calendar periodic components of the decomposition. - struct MATHS_EXPORT SCalendar { + class MATHS_EXPORT CCalendar { + public: //! Initialize by reading state from \p traverser. bool acceptRestoreTraverser(double decayRate, core_t::TTime bucketLength, @@ -570,9 +675,15 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail { //! - \p start elapsed time. void propagateForwards(core_t::TTime start, core_t::TTime end); + //! Clear the components' prediction errors. + void clearPredictionErrors(); + //! Get the combined size of the seasonal components. std::size_t size() const; + //! Get the components. + const maths_t::TCalendarComponentVec& components() const; + //! Check if there is already a component for \p feature. 
bool haveComponent(CCalendarFeature feature) const; @@ -581,6 +692,9 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail { TCalendarComponentPtrVec& components, TComponentErrorsPtrVec& errors); + //! Append the predictions at \p time. + void appendPredictions(core_t::TTime time, TDoubleVec& predictions) const; + //! Check if we need to interpolate any of the components. bool shouldInterpolate(core_t::TTime time, core_t::TTime last) const; @@ -590,6 +704,9 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail { //! Check if any of the components has been initialized. bool initialized() const; + //! Add and initialize a new component. + void add(const CCalendarFeature& feature, std::size_t size, double decayRate, double bucketLength); + //! Remove low value components. bool prune(core_t::TTime time, core_t::TTime bucketLength); @@ -605,14 +722,15 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail { //! Get the memory used by this object. std::size_t memoryUsage() const; + private: //! The calendar components. - maths_t::TCalendarComponentVec s_Components; + maths_t::TCalendarComponentVec m_Components; - //! The prediction errors after removing the component. - TComponentErrorsVec s_PredictionErrors; + //! The components' prediction errors. + TComponentErrorsVec m_PredictionErrors; }; - using TCalendarPtr = std::shared_ptr; + using TCalendarPtr = std::shared_ptr; private: //! Get the total size of the components. @@ -624,16 +742,10 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail { //! Add new seasonal components to \p components. bool addSeasonalComponents(const CPeriodicityHypothesisTestsResult& result, const CExpandingWindow& window, - const TPredictor& predictor, - CTrendComponent& trend, - maths_t::TSeasonalComponentVec& components, - TComponentErrorsVec& errors) const; + const TPredictor& predictor); //! Add a new calendar component to \p components. - bool addCalendarComponent(const CCalendarFeature& feature, - core_t::TTime time, - maths_t::TCalendarComponentVec& components, - TComponentErrorsVec& errors) const; + bool addCalendarComponent(const CCalendarFeature& feature, core_t::TTime time); //! Reweight the outlier values in \p values. //! @@ -682,6 +794,11 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail { //! The raw data bucketing interval. core_t::TTime m_BucketLength; + //! Sets the gain used when updating with a new value. + //! + //! \see CGainController for more details. + CGainController m_GainController; + //! The number of buckets to use to estimate a periodic component. std::size_t m_SeasonalComponentSize; @@ -700,17 +817,17 @@ class MATHS_EXPORT CTimeSeriesDecompositionDetail { //! The mean error variance scale for the components. TFloatMeanAccumulator m_MeanVarianceScale; - //! The moments of the values added. - TMeanVarAccumulator m_Moments; + //! The moments of the error in the predictions excluding the trend. + TMeanVarAccumulator m_PredictionErrorWithoutTrend; - //! The moments of the values added after subtracting a trend. - TMeanVarAccumulator m_MomentsMinusTrend; + //! The moments of the error in the predictions including the trend. + TMeanVarAccumulator m_PredictionErrorWithTrend; //! Set to true if the trend model should be used for prediction. - bool m_UsingTrendForPrediction; + bool m_UsingTrendForPrediction = false; //! Set to true if non-null when the seasonal components change. 
- bool* m_Watcher; + bool* m_Watcher = nullptr; }; }; diff --git a/lib/maths/CAdaptiveBucketing.cc b/lib/maths/CAdaptiveBucketing.cc index 24ec689d71..74f6040889 100644 --- a/lib/maths/CAdaptiveBucketing.cc +++ b/lib/maths/CAdaptiveBucketing.cc @@ -43,8 +43,8 @@ void clearAndShrink(std::vector& vector) { const std::string DECAY_RATE_TAG{"a"}; const std::string ENDPOINT_TAG{"b"}; const std::string CENTRES_TAG{"c"}; -const std::string LP_FORCE_TAG{"d"}; -const std::string FORCE_TAG{"e"}; +const std::string MEAN_DESIRED_DISPLACEMENT_TAG{"d"}; +const std::string MEAN_ABS_DESIRED_DISPLACEMENT_TAG{"e"}; const std::string EMPTY_STRING; const double SMOOTHING_FUNCTION[]{0.25, 0.5, 0.25}; @@ -61,8 +61,10 @@ bool CAdaptiveBucketing::acceptRestoreTraverser(core::CStateRestoreTraverser& tr RESTORE_BUILT_IN(DECAY_RATE_TAG, m_DecayRate) RESTORE(ENDPOINT_TAG, core::CPersistUtils::fromString(traverser.value(), m_Endpoints)) RESTORE(CENTRES_TAG, core::CPersistUtils::fromString(traverser.value(), m_Centres)) - RESTORE(LP_FORCE_TAG, m_LpForce.fromDelimited(traverser.value())) - RESTORE(FORCE_TAG, m_Force.fromDelimited(traverser.value())) + RESTORE(MEAN_DESIRED_DISPLACEMENT_TAG, + m_MeanDesiredDisplacement.fromDelimited(traverser.value())) + RESTORE(MEAN_ABS_DESIRED_DISPLACEMENT_TAG, + m_MeanAbsDesiredDisplacement.fromDelimited(traverser.value())) } while (traverser.next()); return true; } @@ -71,8 +73,10 @@ void CAdaptiveBucketing::acceptPersistInserter(core::CStatePersistInserter& inse inserter.insertValue(DECAY_RATE_TAG, m_DecayRate, core::CIEEE754::E_SinglePrecision); inserter.insertValue(ENDPOINT_TAG, core::CPersistUtils::toString(m_Endpoints)); inserter.insertValue(CENTRES_TAG, core::CPersistUtils::toString(m_Centres)); - inserter.insertValue(LP_FORCE_TAG, m_LpForce.toDelimited()); - inserter.insertValue(FORCE_TAG, m_Force.toDelimited()); + inserter.insertValue(MEAN_DESIRED_DISPLACEMENT_TAG, + m_MeanDesiredDisplacement.toDelimited()); + inserter.insertValue(MEAN_ABS_DESIRED_DISPLACEMENT_TAG, + m_MeanAbsDesiredDisplacement.toDelimited()); } CAdaptiveBucketing::CAdaptiveBucketing(double decayRate, double minimumBucketLength) @@ -92,8 +96,8 @@ void CAdaptiveBucketing::swap(CAdaptiveBucketing& other) { std::swap(m_MinimumBucketLength, other.m_MinimumBucketLength); m_Endpoints.swap(other.m_Endpoints); m_Centres.swap(other.m_Centres); - std::swap(m_LpForce, other.m_LpForce); - std::swap(m_Force, other.m_Force); + std::swap(m_MeanDesiredDisplacement, other.m_MeanDesiredDisplacement); + std::swap(m_MeanAbsDesiredDisplacement, other.m_MeanAbsDesiredDisplacement); } bool CAdaptiveBucketing::initialized() const { @@ -180,9 +184,8 @@ double CAdaptiveBucketing::decayRate() const { } void CAdaptiveBucketing::age(double factor) { - factor = factor * factor; - m_LpForce.age(factor); - m_Force.age(factor); + m_MeanDesiredDisplacement.age(factor); + m_MeanAbsDesiredDisplacement.age(factor); } double CAdaptiveBucketing::minimumBucketLength() const { @@ -282,22 +285,23 @@ void CAdaptiveBucketing::refine(core_t::TTime time) { } m_Endpoints[n] = b; } else { - // Noise in the bucket mean values creates a "high" - // frequency mean zero driving force on the buckets' - // end points desired positions. Once they have stabilized - // on their desired location for the trend, we are able - // to detect this by comparing an IIR low pass filtered - // force and the total force. The lower the ratio the - // smaller the force we actually apply. 
Note we want to - // damp the noise out because the process of adjusting - // the buckets values loses a small amount of information, - // see the comments at the start of refresh for more - // details. - double alpha{ALPHA * (CBasicStatistics::mean(m_Force) == 0.0 - ? 1.0 - : std::fabs(CBasicStatistics::mean(m_LpForce)) / - CBasicStatistics::mean(m_Force))}; - double force{0.0}; + // Noise in the bucket mean values creates a "high" frequency + // mean zero driving force on the buckets' end points desired + // positions. Once they have stabilized on their desired location + // for the trend, we are able to detect this by comparing the + // time averaged desired displacement and the absolute desired + // displacement. The lower the ratio the smaller more smoothing + // we apply. Note we want to damp the noise out because the + // process of adjusting the buckets end points loses a small + // amount of information, see the comments at the start of + // refresh for more details. + double alpha{ + ALPHA * (CBasicStatistics::mean(m_MeanAbsDesiredDisplacement) == 0.0 + ? 1.0 + : std::fabs(CBasicStatistics::mean(m_MeanDesiredDisplacement)) / + CBasicStatistics::mean(m_MeanAbsDesiredDisplacement))}; + LOG_TRACE(<< "alpha = " << alpha); + double displacement{0.0}; // Linearly interpolate between the current end points // and points separated by equal total averaging error. @@ -315,7 +319,7 @@ void CAdaptiveBucketing::refine(core_t::TTime time) { for (double e_ = step - (error - e); error >= step; e_ += step, error -= step) { double x{h * e_ / averagingErrors[i]}; m_Endpoints[j] = endpoints[j] + alpha * (ai + x - endpoints[j]); - force += (ai + x) - endpoints[j]; + displacement += (ai + x) - endpoints[j]; LOG_TRACE(<< "interval averaging error = " << e << ", a(i) = " << ai << ", x = " << x << ", endpoint " << endpoints[j] << " -> " << ai + x); @@ -333,8 +337,8 @@ void CAdaptiveBucketing::refine(core_t::TTime time) { m_Endpoints[n] = b; LOG_TRACE(<< "refinedEndpoints = " << core::CContainerPrinter::print(m_Endpoints)); - m_LpForce.add(force); - m_Force.add(std::fabs(force)); + m_MeanDesiredDisplacement.add(displacement); + m_MeanAbsDesiredDisplacement.add(std::fabs(displacement)); } this->refresh(endpoints); diff --git a/lib/maths/CCalendarComponent.cc b/lib/maths/CCalendarComponent.cc index 93dcd5c382..0e5cadfd18 100644 --- a/lib/maths/CCalendarComponent.cc +++ b/lib/maths/CCalendarComponent.cc @@ -122,7 +122,6 @@ void CCalendarComponent::interpolate(core_t::TTime time, bool refine) { if (refine) { m_Bucketing.refine(time); } - TDoubleVec knots; TDoubleVec values; TDoubleVec variances; @@ -167,10 +166,6 @@ double CCalendarComponent::meanVariance() const { return this->CDecompositionComponent::meanVariance(); } -double CCalendarComponent::heteroscedasticity() const { - return this->CDecompositionComponent::heteroscedasticity(); -} - uint64_t CCalendarComponent::checksum(uint64_t seed) const { seed = this->CDecompositionComponent::checksum(seed); return CChecksum::calculate(seed, m_Bucketing); diff --git a/lib/maths/CDecompositionComponent.cc b/lib/maths/CDecompositionComponent.cc index e2aba37be2..4d57bc26c3 100644 --- a/lib/maths/CDecompositionComponent.cc +++ b/lib/maths/CDecompositionComponent.cc @@ -187,23 +187,6 @@ double CDecompositionComponent::meanVariance() const { return m_MeanVariance; } -double CDecompositionComponent::heteroscedasticity() const { - if (m_MeanVariance == 0.0) { - return 0.0; - } - - using TMaxAccumulator = CBasicStatistics::SMax::TAccumulator; - - TMaxAccumulator result; 
- - TSplineCRef spline = this->varianceSpline(); - for (const auto& value : spline.values()) { - result.add(value / m_MeanVariance); - } - - return result.count() > 0 ? result[0] : 0.0; -} - std::size_t CDecompositionComponent::maxSize() const { return std::max(m_MaxSize, MIN_MAX_SIZE); } diff --git a/lib/maths/CSeasonalComponent.cc b/lib/maths/CSeasonalComponent.cc index 15588eb69b..ecb22ae884 100644 --- a/lib/maths/CSeasonalComponent.cc +++ b/lib/maths/CSeasonalComponent.cc @@ -23,7 +23,7 @@ #include #include -#include +#include #include namespace ml { @@ -153,7 +153,6 @@ void CSeasonalComponent::interpolate(core_t::TTime time, bool refine) { if (refine) { m_Bucketing.refine(time); } - TDoubleVec knots; TDoubleVec values; TDoubleVec variances; @@ -190,19 +189,15 @@ double CSeasonalComponent::meanValue() const { double CSeasonalComponent::delta(core_t::TTime time, core_t::TTime shortPeriod, - double shortPeriodValue) const { - using TMinAccumulator = CBasicStatistics::SMin::TAccumulator; - using TMinMaxAccumulator = CBasicStatistics::CMinMax; - + double shortDifference) const { // This is used to adjust how periodic patterns in the trend are // represented in the case that we have two periodic components // one of which is a divisor of the other. We are interested in // two situations: // 1) The long component has a bias at this time, w.r.t. its // mean, for all repeats of short component, - // 2) There is a large difference between the values and the - // mean of the long component for all repeats of the short - // period at this time. + // 2) The long and short components partially cancel at the + // specified time. // In the first case we can represent the bias using the short // seasonal component; we prefer to do this since the resolution // is better. In the second case we have a bad decomposition of @@ -211,33 +206,34 @@ double CSeasonalComponent::delta(core_t::TTime time, // periodic features in long component. We can achieve this by // reducing the value in the short seasonal component. + using TMinMaxAccumulator = CBasicStatistics::CMinMax; + const CSeasonalTime& time_{this->time()}; core_t::TTime longPeriod{time_.period()}; if (longPeriod > shortPeriod && longPeriod % shortPeriod == 0) { - TMinAccumulator min; - TMinMaxAccumulator minmax; + TMinMaxAccumulator bias; + double amplitude{0.0}; + double margin{std::fabs(shortDifference)}; + double cancelling{0.0}; double mean{this->CDecompositionComponent::meanValue()}; for (core_t::TTime t = time; t < time + longPeriod; t += shortPeriod) { if (time_.inWindow(t)) { double difference{CBasicStatistics::mean(this->value(t, 0.0)) - mean}; - min.add(std::fabs(difference)); - minmax.add(difference); + bias.add(difference); + amplitude = std::max(amplitude, std::fabs(difference)); + if (shortDifference * difference < 0.0) { + margin = std::min(margin, std::fabs(difference)); + cancelling += 1.0; + } else { + cancelling -= 1.0; + } } } - - if (std::fabs(minmax.signMargin()) > 0.0) { - return minmax.signMargin(); - } - - // We have a smooth decision boundary for whether to apply - // a delta for the case that the difference from the mean - // is 1/3 of the range. We force the delta to zero for values - // significantly smaller than this. - double scale{CTools::logisticFunction(3.0 * min[0] / minmax.range(), 0.1, 1.0)}; - scale = CTools::truncate(1.002 * scale - 0.001, 0.0, 1.0); - - return -scale * min[0] * CTools::sign(shortPeriodValue); + return bias.signMargin() != 0.0 ? 
bias.signMargin() + : (cancelling > 0.0 && margin > 0.2 * amplitude + ? std::copysign(margin, -shortDifference) + : 0.0); } return 0.0; @@ -253,10 +249,6 @@ double CSeasonalComponent::meanVariance() const { return this->CDecompositionComponent::meanVariance(); } -double CSeasonalComponent::heteroscedasticity() const { - return this->CDecompositionComponent::heteroscedasticity(); -} - bool CSeasonalComponent::covariances(core_t::TTime time, TMatrix& result) const { result = TMatrix(0.0); diff --git a/lib/maths/CTimeSeriesDecompositionDetail.cc b/lib/maths/CTimeSeriesDecompositionDetail.cc index e06d69bba5..9bde1a5fbe 100644 --- a/lib/maths/CTimeSeriesDecompositionDetail.cc +++ b/lib/maths/CTimeSeriesDecompositionDetail.cc @@ -74,6 +74,17 @@ const core_t::TTime DAY = core::constants::DAY; const core_t::TTime WEEK = core::constants::WEEK; const core_t::TTime MONTH = 4 * WEEK; +//! We scale the time used for the regression model to improve +//! the condition of the design matrix. +double scaleTime(core_t::TTime time, core_t::TTime origin) { + return static_cast(time - origin) / static_cast(core::constants::WEEK); +} + +//! Get the aging factor to apply for \p dt elapsed time. +double ageFactor(double decayRate, core_t::TTime dt, core_t::TTime scale = DAY) { + return std::exp(-decayRate * static_cast(dt) / static_cast(scale)); +} + //! Compute the mean of \p mean of \p components. template double meanOf(MEAN_FUNCTION mean, const TSeasonalComponentVec& components) { @@ -112,7 +123,7 @@ double meanOf(MEAN_FUNCTION mean, const TSeasonalComponentVec& components) { //! Compute the values to add to the trend and each component. //! -//! \param[in] trend The long term trend. +//! \param[in] trend The long term trend prediction. //! \param[in] seasonal The seasonal components. //! \param[in] calendar The calendar components. //! \param[in] time The time of value to decompose. @@ -122,21 +133,24 @@ double meanOf(MEAN_FUNCTION mean, const TSeasonalComponentVec& components) { //! \param[in,out] decomposition Updated to contain the value to //! add to each by component. //! \param[out] predictions Filled in with the component predictions. +//! \param[out] referenceError Filled in with the error w.r.t. the trend. //! \param[out] error Filled in with the prediction error. //! \param[out] scale Filled in with the normalization scaling. -void decompose(const CTrendComponent& trend, +void decompose(double trend, const TSeasonalComponentPtrVec& seasonal, const TCalendarComponentPtrVec& calendar, - core_t::TTime time, + const core_t::TTime time, const TDoubleVec& deltas, + double gain, TDoubleVec& decomposition, TDoubleVec& predictions, + double& referenceError, double& error, double& scale) { std::size_t m{seasonal.size()}; std::size_t n{calendar.size()}; - double x0{CBasicStatistics::mean(trend.value(time, 0.0))}; + double x0{trend}; TDoubleVec x(m + n); double xhat{x0}; for (std::size_t i = 0u; i < m; ++i) { @@ -159,9 +173,10 @@ void decompose(const CTrendComponent& trend, // trade off between the rate at which each component reacts // to errors verses the error variance in the steady state with // smaller values of Z corresponding to greater responsiveness. 
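For intuition, here is a minimal, self-contained C++ sketch (not part of the patch itself) of how the gain setting changes the fraction of the prediction error fed back to each component via Z = max((m + n + 1) / gain, 1). The helper name errorCorrection is purely illustrative; the gain values 3.0 and 1.0 are the two settings returned by CGainController::gain() later in this patch.

    #include <algorithm>
    #include <cstddef>
    #include <iostream>

    // Fraction of the prediction error fed back to the trend and to each
    // seasonal/calendar component, given m + n components plus the trend.
    double errorCorrection(double error, std::size_t numberComponents, double gain) {
        double Z{std::max(static_cast<double>(numberComponents + 1) / gain, 1.0)};
        return error / Z;
    }

    int main() {
        double error{6.0};
        std::size_t numberComponents{2}; // say, a daily and a weekly component
        std::cout << "responsive (gain = 3.0): "
                  << errorCorrection(error, numberComponents, 3.0) << '\n'
                  << "damped     (gain = 1.0): "
                  << errorCorrection(error, numberComponents, 1.0) << '\n';
        return 0;
    }

With two components plus the trend, the responsive setting feeds the whole error back (Z = 1), whereas the damped setting feeds back only a third of it (Z = 3); the removed line below corresponds to a fixed effective gain of 2.0.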
- double Z{std::max(0.5 * static_cast(m + n + 1), 1.0)}; + double Z{std::max(static_cast(m + n + 1) / gain, 1.0)}; error = decomposition[0] - xhat; + referenceError = decomposition[0] - x0; decomposition[0] = x0 + (decomposition[0] - xhat) / Z; for (std::size_t i = 0u; i < m; ++i) { predictions[i] = x[i] - seasonal[i]->meanValue(); @@ -251,6 +266,7 @@ const TSizeVecVec SC_TRANSITION_FUNCTION{ TSizeVec{SC_NORMAL, SC_NORMAL, SC_NORMAL, SC_NORMAL}}; const std::string VERSION_6_3_TAG("6.3"); +const std::string VERSION_6_4_TAG("6.4"); // Periodicity Test Tags // Version 6.3 @@ -267,6 +283,12 @@ const std::string CALENDAR_TEST_6_3_TAG{"c"}; // These work for all versions. // Components Tags +// Version 6.4 +const std::string COMPONENT_6_4_TAG{"f"}; +const std::string ERRORS_6_4_TAG{"g"}; +const std::string REGRESSION_ORIGIN_6_4_TAG{"a"}; +const std::string MEAN_SUM_AMPLITUDES_6_4_TAG{"b"}; +const std::string MEAN_SUM_AMPLITUDES_TREND_6_4_TAG{"c"}; // Version 6.3 const std::string COMPONENTS_MACHINE_6_3_TAG{"a"}; const std::string DECAY_RATE_6_3_TAG{"b"}; @@ -274,18 +296,18 @@ const std::string TREND_6_3_TAG{"c"}; const std::string SEASONAL_6_3_TAG{"d"}; const std::string CALENDAR_6_3_TAG{"e"}; const std::string COMPONENT_6_3_TAG{"f"}; -const std::string ERRORS_6_3_TAG{"g"}; const std::string MEAN_VARIANCE_SCALE_6_3_TAG{"h"}; const std::string MOMENTS_6_3_TAG{"i"}; const std::string MOMENTS_MINUS_TREND_6_3_TAG{"j"}; const std::string USING_TREND_FOR_PREDICTION_6_3_TAG{"k"}; +const std::string GAIN_CONTROLLER_6_3_TAG{"l"}; + // Version < 6.3 const std::string COMPONENTS_MACHINE_OLD_TAG{"a"}; const std::string TREND_OLD_TAG{"b"}; const std::string SEASONAL_OLD_TAG{"c"}; const std::string CALENDAR_OLD_TAG{"d"}; const std::string COMPONENT_OLD_TAG{"e"}; -const std::string ERRORS_OLD_TAG{"f"}; const std::string REGRESSION_OLD_TAG{"g"}; const std::string VARIANCE_OLD_TAG{"h"}; const std::string TIME_ORIGIN_OLD_TAG{"i"}; @@ -332,7 +354,6 @@ bool upgradeTrendModelToVersion6p3(const core_t::TTime bucketLength, //////////////////////////////////////////////////////////////////////// // Constants -const core_t::TTime FOREVER{boost::numeric::bounds::highest()}; const std::size_t MAXIMUM_COMPONENTS{8}; const TSeasonalComponentVec NO_SEASONAL_COMPONENTS; const TCalendarComponentVec NO_CALENDAR_COMPONENTS; @@ -937,19 +958,20 @@ CTimeSeriesDecompositionDetail::CComponents::CComponents(double decayRate, std::size_t seasonalComponentSize) : m_Machine{core::CStateMachine::create(SC_ALPHABET, SC_STATES, SC_TRANSITION_FUNCTION, SC_NORMAL)}, m_DecayRate{decayRate}, m_BucketLength{bucketLength}, m_SeasonalComponentSize{seasonalComponentSize}, - m_CalendarComponentSize{seasonalComponentSize / 3}, m_Trend{decayRate}, - m_UsingTrendForPrediction{false}, m_Watcher{nullptr} { + m_CalendarComponentSize{seasonalComponentSize / 3}, m_Trend{decayRate} { } CTimeSeriesDecompositionDetail::CComponents::CComponents(const CComponents& other) : m_Machine{other.m_Machine}, m_DecayRate{other.m_DecayRate}, - m_BucketLength{other.m_BucketLength}, m_SeasonalComponentSize{other.m_SeasonalComponentSize}, + m_BucketLength{other.m_BucketLength}, m_GainController{other.m_GainController}, + m_SeasonalComponentSize{other.m_SeasonalComponentSize}, m_CalendarComponentSize{other.m_CalendarComponentSize}, m_Trend{other.m_Trend}, - m_Seasonal{other.m_Seasonal ? new SSeasonal{*other.m_Seasonal} : nullptr}, - m_Calendar{other.m_Calendar ? 
new SCalendar{*other.m_Calendar} : nullptr}, - m_MeanVarianceScale{other.m_MeanVarianceScale}, m_Moments{other.m_Moments}, - m_MomentsMinusTrend{other.m_MomentsMinusTrend}, - m_UsingTrendForPrediction{other.m_UsingTrendForPrediction}, m_Watcher{nullptr} { + m_Seasonal{other.m_Seasonal ? new CSeasonal{*other.m_Seasonal} : nullptr}, + m_Calendar{other.m_Calendar ? new CCalendar{*other.m_Calendar} : nullptr}, + m_MeanVarianceScale{other.m_MeanVarianceScale}, + m_PredictionErrorWithoutTrend{other.m_PredictionErrorWithoutTrend}, + m_PredictionErrorWithTrend{other.m_PredictionErrorWithTrend}, + m_UsingTrendForPrediction{other.m_UsingTrendForPrediction} { } bool CTimeSeriesDecompositionDetail::CComponents::acceptRestoreTraverser( @@ -962,26 +984,30 @@ bool CTimeSeriesDecompositionDetail::CComponents::acceptRestoreTraverser( traverser.traverseSubLevel(boost::bind( &core::CStateMachine::acceptRestoreTraverser, &m_Machine, _1))); RESTORE_BUILT_IN(DECAY_RATE_6_3_TAG, m_DecayRate); + RESTORE(GAIN_CONTROLLER_6_3_TAG, + traverser.traverseSubLevel(boost::bind(&CGainController::acceptRestoreTraverser, + &m_GainController, _1))) RESTORE(TREND_6_3_TAG, traverser.traverseSubLevel(boost::bind( &CTrendComponent::acceptRestoreTraverser, &m_Trend, boost::cref(params), _1))) RESTORE_SETUP_TEARDOWN( - SEASONAL_6_3_TAG, m_Seasonal.reset(new SSeasonal), + SEASONAL_6_3_TAG, m_Seasonal.reset(new CSeasonal), traverser.traverseSubLevel( - boost::bind(&SSeasonal::acceptRestoreTraverser, + boost::bind(&CSeasonal::acceptRestoreTraverser, m_Seasonal.get(), m_DecayRate, m_BucketLength, _1)), /**/) RESTORE_SETUP_TEARDOWN( - CALENDAR_6_3_TAG, m_Calendar.reset(new SCalendar), + CALENDAR_6_3_TAG, m_Calendar.reset(new CCalendar), traverser.traverseSubLevel( - boost::bind(&SCalendar::acceptRestoreTraverser, + boost::bind(&CCalendar::acceptRestoreTraverser, m_Calendar.get(), m_DecayRate, m_BucketLength, _1)), /**/) RESTORE(MEAN_VARIANCE_SCALE_6_3_TAG, m_MeanVarianceScale.fromDelimited(traverser.value())) - RESTORE(MOMENTS_6_3_TAG, m_Moments.fromDelimited(traverser.value())); + RESTORE(MOMENTS_6_3_TAG, + m_PredictionErrorWithoutTrend.fromDelimited(traverser.value())); RESTORE(MOMENTS_MINUS_TREND_6_3_TAG, - m_MomentsMinusTrend.fromDelimited(traverser.value())); + m_PredictionErrorWithTrend.fromDelimited(traverser.value())); RESTORE_BUILT_IN(USING_TREND_FOR_PREDICTION_6_3_TAG, m_UsingTrendForPrediction) } @@ -1000,15 +1026,15 @@ bool CTimeSeriesDecompositionDetail::CComponents::acceptRestoreTraverser( m_BucketLength, boost::ref(m_Trend), _1)), m_UsingTrendForPrediction = true) RESTORE_SETUP_TEARDOWN( - SEASONAL_OLD_TAG, m_Seasonal.reset(new SSeasonal), + SEASONAL_OLD_TAG, m_Seasonal.reset(new CSeasonal), traverser.traverseSubLevel( - boost::bind(&SSeasonal::acceptRestoreTraverser, + boost::bind(&CSeasonal::acceptRestoreTraverser, m_Seasonal.get(), m_DecayRate, m_BucketLength, _1)), /**/) RESTORE_SETUP_TEARDOWN( - CALENDAR_OLD_TAG, m_Calendar.reset(new SCalendar), + CALENDAR_OLD_TAG, m_Calendar.reset(new CCalendar), traverser.traverseSubLevel( - boost::bind(&SCalendar::acceptRestoreTraverser, + boost::bind(&CCalendar::acceptRestoreTraverser, m_Calendar.get(), m_DecayRate, m_BucketLength, _1)), /**/) } while (traverser.next()); @@ -1026,19 +1052,23 @@ void CTimeSeriesDecompositionDetail::CComponents::acceptPersistInserter( COMPONENTS_MACHINE_6_3_TAG, boost::bind(&core::CStateMachine::acceptPersistInserter, &m_Machine, _1)); inserter.insertValue(DECAY_RATE_6_3_TAG, m_DecayRate, core::CIEEE754::E_SinglePrecision); + 
inserter.insertLevel(GAIN_CONTROLLER_6_3_TAG, + boost::bind(&CGainController::acceptPersistInserter, + &m_GainController, _1)); inserter.insertLevel(TREND_6_3_TAG, boost::bind(&CTrendComponent::acceptPersistInserter, m_Trend, _1)); if (m_Seasonal) { - inserter.insertLevel(SEASONAL_6_3_TAG, boost::bind(&SSeasonal::acceptPersistInserter, + inserter.insertLevel(SEASONAL_6_3_TAG, boost::bind(&CSeasonal::acceptPersistInserter, m_Seasonal.get(), _1)); } if (m_Calendar) { - inserter.insertLevel(CALENDAR_6_3_TAG, boost::bind(&SCalendar::acceptPersistInserter, + inserter.insertLevel(CALENDAR_6_3_TAG, boost::bind(&CCalendar::acceptPersistInserter, m_Calendar.get(), _1)); } inserter.insertValue(MEAN_VARIANCE_SCALE_6_3_TAG, m_MeanVarianceScale.toDelimited()); - inserter.insertValue(MOMENTS_6_3_TAG, m_Moments.toDelimited()); - inserter.insertValue(MOMENTS_MINUS_TREND_6_3_TAG, m_MomentsMinusTrend.toDelimited()); + inserter.insertValue(MOMENTS_6_3_TAG, m_PredictionErrorWithoutTrend.toDelimited()); + inserter.insertValue(MOMENTS_MINUS_TREND_6_3_TAG, + m_PredictionErrorWithTrend.toDelimited()); inserter.insertValue(USING_TREND_FOR_PREDICTION_6_3_TAG, m_UsingTrendForPrediction); } @@ -1051,9 +1081,10 @@ void CTimeSeriesDecompositionDetail::CComponents::swap(CComponents& other) { m_Trend.swap(other.m_Trend); m_Seasonal.swap(other.m_Seasonal); m_Calendar.swap(other.m_Calendar); + std::swap(m_GainController, other.m_GainController); std::swap(m_MeanVarianceScale, other.m_MeanVarianceScale); - std::swap(m_Moments, other.m_Moments); - std::swap(m_MomentsMinusTrend, other.m_MomentsMinusTrend); + std::swap(m_PredictionErrorWithoutTrend, other.m_PredictionErrorWithoutTrend); + std::swap(m_PredictionErrorWithTrend, other.m_PredictionErrorWithTrend); std::swap(m_UsingTrendForPrediction, other.m_UsingTrendForPrediction); } @@ -1066,8 +1097,6 @@ void CTimeSeriesDecompositionDetail::CComponents::handle(const SAddValue& messag core_t::TTime time{message.s_Time}; double value{message.s_Value}; double trend{message.s_Trend}; - double seasonal{message.s_Seasonal}; - double calendar{message.s_Calendar}; const maths_t::TDoubleWeightsAry& weights{message.s_Weights}; TSeasonalComponentPtrVec seasonalComponents; @@ -1089,55 +1118,57 @@ void CTimeSeriesDecompositionDetail::CComponents::handle(const SAddValue& messag std::size_t n{calendarComponents.size()}; TDoubleVec values(m + n + 1, value); - TDoubleVec predictions(m + n); + TDoubleVec predictions(m + n, 0.0); + double referenceError; double error; double scale; - decompose(m_Trend, seasonalComponents, calendarComponents, time, deltas, - values, predictions, error, scale); + decompose(trend, seasonalComponents, calendarComponents, time, deltas, + m_GainController.gain(), values, predictions, referenceError, + error, scale); + + TDoubleVec variances(m + n + 1, 0.0); + if (m_UsingTrendForPrediction) { + variances[0] = CBasicStatistics::mean(m_Trend.variance(0.0)); + } + for (std::size_t i = 1u; i <= m; ++i) { + variances[i] = CBasicStatistics::mean( + seasonalComponents[i - 1]->variance(time, 0.0)); + } + for (std::size_t i = m + 1; i <= m + n; ++i) { + variances[i] = CBasicStatistics::mean( + calendarComponents[i - m - 1]->variance(time, 0.0)); + } + double variance{std::accumulate(variances.begin(), variances.end(), 0.0)}; + double expectedVarianceIncrease{1.0 / static_cast(m + n + 1)}; - core_t::TTime observedInterval{m_Trend.observedInterval()}; + bool mayUseTrendForPrediction{m_Trend.observedInterval() > 6 * m_BucketLength}; m_Trend.add(time, values[0], weight); 
m_Trend.dontShiftLevel(time, value); for (std::size_t i = 1u; i <= m; ++i) { CSeasonalComponent* component{seasonalComponents[i - 1]}; CComponentErrors* error_{seasonalErrors[i - 1]}; + double varianceIncrease{variances[i] / variance / expectedVarianceIncrease}; component->add(time, values[i], weight); - error_->add(error, predictions[i - 1], weight); + error_->add(referenceError, error, predictions[i - 1], varianceIncrease, weight); } for (std::size_t i = m + 1; i <= m + n; ++i) { CCalendarComponent* component{calendarComponents[i - m - 1]}; CComponentErrors* error_{calendarErrors[i - m - 1]}; + double varianceIncrease{variances[i] / variance / expectedVarianceIncrease}; component->add(time, values[i], weight); - error_->add(error, predictions[i - 1], weight); + error_->add(referenceError, error, predictions[i - 1], varianceIncrease, weight); } m_MeanVarianceScale.add(scale, weight); - m_Moments.add(value - seasonal - calendar, weight); - m_MomentsMinusTrend.add(value - trend - seasonal - calendar, weight); - - if (!m_UsingTrendForPrediction && observedInterval > 6 * m_BucketLength) { - double v0{CBasicStatistics::variance(m_Moments)}; - double v1{CBasicStatistics::variance(m_MomentsMinusTrend)}; - double df0{CBasicStatistics::count(m_Moments) - 1.0}; - double df1{CBasicStatistics::count(m_MomentsMinusTrend) - m_Trend.parameters()}; - if (df0 > 0.0 && df1 > 0.0 && v0 > 0.0) { - double relativeLogSignificance{ - CTools::fastLog(CStatisticalTests::leftTailFTest(v1 / v0, df1, df0)) / - LOG_COMPONENT_STATISTICALLY_SIGNIFICANCE}; - double vt{*std::max_element( - boost::begin(COMPONENT_SIGNIFICANT_VARIANCE_REDUCTION), - boost::end(COMPONENT_SIGNIFICANT_VARIANCE_REDUCTION)) * - v0}; - double p{CTools::logisticFunction(relativeLogSignificance, 0.1, 1.0) * - (vt > v1 ? 
CTools::logisticFunction(vt / v1, 1.0, 1.0, +1.0) - : CTools::logisticFunction(v1 / vt, 0.1, 1.0, -1.0))}; - m_UsingTrendForPrediction = (p >= 0.25); - if (m_UsingTrendForPrediction) { - LOG_DEBUG(<< "Detected trend at " << time); - } - *m_Watcher = m_UsingTrendForPrediction; - } + m_PredictionErrorWithoutTrend.add(error + trend, weight); + m_PredictionErrorWithTrend.add(error, weight); + m_GainController.add(time, predictions); + + if (mayUseTrendForPrediction && !m_UsingTrendForPrediction && + this->shouldUseTrendForPrediction()) { + LOG_DEBUG(<< "Detected trend at " << time); + *m_Watcher = true; } } break; case SC_DISABLED: @@ -1158,7 +1189,7 @@ void CTimeSeriesDecompositionDetail::CComponents::handle(const SDetectedSeasonal case SC_NORMAL: case SC_NEW_COMPONENTS: { if (!m_Seasonal) { - m_Seasonal.reset(new SSeasonal); + m_Seasonal.reset(new CSeasonal); } core_t::TTime time{message.s_Time}; @@ -1167,11 +1198,7 @@ void CTimeSeriesDecompositionDetail::CComponents::handle(const SDetectedSeasonal const CExpandingWindow& window{message.s_Window}; const TPredictor& predictor{message.s_Predictor}; - TSeasonalComponentVec& components{m_Seasonal->s_Components}; - TComponentErrorsVec& errors{m_Seasonal->s_PredictionErrors}; - - if (!this->addSeasonalComponents(result, window, predictor, m_Trend, - components, errors)) { + if (!this->addSeasonalComponents(result, window, predictor)) { break; } if (m_Watcher) { @@ -1204,7 +1231,7 @@ void CTimeSeriesDecompositionDetail::CComponents::handle(const SDetectedCalendar case SC_NORMAL: case SC_NEW_COMPONENTS: { if (!m_Calendar) { - m_Calendar.reset(new SCalendar); + m_Calendar.reset(new CCalendar); } core_t::TTime time{message.s_Time}; @@ -1215,10 +1242,7 @@ void CTimeSeriesDecompositionDetail::CComponents::handle(const SDetectedCalendar break; } - TCalendarComponentVec& components{m_Calendar->s_Components}; - TComponentErrorsVec& errors{m_Calendar->s_PredictionErrors}; - - this->addCalendarComponent(feature, time, components, errors); + this->addCalendarComponent(feature, time); this->apply(SC_ADDED_COMPONENTS, message); this->mediator()->forward( SNewComponents(time, lastTime, SNewComponents::E_CalendarCyclic)); @@ -1233,10 +1257,30 @@ void CTimeSeriesDecompositionDetail::CComponents::handle(const SDetectedCalendar } } -void CTimeSeriesDecompositionDetail::CComponents::useTrendForPrediction(void) { +void CTimeSeriesDecompositionDetail::CComponents::useTrendForPrediction() { m_UsingTrendForPrediction = true; } +bool CTimeSeriesDecompositionDetail::CComponents::shouldUseTrendForPrediction() { + double v0{CBasicStatistics::variance(m_PredictionErrorWithoutTrend)}; + double v1{CBasicStatistics::variance(m_PredictionErrorWithTrend)}; + double df0{CBasicStatistics::count(m_PredictionErrorWithoutTrend) - 1.0}; + double df1{CBasicStatistics::count(m_PredictionErrorWithTrend) - m_Trend.parameters()}; + if (df0 > 0.0 && df1 > 0.0 && v0 > 0.0) { + double relativeLogSignificance{ + CTools::fastLog(CStatisticalTests::leftTailFTest(v1 / v0, df1, df0)) / + LOG_COMPONENT_STATISTICALLY_SIGNIFICANCE}; + double vt{*std::max_element(boost::begin(COMPONENT_SIGNIFICANT_VARIANCE_REDUCTION), + boost::end(COMPONENT_SIGNIFICANT_VARIANCE_REDUCTION)) * + v0}; + double p{CTools::logisticFunction(relativeLogSignificance, 0.1, 1.0) * + (vt > v1 ? 
CTools::logisticFunction(vt / v1, 1.0, 1.0, +1.0) + : CTools::logisticFunction(v1 / vt, 0.1, 1.0, -1.0))}; + m_UsingTrendForPrediction = (p >= 0.25); + } + return m_UsingTrendForPrediction; +} + void CTimeSeriesDecompositionDetail::CComponents::shiftLevel(core_t::TTime time, double value, double shift) { @@ -1324,11 +1368,11 @@ void CTimeSeriesDecompositionDetail::CComponents::propagateForwards(core_t::TTim if (m_Calendar) { m_Calendar->propagateForwards(start, end); } - double factor{std::exp(-m_DecayRate * static_cast(end - start) / - static_cast(DAY))}; + double factor{ageFactor(m_DecayRate, end - start)}; m_MeanVarianceScale.age(factor); - m_Moments.age(factor); - m_MomentsMinusTrend.age(factor); + m_PredictionErrorWithTrend.age(factor); + m_PredictionErrorWithoutTrend.age(factor); + m_GainController.age(factor); } bool CTimeSeriesDecompositionDetail::CComponents::initialized() const { @@ -1345,12 +1389,12 @@ const CTrendComponent& CTimeSeriesDecompositionDetail::CComponents::trend() cons } const TSeasonalComponentVec& CTimeSeriesDecompositionDetail::CComponents::seasonal() const { - return m_Seasonal ? m_Seasonal->s_Components : NO_SEASONAL_COMPONENTS; + return m_Seasonal ? m_Seasonal->components() : NO_SEASONAL_COMPONENTS; } const maths_t::TCalendarComponentVec& CTimeSeriesDecompositionDetail::CComponents::calendar() const { - return m_Calendar ? m_Calendar->s_Components : NO_CALENDAR_COMPONENTS; + return m_Calendar ? m_Calendar->components() : NO_CALENDAR_COMPONENTS; } bool CTimeSeriesDecompositionDetail::CComponents::usingTrendForPrediction() const { @@ -1404,8 +1448,9 @@ uint64_t CTimeSeriesDecompositionDetail::CComponents::checksum(uint64_t seed) co seed = CChecksum::calculate(seed, m_Seasonal); seed = CChecksum::calculate(seed, m_Calendar); seed = CChecksum::calculate(seed, m_MeanVarianceScale); - seed = CChecksum::calculate(seed, m_Moments); - seed = CChecksum::calculate(seed, m_MomentsMinusTrend); + seed = CChecksum::calculate(seed, m_PredictionErrorWithoutTrend); + seed = CChecksum::calculate(seed, m_PredictionErrorWithTrend); + seed = CChecksum::calculate(seed, m_GainController); return CChecksum::calculate(seed, m_UsingTrendForPrediction); } @@ -1432,14 +1477,12 @@ std::size_t CTimeSeriesDecompositionDetail::CComponents::maxSize() const { bool CTimeSeriesDecompositionDetail::CComponents::addSeasonalComponents( const CPeriodicityHypothesisTestsResult& result, const CExpandingWindow& window, - const TPredictor& predictor, - CTrendComponent& trend, - TSeasonalComponentVec& components, - TComponentErrorsVec& errors) const { + const TPredictor& predictor) { using TSeasonalTimePtr = std::shared_ptr; using TSeasonalTimePtrVec = std::vector; TSeasonalTimePtrVec newSeasonalTimes; + const TSeasonalComponentVec& components{m_Seasonal->components()}; for (const auto& candidate_ : result.components()) { TSeasonalTimePtr seasonalTime(candidate_.seasonalTime()); @@ -1454,12 +1497,7 @@ bool CTimeSeriesDecompositionDetail::CComponents::addSeasonalComponents( if (newSeasonalTimes.size() > 0) { for (const auto& seasonalTime : newSeasonalTimes) { - components.erase( - std::remove_if(components.begin(), components.end(), - [&seasonalTime](const CSeasonalComponent& component) { - return seasonalTime->excludes(component.time()); - }), - components.end()); + m_Seasonal->removeExcludedComponents(*seasonalTime); } std::sort(newSeasonalTimes.begin(), newSeasonalTimes.end(), @@ -1498,13 +1536,27 @@ bool CTimeSeriesDecompositionDetail::CComponents::addSeasonalComponents( }, values); - 
components.emplace_back(*seasonalTime, m_SeasonalComponentSize, - m_DecayRate, bucketLength, boundaryCondition); - components.back().initialize(startTime, endTime, values); - components.back().interpolate(CIntegerTools::floor(endTime, period)); + m_Seasonal->add(*seasonalTime, m_SeasonalComponentSize, m_DecayRate, + bucketLength, boundaryCondition, startTime, endTime, values); + } + m_Seasonal->refreshForNewComponents(); + + TDoubleVec predictions; + m_GainController.clear(); + for (core_t::TTime time = window.startTime(); time < window.endTime(); + time += m_BucketLength) { + predictions.clear(); + if (m_Seasonal) { + m_Seasonal->appendPredictions(time, predictions); + } + if (m_Calendar) { + m_Calendar->appendPredictions(time, predictions); + } + m_GainController.seed(predictions); + m_GainController.age(ageFactor(m_DecayRate, m_BucketLength)); } - CTrendComponent candidate{trend.defaultDecayRate()}; + CTrendComponent candidate{m_Trend.defaultDecayRate()}; values = window.valuesMinusPrediction(predictor); this->fit(startTime, dt, values, candidate); this->reweightOutliers(startTime, dt, @@ -1513,31 +1565,18 @@ bool CTimeSeriesDecompositionDetail::CComponents::addSeasonalComponents( }, values); - CTrendComponent trend_{trend.defaultDecayRate()}; - this->fit(startTime, dt, values, trend_); - trend.swap(trend_); - - errors.resize(components.size()); - COrderings::simultaneousSort( - components, errors, - [](const CSeasonalComponent& lhs, const CSeasonalComponent& rhs) { - return lhs.time() < rhs.time(); - }); + CTrendComponent trend{m_Trend.defaultDecayRate()}; + this->fit(startTime, dt, values, trend); + m_Trend.swap(trend); } return newSeasonalTimes.size() > 0; } -bool CTimeSeriesDecompositionDetail::CComponents::addCalendarComponent( - const CCalendarFeature& feature, - core_t::TTime time, - maths_t::TCalendarComponentVec& components, - TComponentErrorsVec& errors) const { +bool CTimeSeriesDecompositionDetail::CComponents::addCalendarComponent(const CCalendarFeature& feature, + core_t::TTime time) { double bucketLength{static_cast(m_BucketLength)}; - components.emplace_back(feature, m_CalendarComponentSize, m_DecayRate, - bucketLength, CSplineTypes::E_Natural); - components.back().initialize(); - errors.resize(components.size()); + m_Calendar->add(feature, m_CalendarComponentSize, m_DecayRate, bucketLength); LOG_DEBUG(<< "Detected feature '" << feature.print() << "' at " << time); return true; } @@ -1604,14 +1643,10 @@ void CTimeSeriesDecompositionDetail::CComponents::fit(core_t::TTime startTime, void CTimeSeriesDecompositionDetail::CComponents::clearComponentErrors() { if (m_Seasonal) { - for (auto& errors : m_Seasonal->s_PredictionErrors) { - errors.clear(); - } + m_Seasonal->clearPredictionErrors(); } if (m_Calendar) { - for (auto& errors : m_Calendar->s_PredictionErrors) { - errors.clear(); - } + m_Calendar->clearPredictionErrors(); } } @@ -1662,6 +1697,7 @@ void CTimeSeriesDecompositionDetail::CComponents::shiftOrigin(core_t::TTime time if (m_Seasonal) { m_Seasonal->shiftOrigin(time); } + m_GainController.shiftOrigin(time); } void CTimeSeriesDecompositionDetail::CComponents::canonicalize(core_t::TTime time) { @@ -1677,7 +1713,7 @@ void CTimeSeriesDecompositionDetail::CComponents::canonicalize(core_t::TTime tim } if (m_Seasonal) { - TSeasonalComponentVec& seasonal{m_Seasonal->s_Components}; + TSeasonalComponentVec& seasonal{m_Seasonal->components()}; double slope{0.0}; TTimeTimePrDoubleFMap windowSlopes; @@ -1730,169 +1766,261 @@ bool 
CTimeSeriesDecompositionDetail::CComponents::CScopeNotifyOnStateChange::cha return m_Watcher; } -bool CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::fromDelimited(const std::string& str) { - TFloatMeanAccumulator* state[] = {&m_MeanErrorWithComponent, &m_MeanErrorWithoutComponent}; - std::string suffix = str; - for (std::size_t i = 0u, n = 0; i < 2; ++i, suffix = suffix.substr(n + 1)) { - n = suffix.find(CBasicStatistics::EXTERNAL_DELIMITER); - if (!state[i]->fromDelimited(suffix.substr(0, n))) { - LOG_ERROR(<< "Failed to parse '" << str << "'"); - return false; +bool CTimeSeriesDecompositionDetail::CComponents::CGainController::acceptRestoreTraverser( + core::CStateRestoreTraverser& traverser) { + do { + const std::string& name{traverser.name()}; + RESTORE_BUILT_IN(REGRESSION_ORIGIN_6_4_TAG, m_RegressionOrigin) + RESTORE(MEAN_SUM_AMPLITUDES_6_4_TAG, + m_MeanSumAmplitudes.fromDelimited(traverser.value())) + RESTORE(MEAN_SUM_AMPLITUDES_TREND_6_4_TAG, + traverser.traverseSubLevel(boost::bind(&TRegression::acceptRestoreTraverser, + &m_MeanSumAmplitudesTrend, _1))) + } while (traverser.next()); + return true; +} + +void CTimeSeriesDecompositionDetail::CComponents::CGainController::acceptPersistInserter( + core::CStatePersistInserter& inserter) const { + inserter.insertValue(REGRESSION_ORIGIN_6_4_TAG, m_RegressionOrigin); + inserter.insertValue(MEAN_SUM_AMPLITUDES_6_4_TAG, m_MeanSumAmplitudes.toDelimited()); + inserter.insertLevel(MEAN_SUM_AMPLITUDES_TREND_6_4_TAG, + boost::bind(&TRegression::acceptPersistInserter, + &m_MeanSumAmplitudesTrend, _1)); +} + +void CTimeSeriesDecompositionDetail::CComponents::CGainController::clear() { + m_RegressionOrigin = 0; + m_MeanSumAmplitudes = TFloatMeanAccumulator{}; + m_MeanSumAmplitudesTrend = TRegression{}; +} + +double CTimeSeriesDecompositionDetail::CComponents::CGainController::gain() const { + if (m_MeanSumAmplitudesTrend.count() > 0.0) { + TRegression::TArray params; + m_MeanSumAmplitudesTrend.parameters(params); + if (params[1] > 0.01 * CBasicStatistics::mean(m_MeanSumAmplitudes)) { + return 1.0; } } + return 3.0; +} + +void CTimeSeriesDecompositionDetail::CComponents::CGainController::seed(const TDoubleVec& predictions) { + m_MeanSumAmplitudes.add(std::accumulate( + predictions.begin(), predictions.end(), 0.0, + [](double sum, double prediction) { return sum + std::fabs(prediction); })); +} + +void CTimeSeriesDecompositionDetail::CComponents::CGainController::add(core_t::TTime time, + const TDoubleVec& predictions) { + if (predictions.size() > 0) { + m_MeanSumAmplitudes.add(std::accumulate( + predictions.begin(), predictions.end(), 0.0, [](double sum, double prediction) { + return sum + std::fabs(prediction); + })); + m_MeanSumAmplitudesTrend.add(scaleTime(time, m_RegressionOrigin), + CBasicStatistics::mean(m_MeanSumAmplitudes), + CBasicStatistics::count(m_MeanSumAmplitudes)); + } +} + +void CTimeSeriesDecompositionDetail::CComponents::CGainController::age(double factor) { + m_MeanSumAmplitudes.age(factor); + m_MeanSumAmplitudesTrend.age(factor); +} + +void CTimeSeriesDecompositionDetail::CComponents::CGainController::shiftOrigin(core_t::TTime time) { + time = CIntegerTools::floor(time, WEEK); + if (time > m_RegressionOrigin) { + m_MeanSumAmplitudesTrend.shiftAbscissa(-scaleTime(time, m_RegressionOrigin)); + m_RegressionOrigin = time; + } +} + +uint64_t CTimeSeriesDecompositionDetail::CComponents::CGainController::checksum(uint64_t seed) const { + seed = CChecksum::calculate(seed, m_RegressionOrigin); + seed = 
CChecksum::calculate(seed, m_MeanSumAmplitudes); + return CChecksum::calculate(seed, m_MeanSumAmplitudesTrend); +} + +bool CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::fromDelimited(const std::string& str_) { + std::string str{str_}; + std::size_t n{str.find(CBasicStatistics::EXTERNAL_DELIMITER)}; + if (m_MeanErrors.fromDelimited(str.substr(0, n)) == false) { + LOG_ERROR(<< "Failed to parse '" << str << "'"); + return false; + } + str = str.substr(n + 1); + if (m_MaxVarianceIncrease.fromDelimited(str) == false) { + LOG_ERROR(<< "Failed to parse '" << str << "'"); + return false; + } return true; } std::string CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::toDelimited() const { - return m_MeanErrorWithComponent.toDelimited() + CBasicStatistics::EXTERNAL_DELIMITER + - m_MeanErrorWithoutComponent.toDelimited() + CBasicStatistics::EXTERNAL_DELIMITER; + return m_MeanErrors.toDelimited() + CBasicStatistics::EXTERNAL_DELIMITER + + m_MaxVarianceIncrease.toDelimited(); } -void CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::add(double error, +void CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::add(double referenceError, + double error, double prediction, + double varianceIncrease, double weight) { - double errorWithComponent{winsorise(CTools::pow2(error), m_MeanErrorWithComponent)}; - double errorWithoutComponent{winsorise(CTools::pow2(error - prediction), - m_MeanErrorWithoutComponent)}; - m_MeanErrorWithComponent.add(errorWithComponent, weight); - m_MeanErrorWithoutComponent.add(errorWithoutComponent, weight); + TVector errors; + errors(0) = CTools::pow2(referenceError); + errors(1) = CTools::pow2(error); + errors(2) = CTools::pow2(error + prediction); + m_MeanErrors.add(this->winsorise(errors), weight); + m_MaxVarianceIncrease.add(varianceIncrease); } void CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::clear() { - m_MeanErrorWithComponent = TFloatMeanAccumulator(); - m_MeanErrorWithoutComponent = TFloatMeanAccumulator(); + m_MeanErrors = TVectorMeanAccumulator{}; + m_MaxVarianceIncrease = TMaxAccumulator{}; } -bool CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::remove( - core_t::TTime bucketLength, - CSeasonalComponent& seasonal) const { - double count{CBasicStatistics::count(m_MeanErrorWithComponent)}; - double errorWithComponent{CBasicStatistics::mean(m_MeanErrorWithComponent)}; - double errorWithoutComponent{CBasicStatistics::mean(m_MeanErrorWithoutComponent)}; - return count > static_cast(10 * seasonal.time().period() / bucketLength) && - std::max(errorWithoutComponent / errorWithComponent, - seasonal.heteroscedasticity()) < 1.5; -} - -bool CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::remove( - core_t::TTime bucketLength, - CCalendarComponent& calendar) const { - double count{CBasicStatistics::count(m_MeanErrorWithComponent)}; - double errorWithComponent{CBasicStatistics::mean(m_MeanErrorWithComponent)}; - double errorWithoutComponent{CBasicStatistics::mean(m_MeanErrorWithoutComponent)}; - return count > static_cast(5 * calendar.feature().window() / bucketLength) && - std::max(errorWithoutComponent / errorWithComponent, - calendar.heteroscedasticity()) < 1.5; +bool CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::remove(core_t::TTime bucketLength, + core_t::TTime period) const { + double history{CBasicStatistics::count(m_MeanErrors) * static_cast(bucketLength)}; + double referenceError{CBasicStatistics::mean(m_MeanErrors)(0)}; + double 
errorWithComponent{CBasicStatistics::mean(m_MeanErrors)(1)}; + double errorWithoutComponent{CBasicStatistics::mean(m_MeanErrors)(2)}; + return (history > static_cast(WEEK) && errorWithComponent > referenceError) || + (history > 5.0 * static_cast(period) && m_MaxVarianceIncrease[0] < 1.2 && + errorWithoutComponent < 1.2 * errorWithComponent); } void CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::age(double factor) { - m_MeanErrorWithComponent.age(factor); - m_MeanErrorWithoutComponent.age(factor); + m_MeanErrors.age(factor); + m_MaxVarianceIncrease.age(factor); } uint64_t CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::checksum(uint64_t seed) const { - seed = CChecksum::calculate(seed, m_MeanErrorWithComponent); - return CChecksum::calculate(seed, m_MeanErrorWithoutComponent); + seed = CChecksum::calculate(seed, m_MeanErrors); + return CChecksum::calculate(seed, m_MaxVarianceIncrease); } -double CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::winsorise( - double squareError, - const TFloatMeanAccumulator& variance) { - return CBasicStatistics::count(variance) > 10.0 - ? std::min(squareError, 36.0 * CBasicStatistics::mean(variance)) +CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::TVector +CTimeSeriesDecompositionDetail::CComponents::CComponentErrors::winsorise(const TVector& squareError) const { + return CBasicStatistics::count(m_MeanErrors) > 10.0 + ? min(squareError, 36.0 * CBasicStatistics::mean(m_MeanErrors)) : squareError; } -bool CTimeSeriesDecompositionDetail::CComponents::SSeasonal::acceptRestoreTraverser( +bool CTimeSeriesDecompositionDetail::CComponents::CSeasonal::acceptRestoreTraverser( double decayRate, core_t::TTime bucketLength_, core::CStateRestoreTraverser& traverser) { double bucketLength{static_cast(bucketLength_)}; - if (traverser.name() == VERSION_6_3_TAG) { + if (traverser.name() == VERSION_6_4_TAG) { + while (traverser.next()) { + const std::string& name{traverser.name()}; + RESTORE_NO_ERROR(COMPONENT_6_4_TAG, + m_Components.emplace_back(decayRate, bucketLength, traverser)) + RESTORE(ERRORS_6_4_TAG, + core::CPersistUtils::restore(ERRORS_6_4_TAG, m_PredictionErrors, traverser)) + } + } else if (traverser.name() == VERSION_6_3_TAG) { while (traverser.next()) { const std::string& name{traverser.name()}; RESTORE_NO_ERROR(COMPONENT_6_3_TAG, - s_Components.emplace_back(decayRate, bucketLength, traverser)) - RESTORE(ERRORS_6_3_TAG, - core::CPersistUtils::restore(ERRORS_6_3_TAG, s_PredictionErrors, traverser)) + m_Components.emplace_back(decayRate, bucketLength, traverser)) } } else { // There is no version string this is historic state. 
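As an illustration of the gain rule introduced above (a minimal standalone sketch with assumed scalar inputs, not code from the patch): the controller keeps the larger gain of 3.0 while the fitted trend in the summed component amplitudes stays below 1% of their running mean, and drops to the safe gain of 1.0 once that threshold is crossed.

    // Sketch of the decision rule in CGainController::gain(); the parameter
    // names here are hypothetical. 'amplitudeSlope' stands for the fitted
    // slope of the mean sum of component amplitudes and 'meanSumAmplitudes'
    // for their running mean.
    double decideGain(double amplitudeSlope, double meanSumAmplitudes) {
        // A slope above 1% of the mean indicates the component amplitudes are
        // inflating, which is the instability this change guards against, so
        // fall back to the conservative gain of 1.0. Otherwise the larger gain
        // of 3.0 gives faster adaptation and better prediction accuracy.
        return amplitudeSlope > 0.01 * meanSumAmplitudes ? 1.0 : 3.0;
    }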
do { const std::string& name{traverser.name()}; RESTORE_NO_ERROR(COMPONENT_OLD_TAG, - s_Components.emplace_back(decayRate, bucketLength, traverser)) - RESTORE(ERRORS_OLD_TAG, - core::CPersistUtils::restore(ERRORS_OLD_TAG, s_PredictionErrors, traverser)) + m_Components.emplace_back(decayRate, bucketLength, traverser)) } while (traverser.next()); } return true; } -void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::acceptPersistInserter( +void CTimeSeriesDecompositionDetail::CComponents::CSeasonal::acceptPersistInserter( core::CStatePersistInserter& inserter) const { - inserter.insertValue(VERSION_6_3_TAG, ""); - for (const auto& component : s_Components) { - inserter.insertLevel(COMPONENT_6_3_TAG, boost::bind(&CSeasonalComponent::acceptPersistInserter, + inserter.insertValue(VERSION_6_4_TAG, ""); + for (const auto& component : m_Components) { + inserter.insertLevel(COMPONENT_6_4_TAG, boost::bind(&CSeasonalComponent::acceptPersistInserter, &component, _1)); } - core::CPersistUtils::persist(ERRORS_6_3_TAG, s_PredictionErrors, inserter); + core::CPersistUtils::persist(ERRORS_6_4_TAG, m_PredictionErrors, inserter); } -void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::decayRate(double decayRate) { - for (auto& component : s_Components) { +void CTimeSeriesDecompositionDetail::CComponents::CSeasonal::decayRate(double decayRate) { + for (auto& component : m_Components) { component.decayRate(decayRate); } } -void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::propagateForwards(core_t::TTime start, +void CTimeSeriesDecompositionDetail::CComponents::CSeasonal::propagateForwards(core_t::TTime start, core_t::TTime end) { - for (std::size_t i = 0u; i < s_Components.size(); ++i) { - core_t::TTime period{s_Components[i].time().period()}; + for (std::size_t i = 0u; i < m_Components.size(); ++i) { + core_t::TTime period{m_Components[i].time().period()}; core_t::TTime a{CIntegerTools::floor(start, period)}; core_t::TTime b{CIntegerTools::floor(end, period)}; if (b > a) { double time{static_cast(b - a) / static_cast(CTools::truncate(period, DAY, WEEK))}; - s_Components[i].propagateForwardsByTime(time); - s_PredictionErrors[i].age(std::exp(-s_Components[i].decayRate() * time)); + m_Components[i].propagateForwardsByTime(time); + m_PredictionErrors[i].age(std::exp(-m_Components[i].decayRate() * time)); } } } -std::size_t CTimeSeriesDecompositionDetail::CComponents::SSeasonal::size() const { +void CTimeSeriesDecompositionDetail::CComponents::CSeasonal::clearPredictionErrors() { + for (auto& errors : m_PredictionErrors) { + errors.clear(); + } +} + +std::size_t CTimeSeriesDecompositionDetail::CComponents::CSeasonal::size() const { std::size_t result{0}; - for (const auto& component : s_Components) { + for (const auto& component : m_Components) { result += component.size(); } return result; } -void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::componentsErrorsAndDeltas( +const maths_t::TSeasonalComponentVec& +CTimeSeriesDecompositionDetail::CComponents::CSeasonal::components() const { + return m_Components; +} + +maths_t::TSeasonalComponentVec& +CTimeSeriesDecompositionDetail::CComponents::CSeasonal::components() { + return m_Components; +} + +void CTimeSeriesDecompositionDetail::CComponents::CSeasonal::componentsErrorsAndDeltas( core_t::TTime time, TSeasonalComponentPtrVec& components, TComponentErrorsPtrVec& errors, TDoubleVec& deltas) { - std::size_t n{s_Components.size()}; + std::size_t n{m_Components.size()}; components.reserve(n); errors.reserve(n); for (std::size_t i = 
0u; i < n; ++i) { - if (s_Components[i].time().inWindow(time)) { - components.push_back(&s_Components[i]); - errors.push_back(&s_PredictionErrors[i]); + if (m_Components[i].time().inWindow(time)) { + components.push_back(&m_Components[i]); + errors.push_back(&m_PredictionErrors[i]); } } deltas.resize(components.size(), 0.0); for (std::size_t i = 1u; i < components.size(); ++i) { - int j{static_cast(i - 1)}; - for (core_t::TTime period{components[i]->time().period()}; j > -1; --j) { + core_t::TTime period{components[i]->time().period()}; + for (int j{static_cast(i - 1)}; j > -1; --j) { core_t::TTime period_{components[j]->time().period()}; if (period % period_ == 0) { - double value{CBasicStatistics::mean(components[j]->value(time, 0.0))}; - double delta{0.2 * components[i]->delta(time, period_, value)}; + double value{CBasicStatistics::mean(components[j]->value(time, 0.0)) - + components[j]->meanValue()}; + double delta{0.1 * components[i]->delta(time, period_, value)}; deltas[j] += delta; deltas[i] -= delta; break; @@ -1901,10 +2029,22 @@ void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::componentsErrorsAnd } } -bool CTimeSeriesDecompositionDetail::CComponents::SSeasonal::shouldInterpolate( +void CTimeSeriesDecompositionDetail::CComponents::CSeasonal::appendPredictions( + core_t::TTime time, + TDoubleVec& predictions) const { + predictions.reserve(predictions.size() + m_Components.size()); + for (const auto& component : m_Components) { + if (component.time().inWindow(time)) { + predictions.push_back(CBasicStatistics::mean(component.value(time, 0.0)) - + component.meanValue()); + } + } +} + +bool CTimeSeriesDecompositionDetail::CComponents::CSeasonal::shouldInterpolate( core_t::TTime time, core_t::TTime last) const { - for (const auto& component : s_Components) { + for (const auto& component : m_Components) { core_t::TTime period{component.time().period()}; core_t::TTime a{CIntegerTools::floor(last, period)}; core_t::TTime b{CIntegerTools::floor(time, period)}; @@ -1915,12 +2055,12 @@ bool CTimeSeriesDecompositionDetail::CComponents::SSeasonal::shouldInterpolate( return false; } -void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::interpolate(core_t::TTime time, - core_t::TTime last, +void CTimeSeriesDecompositionDetail::CComponents::CSeasonal::interpolate(core_t::TTime time, + core_t::TTime lastTime, bool refine) { - for (auto& component : s_Components) { + for (auto& component : m_Components) { core_t::TTime period{component.time().period()}; - core_t::TTime a{CIntegerTools::floor(last, period)}; + core_t::TTime a{CIntegerTools::floor(lastTime, period)}; core_t::TTime b{CIntegerTools::floor(time, period)}; if (b > a || !component.initialized()) { component.interpolate(b, refine); @@ -1928,8 +2068,8 @@ void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::interpolate(core_t: } } -bool CTimeSeriesDecompositionDetail::CComponents::SSeasonal::initialized() const { - for (const auto& component : s_Components) { +bool CTimeSeriesDecompositionDetail::CComponents::CSeasonal::initialized() const { + for (const auto& component : m_Components) { if (component.initialized()) { return true; } @@ -1937,15 +2077,47 @@ bool CTimeSeriesDecompositionDetail::CComponents::SSeasonal::initialized() const return false; } -bool CTimeSeriesDecompositionDetail::CComponents::SSeasonal::prune(core_t::TTime time, +void CTimeSeriesDecompositionDetail::CComponents::CSeasonal::add( + const CSeasonalTime& seasonalTime, + std::size_t size, + double decayRate, + double bucketLength, + 
CSplineTypes::EBoundaryCondition boundaryCondition, + core_t::TTime startTime, + core_t::TTime endTime, + const TFloatMeanAccumulatorVec& values) { + m_Components.emplace_back(seasonalTime, size, decayRate, bucketLength, boundaryCondition); + m_Components.back().initialize(startTime, endTime, values); + m_Components.back().interpolate(CIntegerTools::floor(endTime, seasonalTime.period())); +} + +void CTimeSeriesDecompositionDetail::CComponents::CSeasonal::refreshForNewComponents() { + m_PredictionErrors.resize(m_Components.size()); + COrderings::simultaneousSort( + m_Components, m_PredictionErrors, + [](const CSeasonalComponent& lhs, const CSeasonalComponent& rhs) { + return lhs.time() < rhs.time(); + }); +} + +void CTimeSeriesDecompositionDetail::CComponents::CSeasonal::removeExcludedComponents( + const CSeasonalTime& time) { + m_Components.erase(std::remove_if(m_Components.begin(), m_Components.end(), + [&time](const CSeasonalComponent& component) { + return time.excludes(component.time()); + }), + m_Components.end()); +} + +bool CTimeSeriesDecompositionDetail::CComponents::CSeasonal::prune(core_t::TTime time, core_t::TTime bucketLength) { - std::size_t n = s_Components.size(); + std::size_t n{m_Components.size()}; if (n > 1) { TTimeTimePrSizeFMap windowed; windowed.reserve(n); - for (const auto& component : s_Components) { - const CSeasonalTime& time_ = component.time(); + for (const auto& component : m_Components) { + const CSeasonalTime& time_{component.time()}; if (time_.windowed()) { ++windowed[time_.window()]; } @@ -1955,25 +2127,25 @@ bool CTimeSeriesDecompositionDetail::CComponents::SSeasonal::prune(core_t::TTime TTimeTimePrDoubleFMap shifts; shifts.reserve(n); for (std::size_t i = 0u; i < n; ++i) { - const CSeasonalTime& time_ = s_Components[i].time(); + const CSeasonalTime& time_{m_Components[i].time()}; auto j = windowed.find(time_.window()); if (j == windowed.end() || j->second > 1) { - if (s_PredictionErrors[i].remove(bucketLength, s_Components[i])) { + if (m_PredictionErrors[i].remove(bucketLength, time_.period())) { LOG_DEBUG(<< "Removing seasonal component" << " with period '" << time_.period() << "' at " << time); remove[i] = true; - shifts[time_.window()] += s_Components[i].meanValue(); + shifts[time_.window()] += m_Components[i].meanValue(); --j->second; } } } - CSetTools::simultaneousRemoveIf(remove, s_Components, s_PredictionErrors, + CSetTools::simultaneousRemoveIf(remove, m_Components, m_PredictionErrors, [](bool remove_) { return remove_; }); for (auto& shift : shifts) { if (windowed.count(shift.first) > 0) { - for (auto& component : s_Components) { + for (auto& component : m_Components) { if (shift.first == component.time().window()) { component.shiftLevel(shift.second); break; @@ -1981,7 +2153,7 @@ bool CTimeSeriesDecompositionDetail::CComponents::SSeasonal::prune(core_t::TTime } } else { bool fallback = true; - for (auto& component : s_Components) { + for (auto& component : m_Components) { if (!component.time().windowed()) { component.shiftLevel(shift.second); fallback = false; @@ -1990,8 +2162,8 @@ bool CTimeSeriesDecompositionDetail::CComponents::SSeasonal::prune(core_t::TTime } if (fallback) { TTimeTimePrVec shifted; - shifted.reserve(s_Components.size()); - for (auto& component : s_Components) { + shifted.reserve(m_Components.size()); + for (auto& component : m_Components) { const CSeasonalTime& time_ = component.time(); auto containsWindow = [&time_](const TTimeTimePr& window) { return !(time_.windowEnd() <= window.first || @@ -2007,104 +2179,119 @@ bool 
CTimeSeriesDecompositionDetail::CComponents::SSeasonal::prune(core_t::TTime } } - return s_Components.empty(); + return m_Components.empty(); } -void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::shiftOrigin(core_t::TTime time) { - for (auto& component : s_Components) { +void CTimeSeriesDecompositionDetail::CComponents::CSeasonal::shiftOrigin(core_t::TTime time) { + for (auto& component : m_Components) { component.shiftOrigin(time); } } -void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::linearScale(core_t::TTime time, +void CTimeSeriesDecompositionDetail::CComponents::CSeasonal::linearScale(core_t::TTime time, double scale) { - for (auto& component : s_Components) { + for (auto& component : m_Components) { component.linearScale(time, scale); } } -uint64_t CTimeSeriesDecompositionDetail::CComponents::SSeasonal::checksum(uint64_t seed) const { - seed = CChecksum::calculate(seed, s_Components); - return CChecksum::calculate(seed, s_PredictionErrors); +uint64_t CTimeSeriesDecompositionDetail::CComponents::CSeasonal::checksum(uint64_t seed) const { + seed = CChecksum::calculate(seed, m_Components); + return CChecksum::calculate(seed, m_PredictionErrors); } -void CTimeSeriesDecompositionDetail::CComponents::SSeasonal::debugMemoryUsage( +void CTimeSeriesDecompositionDetail::CComponents::CSeasonal::debugMemoryUsage( core::CMemoryUsage::TMemoryUsagePtr mem) const { - mem->setName("SSeasonal"); - core::CMemoryDebug::dynamicSize("s_Components", s_Components, mem); - core::CMemoryDebug::dynamicSize("s_PredictionErrors", s_PredictionErrors, mem); + mem->setName("CSeasonal"); + core::CMemoryDebug::dynamicSize("m_Components", m_Components, mem); + core::CMemoryDebug::dynamicSize("m_PredictionErrors", m_PredictionErrors, mem); } -std::size_t CTimeSeriesDecompositionDetail::CComponents::SSeasonal::memoryUsage() const { - return core::CMemory::dynamicSize(s_Components) + - core::CMemory::dynamicSize(s_PredictionErrors); +std::size_t CTimeSeriesDecompositionDetail::CComponents::CSeasonal::memoryUsage() const { + return core::CMemory::dynamicSize(m_Components) + + core::CMemory::dynamicSize(m_PredictionErrors); } -bool CTimeSeriesDecompositionDetail::CComponents::SCalendar::acceptRestoreTraverser( +bool CTimeSeriesDecompositionDetail::CComponents::CCalendar::acceptRestoreTraverser( double decayRate, core_t::TTime bucketLength_, core::CStateRestoreTraverser& traverser) { double bucketLength{static_cast(bucketLength_)}; - if (traverser.name() == VERSION_6_3_TAG) { + if (traverser.name() == VERSION_6_4_TAG) { + while (traverser.next()) { + const std::string& name{traverser.name()}; + RESTORE_NO_ERROR(COMPONENT_6_4_TAG, + m_Components.emplace_back(decayRate, bucketLength, traverser)) + RESTORE(ERRORS_6_4_TAG, + core::CPersistUtils::restore(ERRORS_6_4_TAG, m_PredictionErrors, traverser)) + } + } else if (traverser.name() == VERSION_6_3_TAG) { while (traverser.next()) { const std::string& name{traverser.name()}; RESTORE_NO_ERROR(COMPONENT_6_3_TAG, - s_Components.emplace_back(decayRate, bucketLength, traverser)) - RESTORE(ERRORS_6_3_TAG, - core::CPersistUtils::restore(ERRORS_6_3_TAG, s_PredictionErrors, traverser)) + m_Components.emplace_back(decayRate, bucketLength, traverser)) } } else { // There is no version string this is historic state. 
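A note on the pruning above: when a seasonal component is removed, its mean value is recorded as a shift and folded into a surviving component for the same window, so the decomposition's prediction does not step at the moment of removal. A much simplified sketch of that bookkeeping, with assumed names and a flat list of component means standing in for the real windowed components, is:

    #include <cstddef>
    #include <vector>

    // Remove the component at index 'removed' and fold its mean back in so
    // the total predicted level is preserved. In the real code the shift is
    // applied to a component sharing the removed component's time window,
    // falling back to a non-windowed component; here it simply goes to the
    // first survivor.
    void pruneWithLevelShift(std::vector<double>& componentMeans, std::size_t removed) {
        double shift{componentMeans[removed]};
        componentMeans.erase(componentMeans.begin() +
                             static_cast<std::ptrdiff_t>(removed));
        if (!componentMeans.empty()) {
            componentMeans.front() += shift;
        }
    }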
do { const std::string& name{traverser.name()}; RESTORE_NO_ERROR(COMPONENT_OLD_TAG, - s_Components.emplace_back(decayRate, bucketLength, traverser)) - RESTORE(ERRORS_OLD_TAG, - core::CPersistUtils::restore(ERRORS_OLD_TAG, s_PredictionErrors, traverser)) + m_Components.emplace_back(decayRate, bucketLength, traverser)) } while (traverser.next()); } return true; } -void CTimeSeriesDecompositionDetail::CComponents::SCalendar::acceptPersistInserter( +void CTimeSeriesDecompositionDetail::CComponents::CCalendar::acceptPersistInserter( core::CStatePersistInserter& inserter) const { - inserter.insertValue(VERSION_6_3_TAG, ""); - for (const auto& component : s_Components) { - inserter.insertLevel(COMPONENT_6_3_TAG, boost::bind(&CCalendarComponent::acceptPersistInserter, + inserter.insertValue(VERSION_6_4_TAG, ""); + for (const auto& component : m_Components) { + inserter.insertLevel(COMPONENT_6_4_TAG, boost::bind(&CCalendarComponent::acceptPersistInserter, &component, _1)); } - core::CPersistUtils::persist(ERRORS_6_3_TAG, s_PredictionErrors, inserter); + core::CPersistUtils::persist(ERRORS_6_4_TAG, m_PredictionErrors, inserter); } -void CTimeSeriesDecompositionDetail::CComponents::SCalendar::decayRate(double decayRate) { - for (auto& component : s_Components) { +void CTimeSeriesDecompositionDetail::CComponents::CCalendar::decayRate(double decayRate) { + for (auto& component : m_Components) { component.decayRate(decayRate); } } -void CTimeSeriesDecompositionDetail::CComponents::SCalendar::propagateForwards(core_t::TTime start, +void CTimeSeriesDecompositionDetail::CComponents::CCalendar::propagateForwards(core_t::TTime start, core_t::TTime end) { - for (std::size_t i = 0u; i < s_Components.size(); ++i) { + for (std::size_t i = 0u; i < m_Components.size(); ++i) { core_t::TTime a{CIntegerTools::floor(start, MONTH)}; core_t::TTime b{CIntegerTools::floor(end, MONTH)}; if (b > a) { double time{static_cast(b - a) / static_cast(MONTH)}; - s_Components[i].propagateForwardsByTime(time); - s_PredictionErrors[i].age(std::exp(-s_Components[i].decayRate() * time)); + m_Components[i].propagateForwardsByTime(time); + m_PredictionErrors[i].age(std::exp(-m_Components[i].decayRate() * time)); } } } -std::size_t CTimeSeriesDecompositionDetail::CComponents::SCalendar::size() const { +void CTimeSeriesDecompositionDetail::CComponents::CCalendar::clearPredictionErrors() { + for (auto& errors : m_PredictionErrors) { + errors.clear(); + } +} + +std::size_t CTimeSeriesDecompositionDetail::CComponents::CCalendar::size() const { std::size_t result{0}; - for (const auto& component : s_Components) { + for (const auto& component : m_Components) { result += component.size(); } return result; } -bool CTimeSeriesDecompositionDetail::CComponents::SCalendar::haveComponent(CCalendarFeature feature) const { - for (const auto& component : s_Components) { +const maths_t::TCalendarComponentVec& +CTimeSeriesDecompositionDetail::CComponents::CCalendar::components() const { + return m_Components; +} + +bool CTimeSeriesDecompositionDetail::CComponents::CCalendar::haveComponent(CCalendarFeature feature) const { + for (const auto& component : m_Components) { if (component.feature() == feature) { return true; } @@ -2112,25 +2299,37 @@ bool CTimeSeriesDecompositionDetail::CComponents::SCalendar::haveComponent(CCale return false; } -void CTimeSeriesDecompositionDetail::CComponents::SCalendar::componentsAndErrors( +void CTimeSeriesDecompositionDetail::CComponents::CCalendar::componentsAndErrors( core_t::TTime time, TCalendarComponentPtrVec& 
components, TComponentErrorsPtrVec& errors) { - std::size_t n = s_Components.size(); + std::size_t n = m_Components.size(); components.reserve(n); errors.reserve(n); for (std::size_t i = 0u; i < n; ++i) { - if (s_Components[i].feature().inWindow(time)) { - components.push_back(&s_Components[i]); - errors.push_back(&s_PredictionErrors[i]); + if (m_Components[i].feature().inWindow(time)) { + components.push_back(&m_Components[i]); + errors.push_back(&m_PredictionErrors[i]); } } } -bool CTimeSeriesDecompositionDetail::CComponents::SCalendar::shouldInterpolate( +void CTimeSeriesDecompositionDetail::CComponents::CCalendar::appendPredictions( + core_t::TTime time, + TDoubleVec& predictions) const { + predictions.reserve(predictions.size() + m_Components.size()); + for (const auto& component : m_Components) { + if (component.feature().inWindow(time)) { + predictions.push_back(CBasicStatistics::mean(component.value(time, 0.0)) - + component.meanValue()); + } + } +} + +bool CTimeSeriesDecompositionDetail::CComponents::CCalendar::shouldInterpolate( core_t::TTime time, core_t::TTime last) const { - for (const auto& component : s_Components) { + for (const auto& component : m_Components) { CCalendarFeature feature = component.feature(); if (!feature.inWindow(time) && feature.inWindow(last)) { return true; @@ -2139,19 +2338,19 @@ bool CTimeSeriesDecompositionDetail::CComponents::SCalendar::shouldInterpolate( return false; } -void CTimeSeriesDecompositionDetail::CComponents::SCalendar::interpolate(core_t::TTime time, - core_t::TTime last, +void CTimeSeriesDecompositionDetail::CComponents::CCalendar::interpolate(core_t::TTime time, + core_t::TTime lastTime, bool refine) { - for (auto& component : s_Components) { + for (auto& component : m_Components) { CCalendarFeature feature = component.feature(); - if (!feature.inWindow(time) && feature.inWindow(last)) { + if (!feature.inWindow(time) && feature.inWindow(lastTime)) { component.interpolate(time - feature.offset(time), refine); } } } -bool CTimeSeriesDecompositionDetail::CComponents::SCalendar::initialized() const { - for (const auto& component : s_Components) { +bool CTimeSeriesDecompositionDetail::CComponents::CCalendar::initialized() const { + for (const auto& component : m_Components) { if (component.initialized()) { return true; } @@ -2159,45 +2358,54 @@ bool CTimeSeriesDecompositionDetail::CComponents::SCalendar::initialized() const return false; } -bool CTimeSeriesDecompositionDetail::CComponents::SCalendar::prune(core_t::TTime time, +bool CTimeSeriesDecompositionDetail::CComponents::CCalendar::prune(core_t::TTime time, core_t::TTime bucketLength) { - TBoolVec remove(s_Components.size(), false); - for (std::size_t i = 0u; i < s_Components.size(); ++i) { - if (s_PredictionErrors[i].remove(bucketLength, s_Components[i])) { + TBoolVec remove(m_Components.size(), false); + for (std::size_t i = 0u; i < m_Components.size(); ++i) { + if (m_PredictionErrors[i].remove(bucketLength, m_Components[i].feature().window())) { LOG_DEBUG(<< "Removing calendar component" - << " '" << s_Components[i].feature().print() << "' at " << time); + << " '" << m_Components[i].feature().print() << "' at " << time); remove[i] = true; } } - CSetTools::simultaneousRemoveIf(remove, s_Components, s_PredictionErrors, + CSetTools::simultaneousRemoveIf(remove, m_Components, m_PredictionErrors, [](bool remove_) { return remove_; }); - return s_Components.empty(); + return m_Components.empty(); +} + +void CTimeSeriesDecompositionDetail::CComponents::CCalendar::add(const 
CCalendarFeature& feature, + std::size_t size, + double decayRate, + double bucketLength) { + m_Components.emplace_back(feature, size, decayRate, bucketLength, CSplineTypes::E_Natural); + m_Components.back().initialize(); + m_PredictionErrors.resize(m_Components.size()); } -void CTimeSeriesDecompositionDetail::CComponents::SCalendar::linearScale(core_t::TTime time, +void CTimeSeriesDecompositionDetail::CComponents::CCalendar::linearScale(core_t::TTime time, double scale) { - for (auto& component : s_Components) { + for (auto& component : m_Components) { component.linearScale(time, scale); } } -uint64_t CTimeSeriesDecompositionDetail::CComponents::SCalendar::checksum(uint64_t seed) const { - seed = CChecksum::calculate(seed, s_Components); - return CChecksum::calculate(seed, s_PredictionErrors); +uint64_t CTimeSeriesDecompositionDetail::CComponents::CCalendar::checksum(uint64_t seed) const { + seed = CChecksum::calculate(seed, m_Components); + return CChecksum::calculate(seed, m_PredictionErrors); } -void CTimeSeriesDecompositionDetail::CComponents::SCalendar::debugMemoryUsage( +void CTimeSeriesDecompositionDetail::CComponents::CCalendar::debugMemoryUsage( core::CMemoryUsage::TMemoryUsagePtr mem) const { - mem->setName("SCalendar"); - core::CMemoryDebug::dynamicSize("s_Components", s_Components, mem); - core::CMemoryDebug::dynamicSize("s_PredictionErrors", s_PredictionErrors, mem); + mem->setName("CCalendar"); + core::CMemoryDebug::dynamicSize("m_Components", m_Components, mem); + core::CMemoryDebug::dynamicSize("m_PredictionErrors", m_PredictionErrors, mem); } -std::size_t CTimeSeriesDecompositionDetail::CComponents::SCalendar::memoryUsage() const { - return core::CMemory::dynamicSize(s_Components) + - core::CMemory::dynamicSize(s_PredictionErrors); +std::size_t CTimeSeriesDecompositionDetail::CComponents::CCalendar::memoryUsage() const { + return core::CMemory::dynamicSize(m_Components) + + core::CMemory::dynamicSize(m_PredictionErrors); } } } diff --git a/lib/maths/CTimeSeriesModel.cc b/lib/maths/CTimeSeriesModel.cc index 0e2e561452..024758a2a7 100644 --- a/lib/maths/CTimeSeriesModel.cc +++ b/lib/maths/CTimeSeriesModel.cc @@ -1370,11 +1370,11 @@ CUnivariateTimeSeriesModel::testAndApplyChange(const CModelAddSamplesParams& par if (m_ChangeDetector != nullptr) { m_ChangeDetector->addSamples({{time, values[median].second[0]}}, {weights}); - if (m_ChangeDetector->stopTesting()) { m_ChangeDetector.reset(); } else if (auto change = m_ChangeDetector->change()) { - LOG_DEBUG("Detected " << change->print() << " at " << values[median].first); + LOG_DEBUG(<< "Detected " << change->print() << " at " + << values[median].first); m_ChangeDetector.reset(); return this->applyChange(*change); } @@ -1385,22 +1385,26 @@ CUnivariateTimeSeriesModel::testAndApplyChange(const CModelAddSamplesParams& par CUnivariateTimeSeriesModel::EUpdateResult CUnivariateTimeSeriesModel::applyChange(const SChangeDescription& change) { + core_t::TTime timeOfChangePoint{m_CandidateChangePoint.first}; + double valueAtChangePoint{m_CandidateChangePoint.second}; + for (auto& value : m_SlidingWindow) { - switch (change.s_Description) { - case SChangeDescription::E_LevelShift: - value.second += change.s_Value[0]; - break; - case SChangeDescription::E_LinearScale: - value.second *= change.s_Value[0]; - break; - case SChangeDescription::E_TimeShift: - value.first += static_cast(change.s_Value[0]); - break; + if (value.first < timeOfChangePoint) { + switch (change.s_Description) { + case SChangeDescription::E_LevelShift: + value.second 
+= change.s_Value[0]; + break; + case SChangeDescription::E_LinearScale: + value.second *= change.s_Value[0]; + break; + case SChangeDescription::E_TimeShift: + value.first += static_cast(change.s_Value[0]); + break; + } } } - if (m_TrendModel->applyChange(m_CandidateChangePoint.first, - m_CandidateChangePoint.second, change)) { + if (m_TrendModel->applyChange(timeOfChangePoint, valueAtChangePoint, change)) { this->reinitializeStateGivenNewComponent(); } else { change.s_ResidualModel->decayRate(m_ResidualModel->decayRate()); diff --git a/lib/maths/unittest/CForecastTest.cc b/lib/maths/unittest/CForecastTest.cc index dd93281379..45528ae4eb 100644 --- a/lib/maths/unittest/CForecastTest.cc +++ b/lib/maths/unittest/CForecastTest.cc @@ -36,6 +36,7 @@ using namespace handy_typedefs; namespace { using TDoubleVec = std::vector; +using TTimeVec = std::vector; using TTimeDoublePr = std::pair; using TTimeDoublePrVec = std::vector; using TDouble2Vec = core::CSmallVector; @@ -46,6 +47,63 @@ using TErrorBarVec = std::vector; using TMeanAccumulator = maths::CBasicStatistics::SSampleMean::TAccumulator; using TModelPtr = std::shared_ptr; +class CDebugGenerator { +public: + static const bool ENABLED{false}; + +public: + ~CDebugGenerator() { + if (ENABLED) { + std::ofstream file; + file.open("results.m"); + file << "t = " << core::CContainerPrinter::print(m_ValueTimes) << ";\n"; + file << "f = " << core::CContainerPrinter::print(m_Values) << ";\n"; + file << "tp = " << core::CContainerPrinter::print(m_PredictionTimes) << ";\n"; + file << "fp = " << core::CContainerPrinter::print(m_Predictions) << ";\n"; + file << "tf = " << core::CContainerPrinter::print(m_ForecastTimes) << ";\n"; + file << "fl = " << core::CContainerPrinter::print(m_ForecastLower) << ";\n"; + file << "fm = " << core::CContainerPrinter::print(m_ForecastMean) << ";\n"; + file << "fu = " << core::CContainerPrinter::print(m_ForecastUpper) << ";\n"; + file << "hold on;\n"; + file << "plot(t, f);\n"; + file << "plot(tp, fp, 'k');\n"; + file << "plot(tf, fl, 'r');\n"; + file << "plot(tf, fm, 'k');\n"; + file << "plot(tf, fu, 'r');\n"; + } + } + void addValue(core_t::TTime time, double value) { + if (ENABLED) { + m_ValueTimes.push_back(time); + m_Values.push_back(value); + } + } + void addPrediction(core_t::TTime time, double prediction) { + if (ENABLED) { + m_PredictionTimes.push_back(time); + m_Predictions.push_back(prediction); + } + } + void addForecast(core_t::TTime time, const maths::SErrorBar& forecast) { + if (ENABLED) { + m_ForecastTimes.push_back(time); + m_ForecastLower.push_back(forecast.s_LowerBound); + m_ForecastMean.push_back(forecast.s_Predicted); + m_ForecastUpper.push_back(forecast.s_UpperBound); + } + } + +private: + TTimeVec m_ValueTimes; + TDoubleVec m_Values; + TTimeVec m_PredictionTimes; + TDoubleVec m_Predictions; + TTimeVec m_ForecastTimes; + TDoubleVec m_ForecastLower; + TDoubleVec m_ForecastMean; + TDoubleVec m_ForecastUpper; +}; + const double DECAY_RATE{0.0005}; const std::size_t TAG{0u}; const TDouble2Vec MINIMUM_VALUE{boost::numeric::bounds::lowest()}; @@ -94,7 +152,7 @@ void CForecastTest::testDailyNoLongTermTrend() { return 40.0 + alpha * y[i / 6] + beta * y[(i / 6 + 1) % y.size()] + noise; }; - this->test(trend, bucketLength, 63, 64.0, 5.0, 0.13); + this->test(trend, bucketLength, 63, 64.0, 5.0, 0.14); } void CForecastTest::testDailyConstantLongTermTrend() { @@ -134,7 +192,7 @@ void CForecastTest::testDailyVaryingLongTermTrend() { 8.0 * std::sin(boost::math::double_constants::two_pi * time_ / 43200.0) + noise; }; - 
this->test(trend, bucketLength, 98, 9.0, 24.0, 0.042); + this->test(trend, bucketLength, 98, 9.0, 22.0, 0.04); } void CForecastTest::testComplexNoLongTermTrend() { @@ -150,7 +208,7 @@ void CForecastTest::testComplexNoLongTermTrend() { return scale[d] * (20.0 + y[h] + noise); }; - this->test(trend, bucketLength, 63, 24.0, 9.0, 0.13); + this->test(trend, bucketLength, 63, 24.0, 9.0, 0.14); } void CForecastTest::testComplexConstantLongTermTrend() { @@ -167,11 +225,11 @@ void CForecastTest::testComplexConstantLongTermTrend() { scale[d] * (20.0 + y[h] + noise); }; - this->test(trend, bucketLength, 63, 24.0, 11.0, 0.01); + this->test(trend, bucketLength, 63, 24.0, 10.0, 0.01); } void CForecastTest::testComplexVaryingLongTermTrend() { - core_t::TTime bucketLength{3600}; + core_t::TTime bucketLength{1800}; double day{static_cast(core::constants::DAY)}; TDoubleVec times{0.0, 5.0 * day, 10.0 * day, 15.0 * day, 20.0 * day, 25.0 * day, 30.0 * day, 35.0 * day, @@ -180,7 +238,7 @@ void CForecastTest::testComplexVaryingLongTermTrend() { 80.0 * day, 85.0 * day, 90.0 * day, 95.0 * day, 100.0 * day, 105.0 * day, 110.0 * day, 115.0 * day}; TDoubleVec values{20.0, 30.0, 25.0, 35.0, 45.0, 40.0, 38.0, 36.0, - 35.0, 25.0, 35.0, 45.0, 55.0, 62.0, 70.0, 76.0, + 35.0, 34.0, 35.0, 40.0, 48.0, 55.0, 65.0, 76.0, 79.0, 82.0, 86.0, 90.0, 95.0, 100.0, 106.0, 112.0}; TDoubleVec y{0.0, 1.0, 2.0, 2.0, 3.0, 4.0, 5.0, 6.0, 8.0, 10.0, 11.0, 12.0, 11.0, 10.0, 9.0, 8.0, @@ -192,12 +250,12 @@ void CForecastTest::testComplexVaryingLongTermTrend() { TTrend trend = [&trend_, &y, &scale, bucketLength](core_t::TTime time, double noise) { core_t::TTime d{(time % core::constants::WEEK) / core::constants::DAY}; - core_t::TTime h{(time % core::constants::DAY) / bucketLength}; + core_t::TTime h{(time % core::constants::DAY) / 2 / bucketLength}; double time_{static_cast(time)}; return trend_.value(time_) + scale[d] * (20.0 + y[h] + noise); }; - this->test(trend, bucketLength, 63, 4.0, 44.0, 0.06); + this->test(trend, bucketLength, 63, 4.0, 24.0, 0.05); } void CForecastTest::testNonNegative() { @@ -212,16 +270,10 @@ void CForecastTest::testNonNegative() { decayRateControllers()}; maths::CUnivariateTimeSeriesModel model(params(bucketLength), TAG, trend, prior, &controllers); + CDebugGenerator debug; LOG_DEBUG(<< "*** learn ***"); - //std::ofstream file; - //file.open("results.m"); - //TDoubleVec actual; - //TDoubleVec ly; - //TDoubleVec my; - //TDoubleVec uy; - core_t::TTime time{0}; std::vector weights{ maths_t::CUnitWeights::unit(1)}; @@ -237,7 +289,8 @@ void CForecastTest::testNonNegative() { .priorWeights(weights); double y{std::max(*value, 0.0)}; model.addSamples(params, {core::make_triple(time, TDouble2Vec{y}, TAG)}); - //actual.push_back(y); + debug.addValue(time, y); + debug.addPrediction(time, maths::CBasicStatistics::mean(model.predict(time))); } } @@ -267,10 +320,8 @@ void CForecastTest::testNonNegative() { outOfBounds += (y < prediction[i].s_LowerBound || y > prediction[i].s_UpperBound ? 
1 : 0); ++count; - //actual.push_back(y); - //ly.push_back(prediction[i].s_LowerBound); - //my.push_back(prediction[i].s_Predicted); - //uy.push_back(prediction[i].s_UpperBound); + debug.addValue(time, y); + debug.addForecast(time, prediction[i]); } } @@ -278,11 +329,6 @@ void CForecastTest::testNonNegative() { static_cast(count)}; LOG_DEBUG(<< "% out of bounds = " << percentageOutOfBounds); - //file << "actual = " << core::CContainerPrinter::print(actual) << ";\n"; - //file << "ly = " << core::CContainerPrinter::print(ly) << ";\n"; - //file << "my = " << core::CContainerPrinter::print(my) << ";\n"; - //file << "uy = " << core::CContainerPrinter::print(uy) << ";\n"; - CPPUNIT_ASSERT(percentageOutOfBounds < 8.0); } @@ -308,16 +354,10 @@ void CForecastTest::testFinancialIndex() { decayRateControllers()}; maths::CUnivariateTimeSeriesModel model(params(bucketLength), TAG, trend, prior, &controllers); + CDebugGenerator debug; LOG_DEBUG(<< "*** learn ***"); - //std::ofstream file; - //file.open("results.m"); - //TDoubleVec actual; - //TDoubleVec ly; - //TDoubleVec my; - //TDoubleVec uy; - std::size_t n{5 * timeseries.size() / 6}; TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)}; @@ -327,7 +367,10 @@ void CForecastTest::testFinancialIndex() { model.addSamples( params, {core::make_triple(timeseries[i].first, TDouble2Vec{timeseries[i].second}, TAG)}); - //actual.push_back(timeseries[i].second); + debug.addValue(timeseries[i].first, timeseries[i].second); + debug.addPrediction( + timeseries[i].first, + maths::CBasicStatistics::mean(model.predict(timeseries[i].first))); } LOG_DEBUG(<< "*** forecast ***"); @@ -351,10 +394,8 @@ void CForecastTest::testFinancialIndex() { (yi < prediction[j].s_LowerBound || yi > prediction[j].s_UpperBound ? 1 : 0); ++count; error.add(std::fabs(yi - prediction[j].s_Predicted) / std::fabs(yi)); - //actual.push_back(yi); - //ly.push_back(prediction[j].s_LowerBound); - //my.push_back(prediction[j].s_Predicted); - //uy.push_back(prediction[j].s_UpperBound); + debug.addValue(timeseries[i].first, timeseries[i].second); + debug.addForecast(timeseries[i].first, prediction[j]); } double percentageOutOfBounds{100.0 * static_cast(outOfBounds) / @@ -362,11 +403,6 @@ void CForecastTest::testFinancialIndex() { LOG_DEBUG(<< "% out of bounds = " << percentageOutOfBounds); LOG_DEBUG(<< "error = " << maths::CBasicStatistics::mean(error)); - //file << "actual = " << core::CContainerPrinter::print(actual) << ";\n"; - //file << "ly = " << core::CContainerPrinter::print(ly) << ";\n"; - //file << "my = " << core::CContainerPrinter::print(my) << ";\n"; - //file << "uy = " << core::CContainerPrinter::print(uy) << ";\n"; - CPPUNIT_ASSERT(percentageOutOfBounds < 52.0); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(error) < 0.1); } @@ -404,13 +440,6 @@ void CForecastTest::test(TTrend trend, double noiseVariance, double maximumPercentageOutOfBounds, double maximumError) { - //std::ofstream file; - //file.open("results.m"); - //TDoubleVec actual; - //TDoubleVec ly; - //TDoubleVec my; - //TDoubleVec uy; - LOG_DEBUG(<< "*** learn ***"); test::CRandomNumbers rng; @@ -421,6 +450,7 @@ void CForecastTest::test(TTrend trend, params(bucketLength), TAG, maths::CTimeSeriesDecomposition(0.012, bucketLength), maths::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData, DECAY_RATE), &controllers); + CDebugGenerator debug; core_t::TTime time{0}; TDouble2VecWeightsAryVec weights{maths_t::CUnitWeights::unit(1)}; @@ -434,7 +464,8 @@ void CForecastTest::test(TTrend trend, 
params.integer(false).propagationInterval(1.0).trendWeights(weights).priorWeights(weights); double yi{trend(time, noise[i])}; model.addSamples(params, {core::make_triple(time, TDouble2Vec{yi}, TAG)}); - //actual.push_back(yi); + debug.addValue(time, yi); + debug.addPrediction(time, maths::CBasicStatistics::mean(model.predict(time))); } } @@ -465,10 +496,8 @@ void CForecastTest::test(TTrend trend, (yj < prediction[i].s_LowerBound || yj > prediction[i].s_UpperBound ? 1 : 0); ++count; error.add(std::fabs(yj - prediction[i].s_Predicted) / std::fabs(yj)); - //actual.push_back(yj); - //ly.push_back(prediction[i].s_LowerBound); - //my.push_back(prediction[i].s_Predicted); - //uy.push_back(prediction[i].s_UpperBound); + debug.addValue(time, yj); + debug.addForecast(time, prediction[i]); } } @@ -477,11 +506,6 @@ void CForecastTest::test(TTrend trend, LOG_DEBUG(<< "% out of bounds = " << percentageOutOfBounds); LOG_DEBUG(<< "error = " << maths::CBasicStatistics::mean(error)); - //file << "actual = " << core::CContainerPrinter::print(actual) << ";\n"; - //file << "ly = " << core::CContainerPrinter::print(ly) << ";\n"; - //file << "my = " << core::CContainerPrinter::print(my) << ";\n"; - //file << "uy = " << core::CContainerPrinter::print(uy) << ";\n"; - CPPUNIT_ASSERT(percentageOutOfBounds < maximumPercentageOutOfBounds); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(error) < maximumError); } diff --git a/lib/maths/unittest/CSeasonalComponentTest.cc b/lib/maths/unittest/CSeasonalComponentTest.cc index 789192d4d2..4038663adb 100644 --- a/lib/maths/unittest/CSeasonalComponentTest.cc +++ b/lib/maths/unittest/CSeasonalComponentTest.cc @@ -396,7 +396,7 @@ void CSeasonalComponentTest::testConstantPeriodic() { LOG_DEBUG(<< "error1 = " << error1); LOG_DEBUG(<< "error2 = " << error2); CPPUNIT_ASSERT(error1 < 11.0); - CPPUNIT_ASSERT(error2 < 4.6); + CPPUNIT_ASSERT(error2 < 4.7); totalError1 += error1; totalError2 += error2; @@ -414,7 +414,7 @@ void CSeasonalComponentTest::testConstantPeriodic() { totalError2 /= 40.0; LOG_DEBUG(<< "totalError1 = " << totalError1); LOG_DEBUG(<< "totalError2 = " << totalError2); - CPPUNIT_ASSERT(totalError1 < 7.3); + CPPUNIT_ASSERT(totalError1 < 7.5); CPPUNIT_ASSERT(totalError2 < 4.2); } } @@ -535,7 +535,7 @@ void CSeasonalComponentTest::testTimeVaryingPeriodic() { LOG_DEBUG(<< "mean error 1 = " << totalError1 / numberErrors); LOG_DEBUG(<< "mean error 2 = " << totalError2 / numberErrors); - CPPUNIT_ASSERT(totalError1 / numberErrors < 19.0); + CPPUNIT_ASSERT(totalError1 / numberErrors < 18.0); CPPUNIT_ASSERT(totalError2 / numberErrors < 14.0); } diff --git a/lib/maths/unittest/CTimeSeriesDecompositionTest.cc b/lib/maths/unittest/CTimeSeriesDecompositionTest.cc index ed88fa4413..a716e6ece4 100644 --- a/lib/maths/unittest/CTimeSeriesDecompositionTest.cc +++ b/lib/maths/unittest/CTimeSeriesDecompositionTest.cc @@ -38,6 +38,7 @@ namespace { using TDoubleDoublePr = std::pair; using TDoubleVec = std::vector; using TDoubleVecVec = std::vector; +using TDouble1Vec = core::CSmallVector; using TTimeVec = std::vector; using TTimeDoublePr = std::pair; using TTimeDoublePrVec = std::vector; @@ -48,6 +49,57 @@ double mean(const TDoubleDoublePr& x) { return (x.first + x.second) / 2.0; } +class CDebugGenerator { +public: + static const bool ENABLED{false}; + +public: + CDebugGenerator(const std::string& file = "results.m") : m_File(file) {} + + ~CDebugGenerator() { + if (ENABLED) { + std::ofstream file; + file.open(m_File); + file << "t = " << core::CContainerPrinter::print(m_ValueTimes) << ";\n"; + 
file << "f = " << core::CContainerPrinter::print(m_Values) << ";\n"; + file << "te = " << core::CContainerPrinter::print(m_PredictionTimes) << ";\n"; + file << "fe = " << core::CContainerPrinter::print(m_Predictions) << ";\n"; + file << "r = " << core::CContainerPrinter::print(m_Errors) << ";\n"; + file << "figure(1);\n"; + file << "clf;\n"; + file << "hold on;\n"; + file << "plot(t, f);\n"; + file << "plot(te, fe, 'r');\n"; + file << "axis([t(1) t(columns(t)) min(min(f),min(fe)) max(max(f),max(fe))]);\n"; + file << "figure(2);\n"; + file << "clf;\n"; + file << "plot(te, r, 'k');\n"; + file << "axis([t(1) t(columns(t)) min(r) max(r)]);"; + } + } + void addValue(core_t::TTime time, double value) { + if (ENABLED) { + m_ValueTimes.push_back(time); + m_Values.push_back(value); + } + } + void addPrediction(core_t::TTime time, double prediction, double error) { + if (ENABLED) { + m_PredictionTimes.push_back(time); + m_Predictions.push_back(prediction); + m_Errors.push_back(error); + } + } + +private: + std::string m_File; + TTimeVec m_ValueTimes; + TDoubleVec m_Values; + TTimeVec m_PredictionTimes; + TDoubleVec m_Predictions; + TDoubleVec m_Errors; +}; + const core_t::TTime FIVE_MINS = 300; const core_t::TTime TEN_MINS = 600; const core_t::TTime HALF_HOUR = core::constants::HOUR / 2; @@ -77,6 +129,7 @@ void CTimeSeriesDecompositionTest::testSuperpositionOfSines() { core_t::TTime lastWeek = 0; maths::CTimeSeriesDecomposition decomposition(0.01, HALF_HOUR); + CDebugGenerator debug; double totalSumResidual = 0.0; double totalMaxResidual = 0.0; @@ -84,20 +137,12 @@ void CTimeSeriesDecompositionTest::testSuperpositionOfSines() { double totalMaxValue = 0.0; double totalPercentileError = 0.0; - //std::ofstream file; - //file.open("results.m"); - //file << "hold on;\n"; - //file << "t = " << core::CContainerPrinter::print(times) << ";\n"; - //file << "f = " << core::CContainerPrinter::print(trend) << ";\n"; - //file << "plot(t, f);\n"; - //TDoubleVec f; - //TDoubleVec r; - for (std::size_t i = 0u; i < times.size(); ++i) { core_t::TTime time = times[i]; double value = trend[i] + noise[i]; decomposition.addPoint(time, value); + debug.addValue(time, value); if (time >= lastWeek + WEEK) { LOG_DEBUG(<< "Processing week"); @@ -119,8 +164,7 @@ void CTimeSeriesDecompositionTest::testSuperpositionOfSines() { std::max(std::max(prediction.first - trend[t / HALF_HOUR], trend[t / HALF_HOUR] - prediction.second), 0.0); - //f.push_back(mean(value)); - //r.push_back(mean(value) - trend[t / HALF_HOUR]); + debug.addPrediction(t, mean(prediction), residual); } LOG_DEBUG(<< "'sum residual' / 'sum value' = " << sumResidual / sumValue); @@ -130,7 +174,7 @@ void CTimeSeriesDecompositionTest::testSuperpositionOfSines() { if (time >= 2 * WEEK) { CPPUNIT_ASSERT(sumResidual < 0.052 * sumValue); CPPUNIT_ASSERT(maxResidual < 0.10 * maxValue); - CPPUNIT_ASSERT(percentileError < 0.02 * sumValue); + CPPUNIT_ASSERT(percentileError < 0.03 * sumValue); totalSumResidual += sumResidual; totalMaxResidual += maxResidual; totalSumValue += sumValue; @@ -146,13 +190,8 @@ void CTimeSeriesDecompositionTest::testSuperpositionOfSines() { LOG_DEBUG(<< "total 'max residual' / 'max value' = " << totalMaxResidual / totalMaxValue); LOG_DEBUG(<< "total 70% error = " << totalPercentileError / totalSumValue); - //file << "fe = " << core::CContainerPrinter::print(f) << ";\n"; - //file << "r = " << core::CContainerPrinter::print(r) << ";\n"; - //file << "plot(t(1:length(fe)), fe, 'r');\n"; - //file << "plot(t(1:length(r)), r, 'k');\n"; - - 
CPPUNIT_ASSERT(totalSumResidual < 0.019 * totalSumValue); - CPPUNIT_ASSERT(totalMaxResidual < 0.020 * totalMaxValue); + CPPUNIT_ASSERT(totalSumResidual < 0.014 * totalSumValue); + CPPUNIT_ASSERT(totalMaxResidual < 0.017 * totalMaxValue); CPPUNIT_ASSERT(totalPercentileError < 0.01 * totalSumValue); } @@ -257,10 +296,7 @@ void CTimeSeriesDecompositionTest::testDistortedPeriodic() { core_t::TTime time = startTime; core_t::TTime lastWeek = startTime; maths::CTimeSeriesDecomposition decomposition(0.01, bucketLength); - - //std::ofstream file; - //file.open("results.m"); - //file << "hold on;\n"; + CDebugGenerator debug; double totalSumResidual = 0.0; double totalMaxResidual = 0.0; @@ -270,37 +306,32 @@ void CTimeSeriesDecompositionTest::testDistortedPeriodic() { for (std::size_t i = 0u; i < timeseries.size(); ++i, time += bucketLength) { decomposition.addPoint(time, timeseries[i]); + debug.addValue(time, timeseries[i]); if (time >= lastWeek + WEEK || i == boost::size(timeseries) - 1) { LOG_DEBUG(<< "Processing week"); - //TDoubleVec t; - //TDoubleVec f; - //TDoubleVec fe; - double sumResidual = 0.0; double maxResidual = 0.0; double sumValue = 0.0; double maxValue = 0.0; double percentileError = 0.0; - for (core_t::TTime tt = lastWeek; - tt < lastWeek + WEEK && - static_cast(tt / HOUR) < boost::size(timeseries); - tt += HOUR) { - TDoubleDoublePr prediction = decomposition.value(tt, 70.0); - double residual = std::fabs(timeseries[tt / HOUR] - mean(prediction)); + for (core_t::TTime t = lastWeek; + t < lastWeek + WEEK && + static_cast(t / HOUR) < boost::size(timeseries); + t += HOUR) { + TDoubleDoublePr prediction = decomposition.value(t, 70.0); + double residual = std::fabs(timeseries[t / HOUR] - mean(prediction)); sumResidual += residual; maxResidual = std::max(maxResidual, residual); - sumValue += std::fabs(timeseries[tt / HOUR]); - maxValue = std::max(maxValue, std::fabs(timeseries[tt / HOUR])); + sumValue += std::fabs(timeseries[t / HOUR]); + maxValue = std::max(maxValue, std::fabs(timeseries[t / HOUR])); percentileError += - std::max(std::max(prediction.first - timeseries[tt / HOUR], - timeseries[tt / HOUR] - prediction.second), + std::max(std::max(prediction.first - timeseries[t / HOUR], + timeseries[t / HOUR] - prediction.second), 0.0); - //t.push_back(tt); - //f.push_back(timeseries[tt / HOUR]); - //fe.push_back(mean(value)); + debug.addPrediction(t, mean(prediction), residual); } LOG_DEBUG(<< "'sum residual' / 'sum value' = " << sumResidual / sumValue); @@ -308,9 +339,9 @@ void CTimeSeriesDecompositionTest::testDistortedPeriodic() { LOG_DEBUG(<< "70% error = " << percentileError / sumValue); if (time >= 2 * WEEK) { - CPPUNIT_ASSERT(sumResidual < 0.27 * sumValue); + CPPUNIT_ASSERT(sumResidual < 0.29 * sumValue); CPPUNIT_ASSERT(maxResidual < 0.56 * maxValue); - CPPUNIT_ASSERT(percentileError < 0.16 * sumValue); + CPPUNIT_ASSERT(percentileError < 0.22 * sumValue); totalSumResidual += sumResidual; totalMaxResidual += maxResidual; @@ -319,12 +350,6 @@ void CTimeSeriesDecompositionTest::testDistortedPeriodic() { totalPercentileError += percentileError; } - //file << "t = " << core::CContainerPrinter::print(t) << ";\n"; - //file << "f = " << core::CContainerPrinter::print(f) << ";\n"; - //file << "fe = " << core::CContainerPrinter::print(fe) << ";\n"; - //file << "plot(t, f);\n"; - //file << "plot(t, fe, 'r');\n"; - lastWeek += WEEK; } } @@ -335,7 +360,7 @@ void CTimeSeriesDecompositionTest::testDistortedPeriodic() { CPPUNIT_ASSERT(totalSumResidual < 0.18 * totalSumValue); 
CPPUNIT_ASSERT(totalMaxResidual < 0.28 * totalMaxValue); - CPPUNIT_ASSERT(totalPercentileError < 0.09 * totalSumValue); + CPPUNIT_ASSERT(totalPercentileError < 0.1 * totalSumValue); } void CTimeSeriesDecompositionTest::testMinimizeLongComponents() { @@ -357,15 +382,7 @@ void CTimeSeriesDecompositionTest::testMinimizeLongComponents() { rng.generateNormalSamples(0.0, 16.0, times.size(), noise); maths::CTimeSeriesDecomposition decomposition(0.01, HALF_HOUR); - - //std::ofstream file; - //file.open("results.m"); - //file << "hold on;\n"; - //file << "t = " << core::CContainerPrinter::print(times) << ";\n"; - //file << "f = " << core::CContainerPrinter::print(trend) << ";\n"; - //file << "plot(t, f);"; - //TDoubleVec f; - //TDoubleVec r; + CDebugGenerator debug; double totalSumResidual = 0.0; double totalMaxResidual = 0.0; @@ -381,6 +398,7 @@ void CTimeSeriesDecompositionTest::testMinimizeLongComponents() { double value = trend[i] + noise[i]; decomposition.addPoint(time, value); + debug.addValue(time, value); if (time >= lastWeek + WEEK) { LOG_DEBUG(<< "Processing week"); @@ -402,8 +420,7 @@ void CTimeSeriesDecompositionTest::testMinimizeLongComponents() { std::max(std::max(prediction.first - trend[t / HALF_HOUR], trend[t / HALF_HOUR] - prediction.second), 0.0); - //f.push_back(mean(value)); - //r.push_back(residual); + debug.addPrediction(t, mean(prediction), residual); } LOG_DEBUG(<< "'sum residual' / 'sum value' = " << sumResidual / sumValue); @@ -411,9 +428,9 @@ void CTimeSeriesDecompositionTest::testMinimizeLongComponents() { LOG_DEBUG(<< "70% error = " << percentileError / sumValue); if (time >= 2 * WEEK) { - CPPUNIT_ASSERT(sumResidual < 0.16 * sumValue); - CPPUNIT_ASSERT(maxResidual < 0.35 * maxValue); - CPPUNIT_ASSERT(percentileError < 0.05 * sumValue); + CPPUNIT_ASSERT(sumResidual < 0.15 * sumValue); + CPPUNIT_ASSERT(maxResidual < 0.33 * maxValue); + CPPUNIT_ASSERT(percentileError < 0.08 * sumValue); totalSumResidual += sumResidual; totalMaxResidual += maxResidual; @@ -426,8 +443,7 @@ void CTimeSeriesDecompositionTest::testMinimizeLongComponents() { double slope = component.valueSpline().absSlope(); meanSlope += slope; LOG_DEBUG(<< "weekly |slope| = " << slope); - - CPPUNIT_ASSERT(slope < 0.0018); + CPPUNIT_ASSERT(slope < 0.0014); refinements += 1.0; } } @@ -441,18 +457,13 @@ void CTimeSeriesDecompositionTest::testMinimizeLongComponents() { LOG_DEBUG(<< "total 'max residual' / 'max value' = " << totalMaxResidual / totalMaxValue); LOG_DEBUG(<< "total 70% error = " << totalPercentileError / totalSumValue); - //file << "fe = " << core::CContainerPrinter::print(f) << ";\n"; - //file << "r = " << core::CContainerPrinter::print(r) << ";\n"; - //file << "plot(t(1:length(fe)), fe, 'r');\n"; - //file << "plot(t(1:length(r)), r, 'k');\n"; - - CPPUNIT_ASSERT(totalSumResidual < 0.06 * totalSumValue); - CPPUNIT_ASSERT(totalMaxResidual < 0.22 * totalMaxValue); - CPPUNIT_ASSERT(totalPercentileError < 0.03 * totalSumValue); + CPPUNIT_ASSERT(totalSumResidual < 0.05 * totalSumValue); + CPPUNIT_ASSERT(totalMaxResidual < 0.19 * totalMaxValue); + CPPUNIT_ASSERT(totalPercentileError < 0.02 * totalSumValue); meanSlope /= refinements; LOG_DEBUG(<< "mean weekly |slope| = " << meanSlope); - CPPUNIT_ASSERT(meanSlope < 0.0015); + CPPUNIT_ASSERT(meanSlope < 0.0013); } void CTimeSeriesDecompositionTest::testWeekend() { @@ -474,15 +485,7 @@ void CTimeSeriesDecompositionTest::testWeekend() { rng.generateNormalSamples(0.0, 20.0, times.size(), noise); maths::CTimeSeriesDecomposition decomposition(0.01, HALF_HOUR); - - 
//std::ofstream file; - //file.open("results.m"); - //file << "hold on;\n"; - //file << "t = " << core::CContainerPrinter::print(times) << ";\n"; - //file << "f = " << core::CContainerPrinter::print(trend) << ";\n"; - //file << "plot(t, f);"; - //TDoubleVec f; - //TDoubleVec r; + CDebugGenerator debug; double totalSumResidual = 0.0; double totalMaxResidual = 0.0; @@ -496,6 +499,7 @@ void CTimeSeriesDecompositionTest::testWeekend() { double value = trend[i] + noise[i]; decomposition.addPoint(time, value); + debug.addValue(time, value); if (time >= lastWeek + WEEK) { LOG_DEBUG(<< "Processing week"); @@ -517,8 +521,7 @@ void CTimeSeriesDecompositionTest::testWeekend() { std::max(std::max(prediction.first - trend[t / HALF_HOUR], trend[t / HALF_HOUR] - prediction.second), 0.0); - //f.push_back(mean(value)); - //r.push_back(residual); + debug.addPrediction(t, mean(prediction), residual); } LOG_DEBUG(<< "'sum residual' / 'sum value' = " << sumResidual / sumValue); @@ -541,11 +544,6 @@ void CTimeSeriesDecompositionTest::testWeekend() { } } - //file << "fe = " << core::CContainerPrinter::print(f) << ";\n"; - //file << "r = " << core::CContainerPrinter::print(r) << ";\n"; - //file << "plot(t(1:length(fe)), fe, 'r');\n"; - //file << "plot(t(1:length(r)), r, 'k');\n"; - LOG_DEBUG(<< "total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue); LOG_DEBUG(<< "total 'max residual' / 'max value' = " << totalMaxResidual / totalMaxValue); LOG_DEBUG(<< "total 70% error = " << totalPercentileError / totalSumValue); @@ -573,15 +571,7 @@ void CTimeSeriesDecompositionTest::testSinglePeriodicity() { rng.generateNormalSamples(noiseMean, noiseVariance, times.size(), noise); maths::CTimeSeriesDecomposition decomposition(0.01, HALF_HOUR); - - //std::ofstream file; - //file.open("results.m"); - //file << "hold on;\n"; - //file << "t = " << core::CContainerPrinter::print(times) << ";\n"; - //file << "f = " << core::CContainerPrinter::print(timeseries) << ";\n"; - //file << "plot(t, f);\n"; - //TDoubleVec f; - //TDoubleVec r; + CDebugGenerator debug; double totalSumResidual = 0.0; double totalMaxResidual = 0.0; @@ -595,6 +585,7 @@ void CTimeSeriesDecompositionTest::testSinglePeriodicity() { double value = trend[i] + noise[i]; decomposition.addPoint(time, value); + debug.addValue(time, value); if (time >= lastWeek + WEEK) { LOG_DEBUG(<< "Processing week"); @@ -617,8 +608,7 @@ void CTimeSeriesDecompositionTest::testSinglePeriodicity() { std::max(prediction.first - (trend[t / HALF_HOUR] + noiseMean), (trend[t / HALF_HOUR] + noiseMean) - prediction.second), 0.0); - //f.push_back(mean(value)); - //r.push_back(residual); + debug.addPrediction(t, mean(prediction), residual); } LOG_DEBUG(<< "'sum residual' / 'sum value' = " << sumResidual / sumValue); @@ -626,9 +616,9 @@ void CTimeSeriesDecompositionTest::testSinglePeriodicity() { LOG_DEBUG(<< "70% error = " << percentileError / sumValue); if (time >= 1 * WEEK) { - CPPUNIT_ASSERT(sumResidual < 0.06 * sumValue); - CPPUNIT_ASSERT(maxResidual < 0.08 * maxValue); - CPPUNIT_ASSERT(percentileError < 0.02 * sumValue); + CPPUNIT_ASSERT(sumResidual < 0.025 * sumValue); + CPPUNIT_ASSERT(maxResidual < 0.035 * maxValue); + CPPUNIT_ASSERT(percentileError < 0.01 * sumValue); totalSumResidual += sumResidual; totalMaxResidual += maxResidual; @@ -650,8 +640,8 @@ void CTimeSeriesDecompositionTest::testSinglePeriodicity() { LOG_DEBUG(<< "total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue); LOG_DEBUG(<< "total 'max residual' / 'max value' = " << 
totalMaxResidual / totalMaxValue); LOG_DEBUG(<< "total 70% error = " << totalPercentileError / totalSumValue); - CPPUNIT_ASSERT(totalSumResidual < 0.013 * totalSumValue); - CPPUNIT_ASSERT(totalMaxResidual < 0.023 * totalMaxValue); + CPPUNIT_ASSERT(totalSumResidual < 0.014 * totalSumValue); + CPPUNIT_ASSERT(totalMaxResidual < 0.022 * totalMaxValue); CPPUNIT_ASSERT(totalPercentileError < 0.01 * totalSumValue); // Check that only the daily component has been initialized. @@ -659,11 +649,6 @@ void CTimeSeriesDecompositionTest::testSinglePeriodicity() { CPPUNIT_ASSERT_EQUAL(std::size_t(1), components.size()); CPPUNIT_ASSERT_EQUAL(DAY, components[0].time().period()); CPPUNIT_ASSERT(components[0].initialized()); - - //file << "fe = " << core::CContainerPrinter::print(f) << ";\n"; - //file << "r = " << core::CContainerPrinter::print(r) << ";\n"; - //file << "plot(t(1:length(fe)), fe, 'r');\n"; - //file << "plot(t(1:length(r)), r, 'k');\n"; } void CTimeSeriesDecompositionTest::testSeasonalOnset() { @@ -689,15 +674,7 @@ void CTimeSeriesDecompositionTest::testSeasonalOnset() { rng.generateNormalSamples(0.0, 4.0, times.size(), noise); maths::CTimeSeriesDecomposition decomposition(0.01, HOUR); - - //std::ofstream file; - //file.open("results.m"); - //file << "hold on;\n"; - //file << "t = " << core::CContainerPrinter::print(times) << ";\n"; - //file << "f = " << core::CContainerPrinter::print(trend) << ";\n"; - //file << "plot(t, f, 'r');\n"; - //TDoubleVec f; - //TDoubleVec r; + CDebugGenerator debug; double totalSumResidual = 0.0; double totalMaxResidual = 0.0; @@ -711,6 +688,7 @@ void CTimeSeriesDecompositionTest::testSeasonalOnset() { double value = trend[i] + noise[i]; decomposition.addPoint(time, value); + debug.addValue(time, value); if (time >= lastWeek + WEEK) { LOG_DEBUG(<< "Processing week"); @@ -731,8 +709,7 @@ void CTimeSeriesDecompositionTest::testSeasonalOnset() { std::max(std::max(prediction.first - trend[t / HOUR], trend[t / HOUR] - prediction.second), 0.0); - //f.push_back(mean(value)); - //r.push_back(residual); + debug.addPrediction(t, mean(prediction), residual); } LOG_DEBUG(<< "'sum residual' / 'sum value' = " @@ -767,17 +744,12 @@ void CTimeSeriesDecompositionTest::testSeasonalOnset() { } } - //file << "fe = " << core::CContainerPrinter::print(f) << ";\n"; - //file << "r = " << core::CContainerPrinter::print(r) << ";\n"; - //file << "plot(t(1:length(fe)), fe);\n"; - //file << "plot(t(1:length(r)), r, 'k');\n"; - LOG_DEBUG(<< "total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue); LOG_DEBUG(<< "total 'max residual' / 'max value' = " << totalMaxResidual / totalMaxValue); LOG_DEBUG(<< "total 70% error = " << totalPercentileError / totalSumValue); - CPPUNIT_ASSERT(totalSumResidual < 0.062 * totalSumValue); - CPPUNIT_ASSERT(totalMaxResidual < 0.077 * totalMaxValue); - CPPUNIT_ASSERT(totalPercentileError < 0.03 * totalSumValue); + CPPUNIT_ASSERT(totalSumResidual < 0.053 * totalSumValue); + CPPUNIT_ASSERT(totalMaxResidual < 0.071 * totalMaxValue); + CPPUNIT_ASSERT(totalPercentileError < 0.02 * totalSumValue); } void CTimeSeriesDecompositionTest::testVarianceScale() { @@ -830,7 +802,7 @@ void CTimeSeriesDecompositionTest::testVarianceScale() { LOG_DEBUG(<< "mean error = " << maths::CBasicStatistics::mean(error)); LOG_DEBUG(<< "mean 70% error = " << maths::CBasicStatistics::mean(percentileError)) LOG_DEBUG(<< "mean scale = " << maths::CBasicStatistics::mean(meanScale)); - CPPUNIT_ASSERT(maths::CBasicStatistics::mean(error) < 0.24); + 
CPPUNIT_ASSERT(maths::CBasicStatistics::mean(error) < 0.28); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(percentileError) < 0.05); CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, maths::CBasicStatistics::mean(meanScale), 0.04); } @@ -880,7 +852,7 @@ void CTimeSeriesDecompositionTest::testVarianceScale() { LOG_DEBUG(<< "mean error = " << maths::CBasicStatistics::mean(error)); LOG_DEBUG(<< "mean 70% error = " << maths::CBasicStatistics::mean(percentileError)); LOG_DEBUG(<< "mean scale = " << maths::CBasicStatistics::mean(meanScale)); - CPPUNIT_ASSERT(maths::CBasicStatistics::mean(error) < 0.22); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(error) < 0.23); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(percentileError) < 0.1); CPPUNIT_ASSERT_DOUBLES_EQUAL(1.0, maths::CBasicStatistics::mean(meanScale), 0.01); } @@ -947,6 +919,7 @@ void CTimeSeriesDecompositionTest::testSpikeyDataProblemCase() { maths::CTimeSeriesDecomposition decomposition(0.01, FIVE_MINS); maths::CNormalMeanPrecConjugate model = maths::CNormalMeanPrecConjugate::nonInformativePrior( maths_t::E_ContinuousData, 0.01); + CDebugGenerator debug; core_t::TTime lastWeek = (startTime / WEEK + 1) * WEEK; TTimeDoublePrVec lastWeekTimeseries; @@ -975,6 +948,7 @@ void CTimeSeriesDecompositionTest::testSpikeyDataProblemCase() { std::max(prediction.first - lastWeekTimeseries[j].second, lastWeekTimeseries[j].second - prediction.second), 0.0); + debug.addPrediction(lastWeekTimeseries[j].first, mean(prediction), residual); } LOG_DEBUG(<< "'sum residual' / 'sum value' = " @@ -1003,15 +977,16 @@ void CTimeSeriesDecompositionTest::testSpikeyDataProblemCase() { } model.addSamples({decomposition.detrend(time, value, 70.0)}, maths_t::CUnitWeights::SINGLE_UNIT); + debug.addValue(time, value); } LOG_DEBUG(<< "total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue); LOG_DEBUG(<< "total 'max residual' / 'max value' = " << totalMaxResidual / totalMaxValue); LOG_DEBUG(<< "total 70% error = " << totalPercentileError / totalSumValue); - CPPUNIT_ASSERT(totalSumResidual < 0.19 * totalSumValue); - CPPUNIT_ASSERT(totalMaxResidual < 0.31 * totalMaxValue); - CPPUNIT_ASSERT(totalPercentileError < 0.13 * totalSumValue); + CPPUNIT_ASSERT(totalSumResidual < 0.20 * totalSumValue); + CPPUNIT_ASSERT(totalMaxResidual < 0.38 * totalMaxValue); + CPPUNIT_ASSERT(totalPercentileError < 0.17 * totalSumValue); //std::ofstream file; //file.open("results.m"); @@ -1078,13 +1053,6 @@ void CTimeSeriesDecompositionTest::testVeryLargeValuesProblemCase() { << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) << " ..."); - //std::ofstream file; - //file.open("results.m"); - //TDoubleVec times; - //TDoubleVec values; - //TDoubleVec f; - //TDoubleVec r; - double totalSumResidual = 0.0; double totalMaxResidual = 0.0; double totalSumValue = 0.0; @@ -1092,6 +1060,7 @@ void CTimeSeriesDecompositionTest::testVeryLargeValuesProblemCase() { double totalPercentileError = 0.0; maths::CTimeSeriesDecomposition decomposition(0.01, FIVE_MINS); + CDebugGenerator debug; core_t::TTime lastWeek = (startTime / WEEK + 1) * WEEK; TTimeDoublePrVec lastWeekTimeseries; @@ -1120,10 +1089,7 @@ void CTimeSeriesDecompositionTest::testVeryLargeValuesProblemCase() { std::max(prediction.first - lastWeekTimeseries[j].second, lastWeekTimeseries[j].second - prediction.second), 0.0); - //times.push_back(lastWeekTimeseries[j].first); - //values.push_back(lastWeekTimeseries[j].second); - //f.push_back(mean(value)); - //r.push_back(residual); + debug.addPrediction(lastWeekTimeseries[j].first, 
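testSpikeyDataProblemCase pairs the decomposition with a CNormalMeanPrecConjugate residual prior: the decomposition sees every raw value, while the prior is trained on what remains after detrending at a 70% confidence interval. The sketch below mirrors the calls visible in this hunk; the include paths and the free-standing helper are assumptions, not part of the patch.

#include <core/CoreTypes.h>
#include <maths/CNormalMeanPrecConjugate.h>
#include <maths/CTimeSeriesDecomposition.h>

// One update step of the spikey-data test: the raw value goes to the
// decomposition, the detrended residual goes to the conjugate prior.
void updateOnePoint(ml::maths::CTimeSeriesDecomposition& decomposition,
                    ml::maths::CNormalMeanPrecConjugate& model,
                    ml::core_t::TTime time,
                    double value) {
    decomposition.addPoint(time, value);
    // Detrending at 70% confidence means values inside the predicted interval
    // contribute no residual to the prior.
    model.addSamples({decomposition.detrend(time, value, 70.0)},
                     ml::maths_t::CUnitWeights::SINGLE_UNIT);
}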
mean(prediction), residual); } LOG_DEBUG(<< "'sum residual' / 'sum value' = " << sumResidual / sumValue); @@ -1146,24 +1112,16 @@ void CTimeSeriesDecompositionTest::testVeryLargeValuesProblemCase() { } decomposition.addPoint(time, value); + debug.addValue(time, value); } LOG_DEBUG(<< "total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue); LOG_DEBUG(<< "total 'max residual' / 'max value' = " << totalMaxResidual / totalMaxValue); LOG_DEBUG(<< "total 70% error = " << totalPercentileError / totalSumValue); - CPPUNIT_ASSERT(totalSumResidual < 0.26 * totalSumValue); - CPPUNIT_ASSERT(totalMaxResidual < 0.72 * totalMaxValue); - CPPUNIT_ASSERT(totalPercentileError < 0.14 * totalSumValue); - - //file << "hold on;\n"; - //file << "t = " << core::CContainerPrinter::print(times) << ";\n"; - //file << "f = " << core::CContainerPrinter::print(values) << ";\n"; - //file << "plot(t, f, 'r');\n"; - //file << "fe = " << core::CContainerPrinter::print(f) << ";\n"; - //file << "r = " << core::CContainerPrinter::print(r) << ";\n"; - //file << "plot(t(1:length(fe)), fe);\n"; - //file << "plot(t(1:length(r)), r, 'k');\n"; + CPPUNIT_ASSERT(totalSumResidual < 0.35 * totalSumValue); + CPPUNIT_ASSERT(totalMaxResidual < 0.73 * totalMaxValue); + CPPUNIT_ASSERT(totalPercentileError < 0.17 * totalSumValue); TMeanAccumulator scale; double variance = decomposition.meanVariance(); @@ -1190,13 +1148,6 @@ void CTimeSeriesDecompositionTest::testMixedSmoothAndSpikeyDataProblemCase() { << core::CContainerPrinter::print(timeseries.begin(), timeseries.begin() + 10) << " ..."); - //std::ofstream file; - //file.open("results.m"); - //TDoubleVec times; - //TDoubleVec values; - //TDoubleVec f; - //TDoubleVec r; - double totalSumResidual = 0.0; double totalMaxResidual = 0.0; double totalSumValue = 0.0; @@ -1204,6 +1155,7 @@ void CTimeSeriesDecompositionTest::testMixedSmoothAndSpikeyDataProblemCase() { double totalPercentileError = 0.0; maths::CTimeSeriesDecomposition decomposition(0.01, HALF_HOUR); + CDebugGenerator debug; core_t::TTime lastWeek = (startTime / WEEK + 1) * WEEK; TTimeDoublePrVec lastWeekTimeseries; @@ -1232,10 +1184,7 @@ void CTimeSeriesDecompositionTest::testMixedSmoothAndSpikeyDataProblemCase() { std::max(prediction.first - lastWeekTimeseries[j].second, lastWeekTimeseries[j].second - prediction.second), 0.0); - //times.push_back(lastWeekTimeseries[j].first); - //values.push_back(lastWeekTimeseries[j].second); - //f.push_back(mean(value)); - //r.push_back(residual); + debug.addPrediction(lastWeekTimeseries[j].first, mean(prediction), residual); } LOG_DEBUG(<< "'sum residual' / 'sum value' = " @@ -1260,39 +1209,30 @@ void CTimeSeriesDecompositionTest::testMixedSmoothAndSpikeyDataProblemCase() { } decomposition.addPoint(time, value); + debug.addValue(time, value); } LOG_DEBUG(<< "total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue); LOG_DEBUG(<< "total 'max residual' / 'max value' = " << totalMaxResidual / totalMaxValue); LOG_DEBUG(<< "total 70% error = " << totalPercentileError / totalSumValue); - //file << "hold on;\n"; - //file << "t = " << core::CContainerPrinter::print(times) << ";\n"; - //file << "f = " << core::CContainerPrinter::print(values) << ";\n"; - //file << "plot(t, f, 'r');\n"; - //file << "fe = " << core::CContainerPrinter::print(f) << ";\n"; - //file << "r = " << core::CContainerPrinter::print(r) << ";\n"; - //file << "plot(t(1:length(fe)), fe);\n"; - //file << "plot(t(1:length(r)), r, 'k');\n"; - - CPPUNIT_ASSERT(totalSumResidual < 0.14 * totalSumValue); - 
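In these loops `prediction` is the (lower, upper) confidence interval returned by the decomposition, and `mean(prediction)` is a small local helper in the test file rather than a library call. It is not shown in this excerpt; an assumed equivalent is simply the midpoint of the interval.

#include <utility>

// Assumed equivalent of the test file's local mean() helper: the midpoint of
// a (lower, upper) confidence interval.
double mean(const std::pair<double, double>& interval) {
    return (interval.first + interval.second) / 2.0;
}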
CPPUNIT_ASSERT(totalMaxResidual < 0.38 * totalMaxValue); - CPPUNIT_ASSERT(totalPercentileError < 0.04 * totalSumValue); + CPPUNIT_ASSERT(totalSumResidual < 0.15 * totalSumValue); + CPPUNIT_ASSERT(totalMaxResidual < 0.45 * totalMaxValue); + CPPUNIT_ASSERT(totalPercentileError < 0.07 * totalSumValue); } void CTimeSeriesDecompositionTest::testDiurnalPeriodicityWithMissingValues() { + // Test the accuracy of the modeling when there are periodically missing + // values. + test::CRandomNumbers rng; LOG_DEBUG(<< "Daily Periodic"); { - //std::ofstream file; - //file.open("results.m"); - //TDoubleVec times; - //TDoubleVec values; - //TDoubleVec f; + maths::CTimeSeriesDecomposition decomposition(0.01, HALF_HOUR); + CDebugGenerator debug("daily.m"); TMeanAccumulator error; - maths::CTimeSeriesDecomposition decomposition(0.01, HALF_HOUR); core_t::TTime time = 0; for (std::size_t t = 0u; t < 50; ++t) { for (auto value : @@ -1304,16 +1244,15 @@ void CTimeSeriesDecompositionTest::testDiurnalPeriodicityWithMissingValues() { if (value > 0.0) { TDoubleVec noise; rng.generateNormalSamples(10.0, 2.0, 1, noise); - decomposition.addPoint(time, value + noise[0]); + value += noise[0]; + decomposition.addPoint(time, value); + debug.addValue(time, value); + double prediction = + maths::CBasicStatistics::mean(decomposition.value(time, 0.0)); if (decomposition.initialized()) { - error.add(std::fabs((value + noise[0] - - maths::CBasicStatistics::mean( - decomposition.value(time, 0.0)))) / - std::fabs(value + noise[0])); + error.add(std::fabs(value - prediction) / std::fabs(value)); } - //times.push_back(time); - //values.push_back(value + noise[0]); - //f.push_back(maths::CBasicStatistics::mean(decomposition.value(time, 0.0))); + debug.addPrediction(time, prediction, value - prediction); } time += HALF_HOUR; } @@ -1321,25 +1260,14 @@ void CTimeSeriesDecompositionTest::testDiurnalPeriodicityWithMissingValues() { LOG_DEBUG(<< "mean error = " << maths::CBasicStatistics::mean(error)); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(error) < 0.09); - - //file << "hold on;\n"; - //file << "t = " << core::CContainerPrinter::print(times) << ";\n"; - //file << "f = " << core::CContainerPrinter::print(values) << ";\n"; - //file << "plot(t, f, 'r');\n"; - //file << "fe = " << core::CContainerPrinter::print(f) << ";\n"; - //file << "plot(t(1:length(fe)), fe);\n"; } LOG_DEBUG(<< "Weekly Periodic"); { - //std::ofstream file; - //file.open("results.m"); - //TDoubleVec times; - //TDoubleVec values; - //TDoubleVec f; + maths::CTimeSeriesDecomposition decomposition(0.01, HOUR); + CDebugGenerator debug("weekly.m"); TMeanAccumulator error; - maths::CTimeSeriesDecomposition decomposition(0.01, HOUR); core_t::TTime time = 0; for (std::size_t t = 0u; t < 10; ++t) { for (auto value : @@ -1363,34 +1291,28 @@ void CTimeSeriesDecompositionTest::testDiurnalPeriodicityWithMissingValues() { if (value > 0.0) { TDoubleVec noise; rng.generateNormalSamples(10.0, 2.0, 1, noise); - decomposition.addPoint(time, value + noise[0]); + value += noise[0]; + decomposition.addPoint(time, value); + debug.addValue(time, value); + double prediction = + maths::CBasicStatistics::mean(decomposition.value(time, 0.0)); if (decomposition.initialized()) { - error.add(std::fabs((value + noise[0] - - maths::CBasicStatistics::mean( - decomposition.value(time, 0.0)))) / - std::fabs(value + noise[0])); + error.add(std::fabs(value - prediction) / std::fabs(value)); } - //times.push_back(time); - //values.push_back(value + noise[0]); - 
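In testDiurnalPeriodicityWithMissingValues a zero in the pattern stands for a missing bucket: no point is added to the decomposition and no error is recorded, and errors are only accumulated once the decomposition reports initialized(). The relative-error bookkeeping uses the library's TMeanAccumulator; a standard-library stand-in looks like this.

#include <cmath>

// Incremental mean, standing in for the TMeanAccumulator used by the tests.
class CMeanAccumulatorSketch {
public:
    void add(double x) {
        m_Count += 1.0;
        m_Mean += (x - m_Mean) / m_Count;
    }
    double mean() const { return m_Mean; }

private:
    double m_Count = 0.0;
    double m_Mean = 0.0;
};

// Relative error |value - prediction| / |value|, recorded only for buckets
// which actually had a value.
void addRelativeError(CMeanAccumulatorSketch& error, double value, double prediction) {
    if (value > 0.0) {
        error.add(std::fabs(value - prediction) / std::fabs(value));
    }
}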
//f.push_back(maths::CBasicStatistics::mean(decomposition.value(time, 0.0))); + debug.addPrediction(time, prediction, value - prediction); } time += HOUR; } } LOG_DEBUG(<< "mean error = " << maths::CBasicStatistics::mean(error)) - CPPUNIT_ASSERT(maths::CBasicStatistics::mean(error) < 0.1); - - //file << "hold on;\n"; - //file << "t = " << core::CContainerPrinter::print(times) << ";\n"; - //file << "f = " << core::CContainerPrinter::print(values) << ";\n"; - //file << "plot(t, f, 'r');\n"; - //file << "fe = " << core::CContainerPrinter::print(f) << ";\n"; - //file << "plot(t(1:length(fe)), fe);\n"; + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(error) < 0.11); } } void CTimeSeriesDecompositionTest::testLongTermTrend() { + // Test a simple linear ramp and non-periodic saw tooth series. + const core_t::TTime length = 120 * DAY; TTimeVec times; @@ -1400,11 +1322,6 @@ void CTimeSeriesDecompositionTest::testLongTermTrend() { TDoubleVec noise; rng.generateNormalSamples(0.0, 25.0, length / HALF_HOUR, noise); - //std::ofstream file; - //file.open("results.m"); - //TDoubleVec f; - //TDoubleVec values; - LOG_DEBUG(<< "Linear Ramp"); { for (core_t::TTime time = 0; time < length; time += HALF_HOUR) { @@ -1413,6 +1330,7 @@ void CTimeSeriesDecompositionTest::testLongTermTrend() { } maths::CTimeSeriesDecomposition decomposition(0.024, HALF_HOUR); + CDebugGenerator debug("ramp.m"); double totalSumResidual = 0.0; double totalMaxResidual = 0.0; @@ -1422,6 +1340,7 @@ void CTimeSeriesDecompositionTest::testLongTermTrend() { for (std::size_t i = 0u; i < times.size(); ++i) { decomposition.addPoint(times[i], trend[i] + noise[i]); + debug.addValue(times[i], trend[i] + noise[i]); if (times[i] > lastDay + DAY) { LOG_DEBUG(<< "Processing day " << times[i] / DAY); @@ -1439,6 +1358,7 @@ void CTimeSeriesDecompositionTest::testLongTermTrend() { maxResidual = std::max(maxResidual, residual); sumValue += std::fabs(trend[j]); maxValue = std::max(maxValue, std::fabs(trend[j])); + debug.addPrediction(times[j], mean(prediction), residual); } LOG_DEBUG(<< "'sum residual' / 'sum value' = " @@ -1456,20 +1376,11 @@ void CTimeSeriesDecompositionTest::testLongTermTrend() { } lastDay += DAY; } - //values.push_back(trend[i] + noise[i]); - //f.push_back(maths::CBasicStatistics::mean(decomposition.value(times[i]))); } LOG_DEBUG(<< "total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue); LOG_DEBUG(<< "total 'max residual' / 'max value' = " << totalMaxResidual / totalMaxValue); - //file << "t = " << core::CContainerPrinter::print(times) << ";\n"; - //file << "f = " << core::CContainerPrinter::print(values) << ";\n"; - //file << "fe = " << core::CContainerPrinter::print(f) << ";\n"; - //file << "hold on;\n"; - //file << "plot(t, f, 'r');\n"; - //file << "plot(t, fe);\n"; - CPPUNIT_ASSERT(totalSumResidual / totalSumValue < 0.01); CPPUNIT_ASSERT(totalMaxResidual / totalMaxValue < 0.01); } @@ -1493,6 +1404,7 @@ void CTimeSeriesDecompositionTest::testLongTermTrend() { } maths::CTimeSeriesDecomposition decomposition(0.01, HALF_HOUR); + CDebugGenerator debug("saw_tooth.m"); double totalSumResidual = 0.0; double totalMaxResidual = 0.0; @@ -1502,6 +1414,7 @@ void CTimeSeriesDecompositionTest::testLongTermTrend() { for (std::size_t i = 0u; i < times.size(); ++i) { decomposition.addPoint(times[i], trend[i] + 0.3 * noise[i]); + debug.addValue(times[i], trend[i] + 0.3 * noise[i]); if (times[i] > lastDay + DAY) { LOG_DEBUG(<< "Processing day " << times[i] / DAY); @@ -1519,6 +1432,7 @@ void 
CTimeSeriesDecompositionTest::testLongTermTrend() { maxResidual = std::max(maxResidual, residual); sumValue += std::fabs(trend[j]); maxValue = std::max(maxValue, std::fabs(trend[j])); + debug.addPrediction(times[j], mean(prediction), residual); } LOG_DEBUG(<< "'sum residual' / 'sum value' = " @@ -1533,27 +1447,18 @@ void CTimeSeriesDecompositionTest::testLongTermTrend() { } lastDay += DAY; } - //values.push_back(trend[i] + 0.3*noise[i]); - //f.push_back(maths::CBasicStatistics::mean(decomposition.value(times[i]))); } LOG_DEBUG(<< "total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue); LOG_DEBUG(<< "total 'max residual' / 'max value' = " << totalMaxResidual / totalMaxValue); - //file << "t = " << core::CContainerPrinter::print(times) << ";\n"; - //file << "f = " << core::CContainerPrinter::print(values) << ";\n"; - //file << "fe = " << core::CContainerPrinter::print(f) << ";\n"; - //file << "hold on;\n"; - //file << "plot(t, f, 'r');\n"; - //file << "plot(t, fe);\n"; - CPPUNIT_ASSERT(totalSumResidual / totalSumValue < 0.38); CPPUNIT_ASSERT(totalMaxResidual / totalMaxValue < 0.41); } } void CTimeSeriesDecompositionTest::testLongTermTrendAndPeriodicity() { - // Test long term mean reverting component plus daily periodic component. + // Test a long term mean reverting component plus daily periodic component. TTimeVec times; TDoubleVec trend; @@ -1573,12 +1478,8 @@ void CTimeSeriesDecompositionTest::testLongTermTrendAndPeriodicity() { TDoubleVec noise; rng.generateNormalSamples(0.0, 4.0, times.size(), noise); - //std::ofstream file; - //file.open("results.m"); - //TDoubleVec f; - //TDoubleVec values; - maths::CTimeSeriesDecomposition decomposition(0.024, HALF_HOUR); + CDebugGenerator debug; double totalSumResidual = 0.0; double totalMaxResidual = 0.0; @@ -1588,6 +1489,7 @@ void CTimeSeriesDecompositionTest::testLongTermTrendAndPeriodicity() { for (std::size_t i = 0u; i < times.size(); ++i) { decomposition.addPoint(times[i], trend[i] + 0.3 * noise[i]); + debug.addValue(times[i], trend[i] + 0.3 * noise[i]); if (times[i] > lastDay + DAY) { LOG_DEBUG(<< "Processing day " << times[i] / DAY); @@ -1605,6 +1507,7 @@ void CTimeSeriesDecompositionTest::testLongTermTrendAndPeriodicity() { maxResidual = std::max(maxResidual, residual); sumValue += std::fabs(trend[j]); maxValue = std::max(maxValue, std::fabs(trend[j])); + debug.addPrediction(times[j], mean(prediction), residual); } LOG_DEBUG(<< "'sum residual' / 'sum value' = " @@ -1617,29 +1520,23 @@ void CTimeSeriesDecompositionTest::testLongTermTrendAndPeriodicity() { totalSumValue += sumValue; totalMaxValue += maxValue; - CPPUNIT_ASSERT(sumResidual / sumValue < 0.34); - CPPUNIT_ASSERT(maxResidual / maxValue < 0.39); + CPPUNIT_ASSERT(sumResidual / sumValue < 0.42); + CPPUNIT_ASSERT(maxResidual / maxValue < 0.46); } lastDay += DAY; } - //values.push_back(trend[i] + 0.3 * noise[i]); - //f.push_back(maths::CBasicStatistics::mean(decomposition.value(times[i]))); } LOG_DEBUG(<< "total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue); LOG_DEBUG(<< "total 'max residual' / 'max value' = " << totalMaxResidual / totalMaxValue); - //file << "t = " << core::CContainerPrinter::print(times) << ";\n"; - //file << "f = " << core::CContainerPrinter::print(values) << ";\n"; - //file << "fe = " << core::CContainerPrinter::print(f) << ";\n"; - //file << "plot(t, f, 'r');\n"; - //file << "plot(t, fe);\n"; - CPPUNIT_ASSERT(totalSumResidual / totalSumValue < 0.04); CPPUNIT_ASSERT(totalMaxResidual / totalMaxValue < 0.05); } void 
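The long term trend tests all use the same driver: add points one at a time and, whenever a whole day has elapsed, score the predictions for the buckets of that completed day before sliding the window forward. A generic standard-library sketch of that loop; the scoring callback stands in for the residual bookkeeping shown above.

#include <cstddef>
#include <functional>
#include <vector>

// Walk the series and hand each completed day's bucket range [begin, end) to
// a scoring callback, mirroring the 'if (times[i] > lastDay + DAY)' pattern.
void evaluateByDay(const std::vector<long long>& times,
                   const std::function<void(std::size_t, std::size_t)>& scoreDay) {
    const long long DAY = 86400;
    if (times.empty()) {
        return;
    }
    long long lastDay = times.front();
    std::size_t dayBegin = 0;
    for (std::size_t i = 0; i < times.size(); ++i) {
        if (times[i] > lastDay + DAY) {
            scoreDay(dayBegin, i);
            dayBegin = i;
            lastDay += DAY;
        }
    }
}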
CTimeSeriesDecompositionTest::testNonDiurnal() { + // Test the accuracy of the modeling of some non-daily or weekly + // seasonal components. test::CRandomNumbers rng; LOG_DEBUG(<< "Hourly"); @@ -1650,7 +1547,7 @@ void CTimeSeriesDecompositionTest::testNonDiurnal() { 2.0, 1.0, 0.5, 0.5, 1.0, 6.0}; TTimeVec times; - TDoubleVec trends[2]{TDoubleVec(), TDoubleVec(8 * DAY / FIVE_MINS)}; + TDoubleVec trends[2]{{}, {8 * DAY / FIVE_MINS}}; for (core_t::TTime time = 0; time < length; time += FIVE_MINS) { times.push_back(time); trends[0].push_back(periodic[(time / FIVE_MINS) % 12]); @@ -1661,15 +1558,11 @@ void CTimeSeriesDecompositionTest::testNonDiurnal() { rng.generateNormalSamples(0.0, 1.0, trends[1].size(), noise); core_t::TTime startTesting[]{3 * HOUR, 16 * DAY}; - TDoubleVec thresholds[]{TDoubleVec{0.08, 0.07}, TDoubleVec{0.18, 0.15}}; + TDoubleVec thresholds[]{TDoubleVec{0.08, 0.08}, TDoubleVec{0.16, 0.1}}; for (std::size_t t = 0u; t < 2; ++t) { - //std::ofstream file; - //file.open("results.m"); - //TDoubleVec f; - //TDoubleVec values; - maths::CTimeSeriesDecomposition decomposition(0.01, FIVE_MINS); + CDebugGenerator debug("hourly." + core::CStringUtils::typeToString(t) + ".m"); double totalSumResidual = 0.0; double totalMaxResidual = 0.0; @@ -1679,6 +1572,7 @@ void CTimeSeriesDecompositionTest::testNonDiurnal() { for (std::size_t i = 0u; i < times.size(); ++i) { decomposition.addPoint(times[i], trends[t][i] + noise[i]); + debug.addValue(times[i], trends[t][i] + noise[i]); if (times[i] > lastHour + HOUR) { LOG_DEBUG(<< "Processing hour " << times[i] / HOUR); @@ -1697,6 +1591,7 @@ void CTimeSeriesDecompositionTest::testNonDiurnal() { maxResidual = std::max(maxResidual, residual); sumValue += std::fabs(trends[t][j]); maxValue = std::max(maxValue, std::fabs(trends[t][j])); + debug.addPrediction(times[j], mean(prediction), residual); } LOG_DEBUG(<< "'sum residual' / 'sum value' = " @@ -1714,8 +1609,6 @@ void CTimeSeriesDecompositionTest::testNonDiurnal() { } lastHour += HOUR; } - //values.push_back(trends[t][i] + noise[i]); - //f.push_back(maths::CBasicStatistics::mean(decomposition.value(times[i]))); } LOG_DEBUG(<< "total 'sum residual' / 'sum value' = " @@ -1723,12 +1616,6 @@ void CTimeSeriesDecompositionTest::testNonDiurnal() { LOG_DEBUG(<< "total 'max residual' / 'max value' = " << totalMaxResidual / totalMaxValue); - //file << "t = " << core::CContainerPrinter::print(times) << ";\n"; - //file << "f = " << core::CContainerPrinter::print(values) << ";\n"; - //file << "fe = " << core::CContainerPrinter::print(f) << ";\n"; - //file << "plot(t, f, 'r');\n"; - //file << "plot(t, fe);\n"; - CPPUNIT_ASSERT(totalSumResidual / totalSumValue < thresholds[t][0]); CPPUNIT_ASSERT(totalMaxResidual / totalMaxValue < thresholds[t][1]); } @@ -1751,13 +1638,9 @@ void CTimeSeriesDecompositionTest::testNonDiurnal() { TDoubleVec noise; rng.generateNormalSamples(0.0, 2.0, times.size(), noise); - //std::ofstream file; - //file.open("results.m"); - //TDoubleVec f; - //TDoubleVec values; - core_t::TTime startTesting{14 * DAY}; maths::CTimeSeriesDecomposition decomposition(0.01, TEN_MINS); + CDebugGenerator debug("two_day.m"); double totalSumResidual = 0.0; double totalMaxResidual = 0.0; @@ -1767,6 +1650,7 @@ void CTimeSeriesDecompositionTest::testNonDiurnal() { for (std::size_t i = 0u; i < times.size(); ++i) { decomposition.addPoint(times[i], trend[i] + noise[i]); + debug.addValue(times[i], trend[i] + noise[i]); if (times[i] > lastTwoDay + 2 * DAY) { LOG_DEBUG(<< "Processing two days " << times[i] / 2 * DAY); @@ 
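The hourly case in testNonDiurnal builds its signal by tiling a fixed pattern of twelve five-minute buckets, giving a one hour period that is neither daily nor weekly. A small sketch of that construction; the pattern values here are placeholders since only the tail of the test's array is visible in this hunk.

#include <array>
#include <cstddef>
#include <vector>

// Tile a 12 x 5min pattern to produce an hourly periodic trend sampled every
// five minutes. The values are illustrative, not the test's actual pattern.
std::vector<double> makeHourlyTrend(long long lengthInSeconds) {
    const long long FIVE_MINS = 300;
    const std::array<double, 12> periodic{4.0, 1.0, 0.5, 0.5, 1.0, 3.0,
                                          2.0, 1.0, 0.5, 0.5, 1.0, 6.0};
    std::vector<double> trend;
    for (long long time = 0; time < lengthInSeconds; time += FIVE_MINS) {
        trend.push_back(periodic[static_cast<std::size_t>((time / FIVE_MINS) % 12)]);
    }
    return trend;
}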
-1784,6 +1668,7 @@ void CTimeSeriesDecompositionTest::testNonDiurnal() { maxResidual = std::max(maxResidual, residual); sumValue += std::fabs(trend[j]); maxValue = std::max(maxValue, std::fabs(trend[j])); + debug.addPrediction(times[j], mean(prediction), residual); } LOG_DEBUG(<< "'sum residual' / 'sum value' = " @@ -1797,30 +1682,22 @@ void CTimeSeriesDecompositionTest::testNonDiurnal() { totalMaxValue += maxValue; CPPUNIT_ASSERT(sumResidual / sumValue < 0.17); - CPPUNIT_ASSERT(maxResidual / maxValue < 0.21); + CPPUNIT_ASSERT(maxResidual / maxValue < 0.23); } lastTwoDay += 2 * DAY; } - //values.push_back(trend[i] + noise[i]); - //f.push_back(maths::CBasicStatistics::mean(decomposition.value(times[i]))); } LOG_DEBUG(<< "total 'sum residual' / 'sum value' = " << totalSumResidual / totalSumValue); LOG_DEBUG(<< "total 'max residual' / 'max value' = " << totalMaxResidual / totalMaxValue); - //file << "t = " << core::CContainerPrinter::print(times) << ";\n"; - //file << "f = " << core::CContainerPrinter::print(values) << ";\n"; - //file << "fe = " << core::CContainerPrinter::print(f) << ";\n"; - //file << "plot(t, f, 'r');\n"; - //file << "plot(t, fe);\n"; - - CPPUNIT_ASSERT(totalSumResidual / totalSumValue < 0.1); - CPPUNIT_ASSERT(totalMaxResidual / totalMaxValue < 0.18); + CPPUNIT_ASSERT(totalSumResidual / totalSumValue < 0.11); + CPPUNIT_ASSERT(totalMaxResidual / totalMaxValue < 0.20); } } void CTimeSeriesDecompositionTest::testYearly() { - using TDouble1Vec = core::CSmallVector; + // Test a yearly seasonal component. test::CRandomNumbers rng; @@ -1828,6 +1705,8 @@ void CTimeSeriesDecompositionTest::testYearly() { maths::CDecayRateController controller(maths::CDecayRateController::E_PredictionBias | maths::CDecayRateController::E_PredictionErrorIncrease, 1); + CDebugGenerator debug; + TDoubleVec noise; core_t::TTime time = 2 * HOUR; for (/**/; time < 4 * YEAR; time += 4 * HOUR) { @@ -1847,12 +1726,6 @@ void CTimeSeriesDecompositionTest::testYearly() { } } - //std::ofstream file; - //file.open("results.m"); - //TDoubleVec f; - //TTimeVec times; - //TDoubleVec values; - // Predict over one year and check we get reasonable accuracy. 
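testYearly, and testComponentLifecycle later in this patch, wrap the decomposition updates in a CDecayRateController feedback loop: prediction bias and error growth drive a multiplier that speeds up or slows down the decomposition's decay rate. The sketch below mirrors the calls that appear in this patch; the include paths and the helper itself are assumptions.

#include <core/CSmallVector.h>
#include <core/CoreTypes.h>
#include <maths/CDecayRateController.h>
#include <maths/CTimeSeriesDecomposition.h>

// One update step with decay rate control, as used by testYearly and
// testComponentLifecycle.
void addPointWithDecayRateControl(ml::maths::CTimeSeriesDecomposition& decomposition,
                                  ml::maths::CDecayRateController& controller,
                                  ml::core_t::TTime time,
                                  double value,
                                  ml::core_t::TTime bucketLength) {
    using TDouble1Vec = ml::core::CSmallVector<double, 1>;

    decomposition.addPoint(time, value);
    if (decomposition.initialized()) {
        TDouble1Vec prediction{decomposition.meanValue(time)};
        TDouble1Vec predictionError{decomposition.detrend(time, value, 0.0)};
        // The controller raises the multiplier when predictions are biased or
        // their errors are growing, and lowers it again as they settle down.
        double multiplier{controller.multiplier(prediction, {predictionError},
                                                bucketLength, 1.0, 0.0001)};
        decomposition.decayRate(multiplier * decomposition.decayRate());
    }
}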
TMeanAccumulator meanError; for (/**/; time < 5 * YEAR; time += 4 * HOUR) { @@ -1864,24 +1737,17 @@ void CTimeSeriesDecompositionTest::testYearly() { double prediction = maths::CBasicStatistics::mean(decomposition.value(time, 0.0)); double error = std::fabs((prediction - trend) / trend); meanError.add(error); - //times.push_back(time); - //values.push_back(trend); - //f.push_back(prediction); + debug.addValue(time, trend); + debug.addPrediction(time, prediction, trend - prediction); if (time / HOUR % 40 == 0) { LOG_DEBUG(<< "error = " << error); } CPPUNIT_ASSERT(error < 0.1); } - //file << "t = " << core::CContainerPrinter::print(times) << ";\n"; - //file << "f = " << core::CContainerPrinter::print(values) << ";\n"; - //file << "fe = " << core::CContainerPrinter::print(f) << ";\n"; - //file << "plot(t, f, 'r');\n"; - //file << "plot(t, fe);\n"; - LOG_DEBUG(<< "mean error = " << maths::CBasicStatistics::mean(meanError)); - CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) < 0.011); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanError) < 0.012); } void CTimeSeriesDecompositionTest::testWithOutliers() { @@ -1902,12 +1768,6 @@ void CTimeSeriesDecompositionTest::testWithOutliers() { rng.generateNormalSamples(0.0, 9.0, buckets, noise); std::sort(outliers.begin(), outliers.end()); - //std::ofstream file; - //file.open("results.m"); - //TTimeVec times; - //TDoubleVec values; - //TDoubleVec f; - auto trend = [](core_t::TTime time) { return 25.0 + 20.0 * std::sin(boost::math::double_constants::two_pi * static_cast(time) / @@ -1915,6 +1775,7 @@ void CTimeSeriesDecompositionTest::testWithOutliers() { }; maths::CTimeSeriesDecomposition decomposition(0.01, TEN_MINS); + CDebugGenerator debug; for (core_t::TTime time = 0; time < WEEK; time += TEN_MINS) { std::size_t bucket(time / TEN_MINS); @@ -1930,21 +1791,15 @@ void CTimeSeriesDecompositionTest::testWithOutliers() { double prediction = maths::CBasicStatistics::mean(decomposition.value(time, 0.0)); error.add(std::fabs(prediction - trend(time)) / trend(time)); - //times.push_back(time); - //values.push_back(trend(time)); - //f.push_back(prediction); + debug.addValue(time, value); + debug.addPrediction(time, prediction, trend(time) - prediction); } - //file << "t = " << core::CContainerPrinter::print(times) << ";\n"; - //file << "f = " << core::CContainerPrinter::print(values) << ";\n"; - //file << "fe = " << core::CContainerPrinter::print(f) << ";\n"; - //file << "plot(t, f, 'r');\n"; - //file << "plot(t, fe);\n"; - LOG_DEBUG(<< "error = " << maths::CBasicStatistics::mean(error)); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(error) < 0.05); break; } + debug.addValue(time, value); } } @@ -1978,18 +1833,14 @@ void CTimeSeriesDecompositionTest::testCalendar() { test::CRandomNumbers rng; maths::CTimeSeriesDecomposition decomposition(0.01, HALF_HOUR); - - //std::ofstream file; - //file.open("results.m"); - //TDoubleVec f; - //TDoubleVec times; - //TDoubleVec values; + CDebugGenerator debug; TDoubleVec noise; for (core_t::TTime time = 0, count = 0; time < end; time += HALF_HOUR) { rng.generateNormalSamples(0.0, 4.0, 1, noise); decomposition.addPoint(time, trend(time) + noise[0]); + debug.addValue(time, trend(time) + noise[0]); if (time - DAY == *std::lower_bound(months.begin(), months.end(), time - DAY)) { LOG_DEBUG(<< "*** time = " << time << " ***"); @@ -2008,23 +1859,14 @@ void CTimeSeriesDecompositionTest::testCalendar() { LOG_DEBUG(<< " trend = " << trend(time_)); ++largeErrorCount; } + debug.addPrediction(time_, prediction, actual - 
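The two assertions at the end of testCalendar's month-end check are implications: until the calendar component has been observed a few times, the month-end effect must show up as many large prediction errors, and after that it must mostly be predicted. Restated as an if/else, with the thresholds copied from the test and assert standing in for CPPUNIT_ASSERT:

#include <cassert>
#include <cstddef>

// Equivalent form of
//   CPPUNIT_ASSERT(++count > 4 || largeErrorCount > 15);
//   CPPUNIT_ASSERT(count < 5 || largeErrorCount <= 5);
// evaluated after count has been incremented.
void checkMonthEndErrors(std::size_t monthEndsSeen, std::size_t largeErrorCount) {
    if (monthEndsSeen <= 4) {
        // The effect is real but not yet modeled: expect many large errors.
        assert(largeErrorCount > 15);
    } else {
        // The calendar component should now capture most of the effect.
        assert(largeErrorCount <= 5);
    }
}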
prediction); } LOG_DEBUG(<< "large error count = " << largeErrorCount); CPPUNIT_ASSERT(++count > 4 || largeErrorCount > 15); CPPUNIT_ASSERT(count < 5 || largeErrorCount <= 5); } - - //times.push_back(time); - //values.push_back(trend(time) + noise[0]); - //f.push_back(maths::CBasicStatistics::mean(decomposition.value(time, 0.0))); } - - //file << "t = " << core::CContainerPrinter::print(times) << ";\n"; - //file << "f = " << core::CContainerPrinter::print(values) << ";\n"; - //file << "fe = " << core::CContainerPrinter::print(f) << ";\n"; - //file << "plot(t, f, 'r');\n"; - //file << "plot(t, fe);\n"; } void CTimeSeriesDecompositionTest::testConditionOfTrend() { @@ -2047,6 +1889,72 @@ void CTimeSeriesDecompositionTest::testConditionOfTrend() { } } +void CTimeSeriesDecompositionTest::testComponentLifecycle() { + // Test we adapt to changing seasonality adding and removing components + // as necessary. + + test::CRandomNumbers rng; + + auto trend = [](core_t::TTime time) { + return 20.0 + 10.0 * std::sin(boost::math::double_constants::two_pi * time / DAY) + + 3.0 * (time > 4 * WEEK + ? std::sin(boost::math::double_constants::two_pi * time / HOUR) + : 0.0) - + 3.0 * (time > 9 * WEEK + ? std::sin(boost::math::double_constants::two_pi * time / HOUR) + : 0.0) + + 8.0 * (time > 16 * WEEK + ? std::sin(boost::math::double_constants::two_pi * time / 4 / DAY) + : 0.0) - + 8.0 * (time > 21 * WEEK + ? std::sin(boost::math::double_constants::two_pi * time / 4 / DAY) + : 0.0); + }; + + maths::CTimeSeriesDecomposition decomposition(0.012, FIVE_MINS); + maths::CDecayRateController controller(maths::CDecayRateController::E_PredictionBias | + maths::CDecayRateController::E_PredictionErrorIncrease, + 1); + CDebugGenerator debug; + + TMeanAccumulator errors[4]; + TDoubleVec noise; + for (core_t::TTime time = 0; time < 35 * WEEK; time += FIVE_MINS) { + rng.generateNormalSamples(0.0, 1.0, 1, noise); + decomposition.addPoint(time, trend(time) + noise[0]); + debug.addValue(time, trend(time) + noise[0]); + + if (decomposition.initialized()) { + TDouble1Vec prediction{decomposition.meanValue(time)}; + TDouble1Vec predictionError{ + decomposition.detrend(time, trend(time) + noise[0], 0.0)}; + double multiplier{controller.multiplier(prediction, {predictionError}, + FIVE_MINS, 1.0, 0.0001)}; + decomposition.decayRate(multiplier * decomposition.decayRate()); + } + + double prediction = mean(decomposition.value(time, 0.0)); + if (time > 24 * WEEK) { + errors[3].add(std::fabs(prediction - trend(time)) / trend(time)); + } else if (time > 18 * WEEK && time < 21 * WEEK) { + errors[2].add(std::fabs(prediction - trend(time)) / trend(time)); + } else if (time > 11 * WEEK && time < 14 * WEEK) { + errors[1].add(std::fabs(prediction - trend(time)) / trend(time)); + } else if (time > 6 * WEEK && time < 9 * WEEK) { + errors[0].add(std::fabs(prediction - trend(time)) / trend(time)); + } + + debug.addPrediction(time, prediction, trend(time) + noise[0] - prediction); + } + + double bounds[]{0.01, 0.017, 0.012, 0.072}; + for (std::size_t i = 0; i < 4; ++i) { + double error{maths::CBasicStatistics::mean(errors[i])}; + LOG_DEBUG(<< "error = " << error); + CPPUNIT_ASSERT(error < bounds[i]); + } +} + void CTimeSeriesDecompositionTest::testSwap() { const double decayRate = 0.01; const core_t::TTime bucketLength = HALF_HOUR; @@ -2054,7 +1962,7 @@ void CTimeSeriesDecompositionTest::testSwap() { TTimeVec times; TDoubleVec trend1; TDoubleVec trend2; - for (core_t::TTime time = 0; time < 10 * WEEK + 1; time += HALF_HOUR) { + for (core_t::TTime 
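The piecewise trend in testComponentLifecycle is written as a chain of conditional sine terms; unfolded, it is a daily component that is always present, an hourly component active between weeks 4 and 9, and a four-day component active between weeks 16 and 21, which is what forces seasonal components to be added and later removed. An equivalent, more explicit sketch:

#include <cmath>

// Same signal as testComponentLifecycle's trend lambda, written with explicit
// on/off windows for the transient components.
double componentLifecycleTrend(double time /*seconds*/) {
    constexpr double twoPi = 6.283185307179586;
    constexpr double HOUR = 3600.0;
    constexpr double DAY = 86400.0;
    constexpr double WEEK = 7.0 * DAY;

    double result = 20.0 + 10.0 * std::sin(twoPi * time / DAY);
    if (time > 4.0 * WEEK && time <= 9.0 * WEEK) {
        // The hourly component appears at week 4 and disappears at week 9.
        result += 3.0 * std::sin(twoPi * time / HOUR);
    }
    if (time > 16.0 * WEEK && time <= 21.0 * WEEK) {
        // The four day component appears at week 16 and disappears at week 21.
        result += 8.0 * std::sin(twoPi * time / (4.0 * DAY));
    }
    return result;
}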
time = 0; time <= 10 * WEEK; time += HALF_HOUR) { double daily = 15.0 + 10.0 * std::sin(boost::math::double_constants::two_pi * static_cast(time) / static_cast(DAY)); @@ -2093,7 +2001,7 @@ void CTimeSeriesDecompositionTest::testPersist() { TTimeVec times; TDoubleVec trend; - for (core_t::TTime time = 0; time < 10 * WEEK + 1; time += HALF_HOUR) { + for (core_t::TTime time = 0; time <= 10 * WEEK; time += HALF_HOUR) { double daily = 15.0 + 10.0 * std::sin(boost::math::double_constants::two_pi * static_cast(time) / static_cast(DAY)); @@ -2351,6 +2259,9 @@ CppUnit::Test* CTimeSeriesDecompositionTest::suite() { suiteOfTests->addTest(new CppUnit::TestCaller( "CTimeSeriesDecompositionTest::testConditionOfTrend", &CTimeSeriesDecompositionTest::testConditionOfTrend)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CTimeSeriesDecompositionTest::testComponentLifecycle", + &CTimeSeriesDecompositionTest::testComponentLifecycle)); suiteOfTests->addTest(new CppUnit::TestCaller( "CTimeSeriesDecompositionTest::testSwap", &CTimeSeriesDecompositionTest::testSwap)); suiteOfTests->addTest(new CppUnit::TestCaller( diff --git a/lib/maths/unittest/CTimeSeriesDecompositionTest.h b/lib/maths/unittest/CTimeSeriesDecompositionTest.h index d7df6781f4..3f6edccc23 100644 --- a/lib/maths/unittest/CTimeSeriesDecompositionTest.h +++ b/lib/maths/unittest/CTimeSeriesDecompositionTest.h @@ -29,6 +29,7 @@ class CTimeSeriesDecompositionTest : public CppUnit::TestFixture { void testWithOutliers(); void testCalendar(); void testConditionOfTrend(); + void testComponentLifecycle(); void testSwap(); void testPersist(); void testUpgrade(); diff --git a/lib/maths/unittest/CTimeSeriesModelTest.cc b/lib/maths/unittest/CTimeSeriesModelTest.cc index 100218086b..c70fc186ae 100644 --- a/lib/maths/unittest/CTimeSeriesModelTest.cc +++ b/lib/maths/unittest/CTimeSeriesModelTest.cc @@ -1407,7 +1407,7 @@ void CTimeSeriesModelTest::testWeights() { } } LOG_DEBUG(<< "error = " << maths::CBasicStatistics::mean(error)); - CPPUNIT_ASSERT(maths::CBasicStatistics::mean(error) < 0.21); + CPPUNIT_ASSERT(maths::CBasicStatistics::mean(error) < 0.26); LOG_DEBUG(<< "Winsorisation"); TDouble2Vec prediction(model.predict(time)); diff --git a/lib/model/unittest/CEventRateModelTest.cc b/lib/model/unittest/CEventRateModelTest.cc index c62d9dc941..700972d998 100644 --- a/lib/model/unittest/CEventRateModelTest.cc +++ b/lib/model/unittest/CEventRateModelTest.cc @@ -2835,7 +2835,7 @@ void CEventRateModelTest::testDecayRateControl() { LOG_DEBUG(<< "reference = " << maths::CBasicStatistics::mean(meanReferencePredictionError)); CPPUNIT_ASSERT(maths::CBasicStatistics::mean(meanPredictionError) < - 0.7 * maths::CBasicStatistics::mean(meanReferencePredictionError)); + 0.76 * maths::CBasicStatistics::mean(meanReferencePredictionError)); } }
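For reference, registering the new testComponentLifecycle case follows the suite's existing CppUnit::TestCaller pattern; with the template argument written out in the conventional form (the angle-bracketed arguments do not survive in this extract) it reads:

suiteOfTests->addTest(new CppUnit::TestCaller<CTimeSeriesDecompositionTest>(
    "CTimeSeriesDecompositionTest::testComponentLifecycle",
    &CTimeSeriesDecompositionTest::testComponentLifecycle));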