elastic · tveasey · Feb 27, 2018 · Feb 21, 2018 · Feb 27, 2018 · Feb 27, 2018
diff --git a/include/maths/CAdaptiveBucketing.h b/include/maths/CAdaptiveBucketing.h
@@ -80,12 +80,10 @@ namespace maths
 class MATHS_EXPORT CAdaptiveBucketing
 {
     public:
-        typedef std::vector<double> TDoubleVec;
-        typedef std::vector<CFloatStorage> TFloatVec;
-        typedef std::pair<core_t::TTime, core_t::TTime> TTimeTimePr;
-        typedef CBasicStatistics::SSampleMeanVar<double>::TAccumulator TDoubleMeanVarAccumulator;
-        typedef std::pair<TTimeTimePr, TDoubleMeanVarAccumulator> TTimeTimePrMeanVarPr;
-        typedef std::vector<TTimeTimePrMeanVarPr> TTimeTimePrMeanVarPrVec;
+        using TDoubleVec = std::vector<double>;
+        using TFloatVec = std::vector<CFloatStorage>;
+        using TFloatMeanAccumulator = CBasicStatistics::SSampleMean<CFloatStorage>::TAccumulator;
+        using TFloatMeanAccumulatorVec = std::vector<TFloatMeanAccumulator>;
 
     public:
         //! Restore by traversing a state document
@@ -116,14 +114,17 @@ class MATHS_EXPORT CAdaptiveBucketing
         //! \param[in] n The number of buckets.
         bool initialize(double a, double b, std::size_t n);
 
-        //! Add the function moments \f$([a_i,b_i], S_i)\f$ where
-        //! \f$S_i\f$ are the means and variances of the function
-        //! in the time intervals \f$([a_i,b_i])\f$.
+        //! Add the function mean values \f$([a_i,b_i], m_i)\f$ where
+        //! \f$m_i\f$ are the means of the function in the time intervals
+        //! \f$([a+(i-1)l,a+il])\f$, \f$i\in[n]\f$ and \f$l=(b-a)/n\f$.
         //!
-        //! \param[in] time The start of the period including \p values.
-        //! \param[in] values Time ranges and the corresponding function
-        //! value moments.
-        void initialValues(core_t::TTime time, const TTimeTimePrMeanVarPrVec &values);
+        //! \param[in] startTime The start of the period.
+        //! \param[in] endTime The end of the period.
+        //! \param[in] values The mean values in a regular subdivision
+        //! of [\p startTime,\p endTime].
+        void initialValues(core_t::TTime startTime,
+                           core_t::TTime endTime,
+                           const TFloatMeanAccumulatorVec &values);
 
         //! Get the number of buckets.
         std::size_t size(void) const;
@@ -204,21 +205,18 @@ class MATHS_EXPORT CAdaptiveBucketing
         //! Get the memory used by this component
         std::size_t memoryUsage(void) const;
 
-    private:
-        typedef CBasicStatistics::SSampleMean<CFloatStorage>::TAccumulator TFloatMeanAccumulator;
-
     private:
         //! Compute the values corresponding to the change in end
         //! points from \p endpoints. The values are assigned based
         //! on their intersection with each bucket in the previous
         //! bucket configuration.
         virtual void refresh(const TFloatVec &endpoints) = 0;
 
+        //! Check if \p time is in this component's window.
+        virtual bool inWindow(core_t::TTime time) const = 0;
+
         //! Add the function value at \p time.
-        virtual void add(std::size_t bucket,
-                         core_t::TTime time,
-                         double offset,
-                         const TDoubleMeanVarAccumulator &value) = 0;
+        virtual void add(std::size_t bucket, core_t::TTime time, double value, double weight) = 0;
 
         //! Get the offset w.r.t. the start of the bucketing of \p time.
         virtual double offset(core_t::TTime time) const = 0;

diff --git a/include/maths/CBasicStatistics.h b/include/maths/CBasicStatistics.h
@@ -85,6 +85,23 @@ class MATHS_EXPORT CBasicStatistics
         //! Compute the sample median.
         static double median(const TDoubleVec &dataIn);
 
+        //! Compute the maximum of \p first, \p second and \p third (taken by value and compared using only operator>=).
+        template<typename T>
+        static T max(T first, T second, T third)
+        {
+            return  first >= second ?
+                   (third >= first  ? third : first) :
+                   (third >= second ? third : second);
+        }
+
+        //! Compute the minimum of \p first, \p second and \p third (taken by value and compared using only operator<=).
+        template<typename T>
+        static T min(T first, T second, T third)
+        {
+            return  first <= second ?
+                   (third <= first  ? third : first) :
+                   (third <= second ? third : second);
+        }
 
         /////////////////////////// ACCUMULATORS ///////////////////////////
 
@@ -1620,6 +1637,12 @@ class MATHS_EXPORT CBasicStatistics
                     return m_Max[0];
                 }
 
+                //! Get the range, i.e. m_Max[0] minus m_Min[0].
+                T range(void) const
+                {
+                    return m_Max[0] - m_Min[0];
+                }
+
                 //! Get the margin by which all the values have the same sign.
                 T signMargin(void) const
                 {

diff --git a/include/maths/CCalendarComponentAdaptiveBucketing.h b/include/maths/CCalendarComponentAdaptiveBucketing.h
@@ -47,8 +47,7 @@ class CSeasonalTime;
 class MATHS_EXPORT CCalendarComponentAdaptiveBucketing : private CAdaptiveBucketing
 {
     public:
-        typedef CAdaptiveBucketing::TTimeTimePrMeanVarPrVec TTimeTimePrMeanVarPrVec;
-        typedef CBasicStatistics::SSampleMeanVar<CFloatStorage>::TAccumulator TFloatMeanVarAccumulator;
+        using TFloatMeanVarAccumulator = CBasicStatistics::SSampleMeanVar<CFloatStorage>::TAccumulator;
 
     public:
         CCalendarComponentAdaptiveBucketing(void);
@@ -160,7 +159,7 @@ class MATHS_EXPORT CCalendarComponentAdaptiveBucketing : private CAdaptiveBucket
         //@}
 
     private:
-        typedef std::vector<TFloatMeanVarAccumulator> TFloatMeanVarVec;
+        using TFloatMeanVarVec = std::vector<TFloatMeanVarAccumulator>;
 
     private:
         //! Restore by traversing a state document
@@ -174,11 +173,11 @@ class MATHS_EXPORT CCalendarComponentAdaptiveBucketing : private CAdaptiveBucket
         //! \param[in] endpoints The old end points.
         void refresh(const TFloatVec &endpoints);
 
+        //! Check if \p time is in this component's window.
+        virtual bool inWindow(core_t::TTime time) const;
+
         //! Add the function value to \p bucket.
-        virtual void add(std::size_t bucket,
-                         core_t::TTime time,
-                         double offset,
-                         const TDoubleMeanVarAccumulator &value);
+        virtual void add(std::size_t bucket, core_t::TTime time, double value, double weight);
 
         //! Get the offset w.r.t. the start of the bucketing of \p time.
         virtual double offset(core_t::TTime time) const;

diff --git a/include/maths/CDecompositionComponent.h b/include/maths/CDecompositionComponent.h
@@ -43,15 +43,15 @@ namespace maths
 class MATHS_EXPORT CDecompositionComponent
 {
     public:
-        typedef maths_t::TDoubleDoublePr TDoubleDoublePr;
-        typedef std::vector<double> TDoubleVec;
-        typedef std::vector<CFloatStorage> TFloatVec;
-        typedef CSpline<boost::reference_wrapper<const TFloatVec>,
-                        boost::reference_wrapper<const TFloatVec>,
-                        boost::reference_wrapper<const TDoubleVec> > TSplineCRef;
-        typedef CSpline<boost::reference_wrapper<TFloatVec>,
-                        boost::reference_wrapper<TFloatVec>,
-                        boost::reference_wrapper<TDoubleVec> > TSplineRef;
+        using TDoubleDoublePr = maths_t::TDoubleDoublePr;
+        using TDoubleVec = std::vector<double>;
+        using TFloatVec = std::vector<CFloatStorage>;
+        using TSplineCRef = CSpline<boost::reference_wrapper<const TFloatVec>,
+                                    boost::reference_wrapper<const TFloatVec>,
+                                    boost::reference_wrapper<const TDoubleVec>>;
+        using TSplineRef = CSpline<boost::reference_wrapper<TFloatVec>,
+                                   boost::reference_wrapper<TFloatVec>,
+                                   boost::reference_wrapper<TDoubleVec>>;
 
     public:
         //! Persist state by passing information to \p inserter.
@@ -72,9 +72,9 @@ class MATHS_EXPORT CDecompositionComponent
                 };
 
             public:
-                typedef boost::array<CSplineTypes::EType, 2> TTypeArray;
-                typedef boost::array<TFloatVec, 2> TFloatVecArray;
-                typedef boost::array<TDoubleVec, 2> TDoubleVecArray;
+                using TTypeArray = boost::array<CSplineTypes::EType, 2>;
+                using TFloatVecArray = boost::array<TFloatVec, 2>;
+                using TDoubleVecArray = boost::array<TDoubleVec, 2>;
 
             public:
                 CPackedSplines(CSplineTypes::EType valueInterpolationType,

diff --git a/include/maths/CExpandingWindow.h b/include/maths/CExpandingWindow.h
@@ -0,0 +1,135 @@
+/*
+ * ELASTICSEARCH CONFIDENTIAL
+ *
+ * Copyright (c) 2018 Elasticsearch BV. All Rights Reserved.
+ *
+ * Notice: this software, and all information contained
+ * therein, is the exclusive property of Elasticsearch BV
+ * and its licensors, if any, and is protected under applicable
+ * domestic and foreign law, and international treaties.
+ *
+ * Reproduction, republication or distribution without the
+ * express written consent of Elasticsearch BV is
+ * strictly prohibited.
+ */
+
+#ifndef INCLUDED_ml_maths_CExpandingWindow_h
+#define INCLUDED_ml_maths_CExpandingWindow_h
+
+#include <core/CFloatStorage.h>
+#include <core/CoreTypes.h>
+#include <core/CVectorRange.h>
+
+#include <maths/CBasicStatistics.h>
+#include <maths/ImportExport.h>
+
+#include <cstddef>
+#include <functional>
+#include <vector>
+
+namespace ml
+{
+namespace core
+{
+class CStatePersistInserter;
+class CStateRestoreTraverser;
+}
+
+namespace maths
+{
+
+//! \brief Implements a fixed memory expanding time window.
+//!
+//! DESCRIPTION:\n
+//! As the window expands it compresses by merging adjacent values
+//! and maintaining means of merged values. It cycles through a
+//! sequence of increasing compression factors, which are determined
+//! by a sequence of increasing bucketing lengths supplied to the
+//! constructor. At the point it overflows, i.e. time since the
+//! beginning of the window exceeds "size" x "maximum bucket length",
+//! it will re-initialize the bucketing and update the start time.
+class MATHS_EXPORT CExpandingWindow
+{
+    public:
+        using TDoubleVec = std::vector<double>;
+        using TTimeVec = std::vector<core_t::TTime>;
+        using TTimeCRng = core::CVectorRange<const TTimeVec>;
+        using TFloatMeanAccumulator = CBasicStatistics::SSampleMean<CFloatStorage>::TAccumulator;
+        using TFloatMeanAccumulatorVec = std::vector<TFloatMeanAccumulator>;
+        using TPredictor = std::function<double (core_t::TTime)>;
+
+    public:
+        CExpandingWindow(core_t::TTime bucketLength,
+                         TTimeCRng bucketLengths,
+                         std::size_t size,
+                         double decayRate = 0.0);
+
+        //! Initialize by reading state from \p traverser.
+        bool acceptRestoreTraverser(core::CStateRestoreTraverser &traverser);
+
+        //! Persist state by passing information to \p inserter.
+        void acceptPersistInserter(core::CStatePersistInserter &inserter) const;
+
+        //! Get the start time of the window.
+        core_t::TTime startTime() const;
+
+        //! Get the end time of the window.
+        core_t::TTime endTime() const;
+
+        //! Get the current bucket length.
+        core_t::TTime bucketLength() const;
+
+        //! Get the bucket values.
+        const TFloatMeanAccumulatorVec &values() const;
+
+        //! Get the bucket values minus the values from \p predictor.
+        TFloatMeanAccumulatorVec valuesMinusPrediction(const TPredictor &predictor) const;
+
+        //! Set the start time to \p time.
+        void initialize(core_t::TTime time);
+
+        //! Age the bucket values to account for \p time elapsed time.
+        void propagateForwardsByTime(double time);
+
+        //! Add \p value at \p time.
+        void add(core_t::TTime time, double value, double weight = 1.0);
+
+        //! Check if we need to compress by increasing the bucket span.
+        bool needToCompress(core_t::TTime time) const;
+
+        //! Get a checksum for this object.
+        uint64_t checksum(uint64_t seed = 0) const;
+
+        //! Debug the memory used by this object.
+        void debugMemoryUsage(core::CMemoryUsage::TMemoryUsagePtr mem) const;
+
+        //! Get the memory used by this object.
+        std::size_t memoryUsage() const;
+
+    private:
+        //! The rate at which the bucket values are aged.
+        double m_DecayRate;
+
+        //! The data bucketing length.
+        core_t::TTime m_BucketLength;
+
+        //! The bucket lengths to test.
+        TTimeCRng m_BucketLengths;
+
+        //! The index in m_BucketLengths of the current bucketing interval.
+        std::size_t m_BucketLengthIndex;
+
+        //! The time of the first data point.
+        core_t::TTime m_StartTime;
+
+        //! The bucket values.
+        TFloatMeanAccumulatorVec m_BucketValues;
+
+        //! The mean value time modulo the data bucketing length.
+        TFloatMeanAccumulator m_MeanOffset;
+};
+
+}
+}
+
+#endif // INCLUDED_ml_maths_CExpandingWindow_h
diff --git a/include/maths/CGammaRateConjugate.h b/include/maths/CGammaRateConjugate.h
@@ -85,15 +85,19 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior
         //! \param[in] priorShape The shape parameter of the gamma prior.
         //! \param[in] priorRate The rate parameter of the gamma prior.
         //! \param[in] decayRate The rate at which to revert to non-informative.
+        //! \param[in] offsetMargin The margin between the smallest value and the support
+        //! left end.
         CGammaRateConjugate(maths_t::EDataType dataType,
                             double offset,
                             double priorShape,
                             double priorRate,
-                            double decayRate = 0.0);
+                            double decayRate = 0.0,
+                            double offsetMargin = GAMMA_OFFSET_MARGIN);
 
         //! Construct by traversing a state document.
         CGammaRateConjugate(const SDistributionRestoreParams &params,
-                            core::CStateRestoreTraverser &traverser);
+                            core::CStateRestoreTraverser &traverser,
+                            double offsetMargin = GAMMA_OFFSET_MARGIN);
 
         // Default copy constructor and assignment operator work.
 
@@ -103,10 +107,13 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior
         //! for details).
         //! \param[in] offset The offset to apply to the data.
         //! \param[in] decayRate The rate at which to revert to the non-informative prior.
+        //! \param[in] offsetMargin The margin between the smallest value and the support
+        //! left end.
         //! \return A non-informative prior.
         static CGammaRateConjugate nonInformativePrior(maths_t::EDataType dataType,
                                                        double offset = 0.0,
-                                                       double decayRate = 0.0);
+                                                       double decayRate = 0.0,
+                                                       double offsetMargin = GAMMA_OFFSET_MARGIN);
         //@}
 
         //! \name Prior Contract
@@ -123,7 +130,12 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior
         //! Reset the prior to non-informative.
         virtual void setToNonInformative(double offset = 0.0, double decayRate = 0.0);
 
-        //! Returns false.
+        //! Get the margin between the smallest value and the support left
+        //! end. Priors with non-negative support automatically adjust the
+        //! offset if a value is seen which is smaller than offset + margin.
+        virtual double offsetMargin(void) const;
+
+        //! Returns true.
         virtual bool needsOffset(void) const;
 
         //! Reset m_Offset so the smallest sample is not within some minimum
@@ -399,6 +411,9 @@ class MATHS_EXPORT CGammaRateConjugate : public CPrior
         //! us to model data with negative values greater than \f$-u\f$.
         double m_Offset;
 
+        //! The margin between the smallest value and the support left end.
+        double m_OffsetMargin;
+
         //! The maximum likelihood estimate of the shape parameter.
         double m_LikelihoodShape;