Skip to content

Commit

Permalink
[ML] Anomaly detection for multiple bucket features (#175)
Browse files Browse the repository at this point in the history
  • Loading branch information
tveasey authored Aug 17, 2018
1 parent 88095c1 commit 548222b
Show file tree
Hide file tree
Showing 59 changed files with 2,685 additions and 1,413 deletions.
6 changes: 0 additions & 6 deletions bin/autodetect/CCmdLineParser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ bool CCmdLineParser::parse(int argc,
bool& memoryUsage,
std::size_t& bucketResultsDelay,
bool& multivariateByFields,
std::string& multipleBucketspans,
bool& perPartitionNormalization,
TStrVec& clauseTokens) {
try {
Expand Down Expand Up @@ -117,8 +116,6 @@ bool CCmdLineParser::parse(int argc,
"The numer of half buckets to store before choosing which overlapping bucket has the biggest anomaly")
("multivariateByFields",
"Optional flag to enable multi-variate analysis of correlated by fields")
("multipleBucketspans", boost::program_options::value<std::string>(),
"Optional comma-separated list of additional bucketspans - must be direct multiples of the main bucketspan")
("perPartitionNormalization",
"Optional flag to enable per partition normalization")
;
Expand Down Expand Up @@ -234,9 +231,6 @@ bool CCmdLineParser::parse(int argc,
if (vm.count("multivariateByFields") > 0) {
multivariateByFields = true;
}
if (vm.count("multipleBucketspans") > 0) {
multipleBucketspans = vm["multipleBucketspans"].as<std::string>();
}
if (vm.count("perPartitionNormalization") > 0) {
perPartitionNormalization = true;
}
Expand Down
1 change: 0 additions & 1 deletion bin/autodetect/CCmdLineParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ class CCmdLineParser {
bool& memoryUsage,
std::size_t& bucketResultsDelay,
bool& multivariateByFields,
std::string& multipleBucketspans,
bool& perPartitionNormalization,
TStrVec& clauseTokens);

Expand Down
10 changes: 4 additions & 6 deletions bin/autodetect/Main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ int main(int argc, char** argv) {
bool memoryUsage(false);
std::size_t bucketResultsDelay(0);
bool multivariateByFields(false);
std::string multipleBucketspans;
bool perPartitionNormalization(false);
TStrVec clauseTokens;
if (ml::autodetect::CCmdLineParser::parse(
Expand All @@ -97,10 +96,9 @@ int main(int argc, char** argv) {
summaryCountFieldName, delimiter, lengthEncodedInput, timeField,
timeFormat, quantilesStateFile, deleteStateFiles, persistInterval,
maxQuantileInterval, inputFileName, isInputFileNamedPipe, outputFileName,
isOutputFileNamedPipe, restoreFileName, isRestoreFileNamedPipe,
persistFileName, isPersistFileNamedPipe, maxAnomalyRecords, memoryUsage,
bucketResultsDelay, multivariateByFields, multipleBucketspans,
perPartitionNormalization, clauseTokens) == false) {
isOutputFileNamedPipe, restoreFileName, isRestoreFileNamedPipe, persistFileName,
isPersistFileNamedPipe, maxAnomalyRecords, memoryUsage, bucketResultsDelay,
multivariateByFields, perPartitionNormalization, clauseTokens) == false) {
return EXIT_FAILURE;
}

Expand Down Expand Up @@ -147,7 +145,7 @@ int main(int argc, char** argv) {
ml::model::CAnomalyDetectorModelConfig modelConfig =
ml::model::CAnomalyDetectorModelConfig::defaultConfig(
bucketSpan, summaryMode, summaryCountFieldName, latency,
bucketResultsDelay, multivariateByFields, multipleBucketspans);
bucketResultsDelay, multivariateByFields);
modelConfig.perPartitionNormalization(perPartitionNormalization);
modelConfig.detectionRules(ml::model::CAnomalyDetectorModelConfig::TIntDetectionRuleVecUMapCRef(
fieldConfig.detectionRules()));
Expand Down
19 changes: 17 additions & 2 deletions docs/CHANGELOG.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,20 @@
//=== Regressions
//=== Known Issues
////
== {es} version 6.5.0
//=== Breaking Changes
//=== Deprecations
//=== New Features
=== Enhancements
Perform anomaly detection on features derived from multiple bucket values to improve robustness
of detection with respect to misconfigured bucket lengths and improve detection of long-lasting
anomalies. (See {pull}175[#175].)
//=== Bug Fixes
//=== Regressions
29 changes: 18 additions & 11 deletions include/maths/CBasicStatistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ class MATHS_EXPORT CBasicStatistics {
template<typename T, unsigned int ORDER>
struct SSampleCentralMoments : public std::unary_function<T, void> {
using TCoordinate = typename SCoordinate<T>::Type;
using TValue = T;

//! See core::CMemory.
static bool dynamicSizeAlwaysZero() {
Expand Down Expand Up @@ -1480,17 +1481,6 @@ class MATHS_EXPORT CBasicStatistics {
//! The set maximum.
COrderStatisticsStack<T, 1, GREATER> m_Max;
};

// Friends
template<typename T>
friend std::ostream&
operator<<(std::ostream& o, const CBasicStatistics::SSampleCentralMoments<T, 1u>&);
template<typename T>
friend std::ostream&
operator<<(std::ostream& o, const CBasicStatistics::SSampleCentralMoments<T, 2u>&);
template<typename T>
friend std::ostream&
operator<<(std::ostream& o, const CBasicStatistics::SSampleCentralMoments<T, 3u>&);
};

template<typename T>
Expand Down Expand Up @@ -1596,6 +1586,23 @@ template<typename U>
void CBasicStatistics::SSampleCentralMoments<T, ORDER>::add(const U& x, const TCoordinate& n) {
basic_statistics_detail::SCentralMomentsCustomAdd<U>::add(x, n, *this);
}

//! \brief Defines a promoted type for a SSampleCentralMoments.
//!
//! The promoted accumulator keeps the same order \p N and replaces its
//! value type \p T by SPromoted<T>::Type.
//!
//! \tparam T The value type of the moments accumulator being promoted.
//! \tparam N The order of the moments accumulator; it is preserved.
//!
//! \see CTypeConversions.h for details.
template<typename T, unsigned int N>
struct SPromoted<CBasicStatistics::SSampleCentralMoments<T, N>> {
//! The same accumulator template instantiated on the promoted value type.
using Type = CBasicStatistics::SSampleCentralMoments<typename SPromoted<T>::Type, N>;
};

//! \brief Defines SSampleCentralMoments on a suitable floating point type.
//!
//! The result keeps the order \p N and replaces the value type \p T by
//! SFloatingPoint<T, U>::Type.
//!
//! \tparam T The value type of the moments accumulator being converted.
//! \tparam N The order of the moments accumulator; it is preserved.
//! \tparam U Passed through unchanged to SFloatingPoint for the value type.
//!
//! \see CTypeConversions.h for details.
template<typename T, unsigned int N, typename U>
struct SFloatingPoint<CBasicStatistics::SSampleCentralMoments<T, N>, U> {
//! The same accumulator template instantiated on the converted value type.
using Type =
CBasicStatistics::SSampleCentralMoments<typename SFloatingPoint<T, U>::Type, N>;
};
}
}

Expand Down
20 changes: 20 additions & 0 deletions include/maths/CBasicStatisticsPersist.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,16 @@ template<typename T, std::size_t N>
bool stringToType(const std::string& str, CSymmetricMatrixNxN<T, N>& value) {
return value.fromDelimited(str);
}
//! Read the vector \p value from the string \p str.
//!
//! \param[in] str The text handed to CVector<T>::fromDelimited.
//! \param[out] value The vector which is asked to parse \p str.
//! \return The status reported by CVector<T>::fromDelimited.
template<typename T>
bool stringToType(const std::string& str, CVector<T>& value) {
    const bool parsed = value.fromDelimited(str);
    return parsed;
}
//! Read the symmetric matrix \p value from the string \p str.
//!
//! \param[in] str The text handed to CSymmetricMatrix<T>::fromDelimited.
//! \param[out] value The matrix which is asked to parse \p str.
//! \return The status reported by CSymmetricMatrix<T>::fromDelimited.
template<typename T>
bool stringToType(const std::string& str, CSymmetricMatrix<T>& value) {
    const bool parsed = value.fromDelimited(str);
    return parsed;
}

//! Function to do conversion to a string.
template<typename T>
Expand All @@ -72,6 +82,16 @@ template<typename T, std::size_t N>
inline std::string typeToString(const CSymmetricMatrixNxN<T, N>& value) {
return value.toDelimited();
}
//! Write the vector \p value to a string.
//!
//! \param[in] value The vector to convert.
//! \return The string produced by CVector<T>::toDelimited.
template<typename T>
inline std::string typeToString(const CVector<T>& value) {
    std::string delimited = value.toDelimited();
    return delimited;
}
//! Write the symmetric matrix \p value to a string.
//!
//! \param[in] value The matrix to convert.
//! \return The string produced by CSymmetricMatrix<T>::toDelimited.
template<typename T>
inline std::string typeToString(const CSymmetricMatrix<T>& value) {
    std::string delimited = value.toDelimited();
    return delimited;
}
}

template<typename T, unsigned int ORDER>
Expand Down
18 changes: 12 additions & 6 deletions include/maths/CLinearAlgebra.h
Original file line number Diff line number Diff line change
Expand Up @@ -928,7 +928,6 @@ class CVectorNx1 : private boost::equality_comparable< CVectorNx1<T, N>,

public:
using TArray = T[N];
using TVec = std::vector<T>;
using TBoostArray = boost::array<T, N>;
using TConstIterator = typename TBoostArray::const_iterator;

Expand All @@ -950,21 +949,24 @@ class CVectorNx1 : private boost::equality_comparable< CVectorNx1<T, N>,
}

//! Construct from a boost array.
explicit CVectorNx1(const boost::array<T, N>& a) {
template<typename U>
explicit CVectorNx1(const boost::array<U, N>& a) {
for (std::size_t i = 0u; i < N; ++i) {
TBase::m_X[i] = a[i];
}
}

//! Construct from a vector.
explicit CVectorNx1(const TVec& v) {
template<typename U>
explicit CVectorNx1(const std::vector<U>& v) {
for (std::size_t i = 0u; i < N; ++i) {
TBase::m_X[i] = v[i];
}
}

//! Construct from a vector.
explicit CVectorNx1(const core::CSmallVectorBase<T>& v) {
template<typename U>
explicit CVectorNx1(const core::CSmallVectorBase<U>& v) {
for (std::size_t i = 0u; i < N; ++i) {
TBase::m_X[i] = v[i];
}
Expand Down Expand Up @@ -1244,10 +1246,14 @@ class CVector : private boost::equality_comparable< CVector<T>,
}

//! Construct from a vector.
explicit CVector(const TArray& v) { TBase::m_X = v; }
//! Construct from a standard vector whose element type \p U may differ
//! from the element type of this vector.
//!
//! The elements are copied with a range assign, exactly as the
//! core::CSmallVectorBase overload does, because whole-container
//! assignment only compiles when the source container type is the
//! same as the storage type, which defeats the purpose of templating
//! on \p U.
//!
//! \param[in] v The values to copy; each is converted on assignment.
template<typename U>
explicit CVector(const std::vector<U>& v) {
    TBase::m_X.assign(v.begin(), v.end());
}

//! Construct from a vector.
explicit CVector(const core::CSmallVectorBase<T>& v) {
template<typename U>
explicit CVector(const core::CSmallVectorBase<U>& v) {
TBase::m_X.assign(v.begin(), v.end());
}

Expand Down
78 changes: 55 additions & 23 deletions include/maths/CModel.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,6 @@ class MATHS_EXPORT CModelAddSamplesParams {
using TDouble2VecWeightsAryVec = std::vector<maths_t::TDouble2VecWeightsAry>;

public:
CModelAddSamplesParams();

//! Set whether or not the data are integer valued.
CModelAddSamplesParams& integer(bool integer);
//! Get the data type.
Expand Down Expand Up @@ -133,15 +131,15 @@ class MATHS_EXPORT CModelAddSamplesParams {

private:
//! The data type.
maths_t::EDataType m_Type;
maths_t::EDataType m_Type = maths_t::E_MixedData;
//! True if the data are non-negative false otherwise.
bool m_IsNonNegative;
bool m_IsNonNegative = false;
//! The propagation interval.
double m_PropagationInterval;
double m_PropagationInterval = 1.0;
//! The trend sample weights.
const TDouble2VecWeightsAryVec* m_TrendWeights;
const TDouble2VecWeightsAryVec* m_TrendWeights = nullptr;
//! The prior sample weights.
const TDouble2VecWeightsAryVec* m_PriorWeights;
const TDouble2VecWeightsAryVec* m_PriorWeights = nullptr;
};

//! \brief The extra parameters needed by CModel::probability.
Expand Down Expand Up @@ -178,6 +176,8 @@ class MATHS_EXPORT CModelProbabilityParams {

//! Add whether a value's bucket is empty.
CModelProbabilityParams& addBucketEmpty(const TBool2Vec& empty);
//! Set whether or not the values' bucket is empty.
CModelProbabilityParams& bucketEmpty(const TBool2Vec1Vec& empty);
//! Get whether the values' bucket is empty.
const TBool2Vec1Vec& bucketEmpty() const;

Expand All @@ -200,14 +200,19 @@ class MATHS_EXPORT CModelProbabilityParams {
//! Get the most anomalous correlate if there is one.
TOptionalSize mostAnomalousCorrelate() const;

//! Set whether or not to update the anomaly model.
CModelProbabilityParams& updateAnomalyModel(bool update);
//! Get whether or not to update the anomaly model.
bool updateAnomalyModel() const;
//! Set whether or not to use multibucket features.
CModelProbabilityParams& useMultibucketFeatures(bool use);
//! Get whether or not to use multibucket features.
bool useMultibucketFeatures() const;

//! Set whether or not to use the anomaly model.
CModelProbabilityParams& useAnomalyModel(bool use);
//! Get whether or not to use the anomaly model.
bool useAnomalyModel() const;

private:
//! The entity tag (if relevant otherwise 0).
std::size_t m_Tag;
std::size_t m_Tag = 0;
//! The coordinates' probability calculations.
TProbabilityCalculation2Vec m_Calculations;
//! The confidence interval to use when detrending.
Expand All @@ -220,8 +225,41 @@ class MATHS_EXPORT CModelProbabilityParams {
TSize2Vec m_Coordinates;
//! The most anomalous coordinate (if there is one).
TOptionalSize m_MostAnomalousCorrelate;
//! Whether or not to update the anomaly model.
bool m_UpdateAnomalyModel;
//! Whether or not to use multibucket features.
bool m_UseMultibucketFeatures = true;
//! Whether or not to use the anomaly model.
bool m_UseAnomalyModel = true;
};

//! \brief Describes the result of the model probability calculation.
//!
//! The scalar members carry in-class defaults: both probabilities
//! default to 1.0 and the conditional flag defaults to false. The
//! container members have no explicit initializer.
struct MATHS_EXPORT SModelProbabilityResult {
using TDouble4Vec = core::CSmallVector<double, 4>;
using TSize1Vec = core::CSmallVector<std::size_t, 1>;
using TTail2Vec = core::CSmallVector<maths_t::ETail, 2>;

//! \brief Wraps up a feature label and probability.
//!
//! \note The label is held through a boost::reference_wrapper, i.e.
//! by reference and not by copy, so the referenced string must
//! outlive every object of this type which refers to it.
struct MATHS_EXPORT SFeatureProbability {
using TStrCRef = boost::reference_wrapper<const std::string>;
// NOTE(review): both constructors are defined elsewhere; presumably
// the second binds s_Label to \p label and stores \p probability.
// Confirm against the implementation.
SFeatureProbability();
SFeatureProbability(const std::string& label, double probability);
//! A reference to the feature's label.
TStrCRef s_Label;
//! The feature's probability.
double s_Probability = 1.0;
};
using TFeatureProbability4Vec = core::CSmallVector<SFeatureProbability, 4>;

//! The overall result probability.
double s_Probability = 1.0;
//! True if the probability depends on the correlation between two
//! time series and false otherwise.
bool s_Conditional = false;
//! The probabilities for each individual feature.
TFeatureProbability4Vec s_FeatureProbabilities;
//! The tail of the current bucket probability.
TTail2Vec s_Tail;
//! The identifier of the time series correlated with this one which
//! has the smallest probability in the current bucket (if and only
//! if the result depends on the correlation structure).
TSize1Vec s_MostAnomalousCorrelate;
};

//! \brief The model interface.
Expand Down Expand Up @@ -355,10 +393,7 @@ class MATHS_EXPORT CModel {
virtual bool probability(const CModelProbabilityParams& params,
const TTime2Vec1Vec& time,
const TDouble2Vec1Vec& value,
double& probability,
TTail2Vec& tail,
bool& conditional,
TSize1Vec& mostAnomalousCorrelate) const = 0;
SModelProbabilityResult& result) const = 0;

//! Get the Winsorisation weight to apply to \p value,
//! if appropriate.
Expand Down Expand Up @@ -499,14 +534,11 @@ class MATHS_EXPORT CModelStub : public CModel {
const TForecastPushDatapointFunc& forecastPushDataPointFunc,
std::string& messageOut);

//! Returns 1.0.
//! Returns true.
virtual bool probability(const CModelProbabilityParams& params,
const TTime2Vec1Vec& time,
const TDouble2Vec1Vec& value,
double& probability,
TTail2Vec& tail,
bool& conditional,
TSize1Vec& mostAnomalousCorrelate) const;
SModelProbabilityResult& result) const;

//! Returns empty.
virtual TDouble2Vec
Expand Down
Loading

0 comments on commit 548222b

Please sign in to comment.