Skip to content

Commit 89c9402

Browse files
authored
[ML] Fix regression in QA data set (#489)
1 parent 8a02a57 commit 89c9402

File tree

5 files changed

+38
-23
lines changed

5 files changed

+38
-23
lines changed

docs/CHANGELOG.asciidoc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,13 @@
2828
2929
//=== Regressions
3030
31+
== {es} version 7.2.1
32+
33+
=== Bug Fixes
34+
35+
* Fix an edge case causing spurious anomalies (false positives) if the variance in the count of events
36+
changed significantly throughout the period of a seasonal quantity. (See {ml-pull}489[#489].)
37+
3138
== {es} version 7.2.0
3239
3340
=== Enhancements

lib/maths/COneOfNPrior.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ double logn(std::size_t n) {
5454
const double DERATE = 0.99999;
5555
const double MINUS_INF = DERATE * boost::numeric::bounds<double>::lowest();
5656
const double INF = DERATE * boost::numeric::bounds<double>::highest();
57-
const double MAXIMUM_LOG_BAYES_FACTOR = std::log(1e6);
57+
const double MAXIMUM_LOG_BAYES_FACTOR = std::log(1e8);
5858
const double MAXIMUM_UNPENALISED_RELATIVE_VARIANCE_ERROR = 9.0;
5959
const double MINIMUM_SIGNIFICANT_WEIGHT = 0.01;
6060
const double MAXIMUM_RELATIVE_ERROR = 1e-3;

lib/maths/CTimeSeriesModel.cc

Lines changed: 28 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1619,6 +1619,11 @@ void CUnivariateTimeSeriesModel::reinitializeStateGivenNewComponent(
16191619
// re-weight so that the total sample weight corresponds to the sample
16201620
// weight the model receives from a fixed (shortish) time interval.
16211621

1622+
// We can't properly handle periodicity in the variance of the rate if
1623+
// using a Poisson process so remove it from model detectio if we detect
1624+
// seasonality.
1625+
m_ResidualModel->removeModels(
1626+
maths::CPrior::CModelFilter().remove(maths::CPrior::E_Poisson));
16221627
m_ResidualModel->setToNonInformative(0.0, m_ResidualModel->decayRate());
16231628

16241629
if (initialValues.size() > 0) {
@@ -1648,16 +1653,6 @@ void CUnivariateTimeSeriesModel::reinitializeStateGivenNewComponent(
16481653
}
16491654
}
16501655

1651-
// Note we can't reinitialize this from the initial values because we do
1652-
// not expect these to be at the bucket length granularity.
1653-
if (m_MultibucketFeature != nullptr) {
1654-
m_MultibucketFeature->clear();
1655-
}
1656-
if (m_MultibucketFeatureModel != nullptr) {
1657-
m_MultibucketFeatureModel->setToNonInformative(
1658-
0.0, m_MultibucketFeatureModel->decayRate());
1659-
}
1660-
16611656
if (m_Correlations != nullptr) {
16621657
m_Correlations->clearCorrelationModels(m_Id);
16631658
}
@@ -1670,6 +1665,18 @@ void CUnivariateTimeSeriesModel::reinitializeStateGivenNewComponent(
16701665
controller.reset();
16711666
}
16721667
}
1668+
1669+
// Note we can't reinitialize this from the initial values because we do
1670+
// not expect these to be at the bucket length granularity.
1671+
if (m_MultibucketFeature != nullptr) {
1672+
m_MultibucketFeature->clear();
1673+
}
1674+
if (m_MultibucketFeatureModel != nullptr) {
1675+
m_MultibucketFeatureModel->removeModels(
1676+
maths::CPrior::CModelFilter().remove(maths::CPrior::E_Poisson));
1677+
m_MultibucketFeatureModel->setToNonInformative(0.0, m_ResidualModel->decayRate());
1678+
}
1679+
16731680
if (m_AnomalyModel != nullptr) {
16741681
m_AnomalyModel->reset();
16751682
}
@@ -2987,16 +2994,6 @@ void CMultivariateTimeSeriesModel::reinitializeStateGivenNewComponent(
29872994
}
29882995
}
29892996

2990-
// Note we can't reinitialize this from the initial values because we do
2991-
// not expect these to be at the bucket length granularity.
2992-
if (m_MultibucketFeature != nullptr) {
2993-
m_MultibucketFeature->clear();
2994-
}
2995-
if (m_MultibucketFeatureModel != nullptr) {
2996-
m_MultibucketFeatureModel->setToNonInformative(
2997-
0.0, m_MultibucketFeatureModel->decayRate());
2998-
}
2999-
30002997
if (m_Controllers != nullptr) {
30012998
m_ResidualModel->decayRate(m_ResidualModel->decayRate() /
30022999
(*m_Controllers)[E_ResidualControl].multiplier());
@@ -3008,6 +3005,17 @@ void CMultivariateTimeSeriesModel::reinitializeStateGivenNewComponent(
30083005
controller.reset();
30093006
}
30103007
}
3008+
3009+
// Note we can't reinitialize this from the initial values because we do
3010+
// not expect these to be at the bucket length granularity.
3011+
if (m_MultibucketFeature != nullptr) {
3012+
m_MultibucketFeature->clear();
3013+
}
3014+
3015+
if (m_MultibucketFeatureModel != nullptr) {
3016+
m_MultibucketFeatureModel->setToNonInformative(0.0, m_ResidualModel->decayRate());
3017+
}
3018+
30113019
if (m_AnomalyModel != nullptr) {
30123020
m_AnomalyModel->reset();
30133021
}

lib/model/unittest/CEventRateModelTest.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -592,7 +592,7 @@ void CEventRateModelTest::testProbabilityCalculation() {
592592
for (int j = 0; j < 2; ++j) {
593593
double multiBucketImpact = minProbabilities[j].third.s_MultiBucketImpact;
594594
LOG_DEBUG(<< "multi_bucket_impact = " << multiBucketImpact);
595-
CPPUNIT_ASSERT(multiBucketImpact > expectedMultiBucketImpactThresholds[j]);
595+
CPPUNIT_ASSERT(multiBucketImpact >= expectedMultiBucketImpactThresholds[j]);
596596
CPPUNIT_ASSERT(multiBucketImpact <= CAnomalyDetectorModelConfig::MAXIMUM_MULTI_BUCKET_IMPACT_MAGNITUDE);
597597
}
598598
}

lib/model/unittest/CMetricAnomalyDetectorTest.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -444,7 +444,7 @@ void CMetricAnomalyDetectorTest::testExcludeFrequent() {
444444

445445
// expect there to be 1 anomaly
446446
CPPUNIT_ASSERT_EQUAL(std::size_t(1), highAnomalyTimes.size());
447-
CPPUNIT_ASSERT_DOUBLES_EQUAL(9.0, highAnomalyFactors[0], 2.0);
447+
CPPUNIT_ASSERT_DOUBLES_EQUAL(12.0, highAnomalyFactors[0], 2.0);
448448
}
449449
}
450450

0 commit comments

Comments
 (0)