diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 454d67135b..cd3f2e182a 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -28,6 +28,13 @@ //=== Regressions +== {es} version 7.2.1 + +=== Bug Fixes + +* Fix an edge case causing spurious anomalies (false positives) if the variance in the count of events +changed significantly throughout the period of a seasonal quantity. (See {ml-pull}489[#489].) + == {es} version 7.2.0 === Enhancements diff --git a/lib/maths/COneOfNPrior.cc b/lib/maths/COneOfNPrior.cc index f969336601..b77170f37e 100644 --- a/lib/maths/COneOfNPrior.cc +++ b/lib/maths/COneOfNPrior.cc @@ -54,7 +54,7 @@ double logn(std::size_t n) { const double DERATE = 0.99999; const double MINUS_INF = DERATE * boost::numeric::bounds::lowest(); const double INF = DERATE * boost::numeric::bounds::highest(); -const double MAXIMUM_LOG_BAYES_FACTOR = std::log(1e6); +const double MAXIMUM_LOG_BAYES_FACTOR = std::log(1e8); const double MAXIMUM_UNPENALISED_RELATIVE_VARIANCE_ERROR = 9.0; const double MINIMUM_SIGNIFICANT_WEIGHT = 0.01; const double MAXIMUM_RELATIVE_ERROR = 1e-3; diff --git a/lib/maths/CTimeSeriesModel.cc b/lib/maths/CTimeSeriesModel.cc index e3ca68169a..7437579e98 100644 --- a/lib/maths/CTimeSeriesModel.cc +++ b/lib/maths/CTimeSeriesModel.cc @@ -1619,6 +1619,11 @@ void CUnivariateTimeSeriesModel::reinitializeStateGivenNewComponent( // re-weight so that the total sample weight corresponds to the sample // weight the model receives from a fixed (shortish) time interval. + // We can't properly handle periodicity in the variance of the rate if + // using a Poisson process so remove it from model detection if we detect + // seasonality. 
+ m_ResidualModel->removeModels( + maths::CPrior::CModelFilter().remove(maths::CPrior::E_Poisson)); m_ResidualModel->setToNonInformative(0.0, m_ResidualModel->decayRate()); if (initialValues.size() > 0) { @@ -1648,16 +1653,6 @@ void CUnivariateTimeSeriesModel::reinitializeStateGivenNewComponent( } } - // Note we can't reinitialize this from the initial values because we do - // not expect these to be at the bucket length granularity. - if (m_MultibucketFeature != nullptr) { - m_MultibucketFeature->clear(); - } - if (m_MultibucketFeatureModel != nullptr) { - m_MultibucketFeatureModel->setToNonInformative( - 0.0, m_MultibucketFeatureModel->decayRate()); - } - if (m_Correlations != nullptr) { m_Correlations->clearCorrelationModels(m_Id); } @@ -1670,6 +1665,18 @@ void CUnivariateTimeSeriesModel::reinitializeStateGivenNewComponent( controller.reset(); } } + + // Note we can't reinitialize this from the initial values because we do + // not expect these to be at the bucket length granularity. + if (m_MultibucketFeature != nullptr) { + m_MultibucketFeature->clear(); + } + if (m_MultibucketFeatureModel != nullptr) { + m_MultibucketFeatureModel->removeModels( + maths::CPrior::CModelFilter().remove(maths::CPrior::E_Poisson)); + m_MultibucketFeatureModel->setToNonInformative(0.0, m_ResidualModel->decayRate()); + } + if (m_AnomalyModel != nullptr) { m_AnomalyModel->reset(); } @@ -2987,16 +2994,6 @@ void CMultivariateTimeSeriesModel::reinitializeStateGivenNewComponent( } } - // Note we can't reinitialize this from the initial values because we do - // not expect these to be at the bucket length granularity. 
- if (m_MultibucketFeature != nullptr) { - m_MultibucketFeature->clear(); - } - if (m_MultibucketFeatureModel != nullptr) { - m_MultibucketFeatureModel->setToNonInformative( - 0.0, m_MultibucketFeatureModel->decayRate()); - } - if (m_Controllers != nullptr) { m_ResidualModel->decayRate(m_ResidualModel->decayRate() / (*m_Controllers)[E_ResidualControl].multiplier()); @@ -3008,6 +3005,17 @@ void CMultivariateTimeSeriesModel::reinitializeStateGivenNewComponent( controller.reset(); } } + + // Note we can't reinitialize this from the initial values because we do + // not expect these to be at the bucket length granularity. + if (m_MultibucketFeature != nullptr) { + m_MultibucketFeature->clear(); + } + + if (m_MultibucketFeatureModel != nullptr) { + m_MultibucketFeatureModel->setToNonInformative(0.0, m_ResidualModel->decayRate()); + } + if (m_AnomalyModel != nullptr) { m_AnomalyModel->reset(); } diff --git a/lib/model/unittest/CEventRateModelTest.cc b/lib/model/unittest/CEventRateModelTest.cc index fdba81e19a..fba69bef76 100644 --- a/lib/model/unittest/CEventRateModelTest.cc +++ b/lib/model/unittest/CEventRateModelTest.cc @@ -592,7 +592,7 @@ void CEventRateModelTest::testProbabilityCalculation() { for (int j = 0; j < 2; ++j) { double multiBucketImpact = minProbabilities[j].third.s_MultiBucketImpact; LOG_DEBUG(<< "multi_bucket_impact = " << multiBucketImpact); - CPPUNIT_ASSERT(multiBucketImpact > expectedMultiBucketImpactThresholds[j]); + CPPUNIT_ASSERT(multiBucketImpact >= expectedMultiBucketImpactThresholds[j]); CPPUNIT_ASSERT(multiBucketImpact <= CAnomalyDetectorModelConfig::MAXIMUM_MULTI_BUCKET_IMPACT_MAGNITUDE); } } diff --git a/lib/model/unittest/CMetricAnomalyDetectorTest.cc b/lib/model/unittest/CMetricAnomalyDetectorTest.cc index ef29b01be8..c011081e1a 100644 --- a/lib/model/unittest/CMetricAnomalyDetectorTest.cc +++ b/lib/model/unittest/CMetricAnomalyDetectorTest.cc @@ -444,7 +444,7 @@ void CMetricAnomalyDetectorTest::testExcludeFrequent() { // expect there to be 
1 anomaly CPPUNIT_ASSERT_EQUAL(std::size_t(1), highAnomalyTimes.size()); - CPPUNIT_ASSERT_DOUBLES_EQUAL(9.0, highAnomalyFactors[0], 2.0); + CPPUNIT_ASSERT_DOUBLES_EQUAL(12.0, highAnomalyFactors[0], 2.0); } }