From 26513dc6215e9c83dbfbec731e82f60ec6a0878e Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Mon, 11 May 2020 10:31:21 +0100 Subject: [PATCH 1/3] Trap possible divide by zero --- lib/maths/CAdaptiveBucketing.cc | 29 +++++++++++++------ .../CCalendarComponentAdaptiveBucketing.cc | 4 ++- lib/maths/CTimeSeriesDecomposition.cc | 2 +- 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/lib/maths/CAdaptiveBucketing.cc b/lib/maths/CAdaptiveBucketing.cc index 195ce62ff2..88443a7246 100644 --- a/lib/maths/CAdaptiveBucketing.cc +++ b/lib/maths/CAdaptiveBucketing.cc @@ -246,7 +246,7 @@ bool CAdaptiveBucketing::initialize(double a, double b, std::size_t n) { void CAdaptiveBucketing::initialValues(core_t::TTime start, core_t::TTime end, const TFloatMeanAccumulatorVec& values) { - if (!this->initialized()) { + if (this->initialized() == false) { return; } @@ -406,7 +406,7 @@ void CAdaptiveBucketing::refine(core_t::TTime time) { LOG_TRACE(<< "totalAveragingError = " << totalAveragingError); double n_{static_cast<double>(n)}; - double step{(1 - n_ * EPS) * totalAveragingError / n_}; + double step{(1.0 - n_ * EPS) * totalAveragingError / n_}; TFloatVec endpoints{m_Endpoints}; LOG_TRACE(<< "step = " << step); @@ -505,7 +505,7 @@ bool CAdaptiveBucketing::knots(core_t::TTime time, double a{m_Endpoints[i]}; double b{m_Endpoints[i + 1]}; double c{m_Centres[i]}; - double c0{c}; + double c0{c - m_Endpoints[0]}; knots.push_back(m_Endpoints[0]); values.push_back(this->predict(i, time, c)); variances.push_back(this->variance(i)); @@ -549,26 +549,37 @@ bool CAdaptiveBucketing::knots(core_t::TTime time, double alpha{m_Endpoints[n] - m_Centres[j]}; double beta{c0}; double Z{alpha + beta}; + if (Z == 0.0) { + alpha = beta = 0.5; + } else { + alpha /= Z; + beta /= Z; + } double lastPeriodValue{ this->predict(j, time, m_Centres[j] - m_Endpoints[n])}; double lastPeriodVariance{this->variance(j)}; knots[0] = m_Endpoints[0]; - values[0] = (alpha * values[0] + beta * lastPeriodValue) / Z; - 
variances[0] = (alpha * variances[0] + beta * lastPeriodVariance) / Z; + values[0] = alpha * values[0] + beta * lastPeriodValue; + variances[0] = alpha * variances[0] + beta * lastPeriodVariance; break; } } - for (std::size_t j = 0u; j < n; ++j) { + for (std::size_t j = 0; j < n; ++j) { if (this->bucketCount(j) > 0.0) { double alpha{m_Centres[j]}; double beta{m_Endpoints[n] - knots.back()}; double Z{alpha + beta}; + if (Z == 0.0) { + alpha = beta = 0.5; + } else { + alpha /= Z; + beta /= Z; + } double nextPeriodValue{ this->predict(j, time, m_Endpoints[n] + m_Centres[j])}; double nextPeriodVariance{this->variance(j)}; - values.push_back((alpha * values.back() + beta * nextPeriodValue) / Z); - variances.push_back( - (alpha * variances.back() + beta * nextPeriodVariance) / Z); + values.push_back(alpha * values.back() + beta * nextPeriodValue); + variances.push_back(alpha * variances.back() + beta * nextPeriodVariance); knots.push_back(m_Endpoints[n]); break; } diff --git a/lib/maths/CCalendarComponentAdaptiveBucketing.cc b/lib/maths/CCalendarComponentAdaptiveBucketing.cc index 96ad17c33f..c26c7b213d 100644 --- a/lib/maths/CCalendarComponentAdaptiveBucketing.cc +++ b/lib/maths/CCalendarComponentAdaptiveBucketing.cc @@ -276,7 +276,9 @@ void CCalendarComponentAdaptiveBucketing::refresh(const TFloatVec& oldEndpoints) static_cast<double>(oldCentres[l - 1])); largeErrorCount += w * oldLargeErrorCounts[l - 1]; count += w * w * CBasicStatistics::count(m_Values[l - 1]); - double scale{count / CBasicStatistics::count(value)}; + double scale{count == CBasicStatistics::count(value) + ? 
1.0 + : count / CBasicStatistics::count(value)}; newValues.push_back(CBasicStatistics::scaled(value, scale)); newCentres.push_back(CTools::truncate(CBasicStatistics::mean(centre), yl, yr)); newLargeErrorCounts.push_back(largeErrorCount); diff --git a/lib/maths/CTimeSeriesDecomposition.cc b/lib/maths/CTimeSeriesDecomposition.cc index 39ebbaa31e..22b41dc26e 100644 --- a/lib/maths/CTimeSeriesDecomposition.cc +++ b/lib/maths/CTimeSeriesDecomposition.cc @@ -417,7 +417,7 @@ TDoubleDoublePr CTimeSeriesDecomposition::scale(core_t::TTime time, } double mean{this->meanVariance()}; - if (mean == 0.0) { + if (mean == 0.0 || variance == 0.0) { return {1.0, 1.0}; } From be21a99cab8f01e9ac8ae5538e59a2c82539c62f Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Mon, 11 May 2020 10:47:06 +0100 Subject: [PATCH 2/3] Docs --- docs/CHANGELOG.asciidoc | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 738c3ff8f4..ac4d39e32e 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -87,6 +87,7 @@ {ml-pull}1197[#1197].) * Improve the initialization of seasonal components for anomaly detection. (See {ml-pull}1201[#1201], issue: {ml-issue}#1178[#1178].) +* Fix possible root cause for "Bad variance scale nan" log errors. (See {ml-pull}1225[#1225].) 
== {es} version 7.7.1 From da27f31fbfea1d9590d7a083e4bfcb955aa17596 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Mon, 11 May 2020 13:10:19 +0100 Subject: [PATCH 3/3] Comment --- lib/maths/CCalendarComponentAdaptiveBucketing.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/maths/CCalendarComponentAdaptiveBucketing.cc b/lib/maths/CCalendarComponentAdaptiveBucketing.cc index c26c7b213d..4a12ad8e4c 100644 --- a/lib/maths/CCalendarComponentAdaptiveBucketing.cc +++ b/lib/maths/CCalendarComponentAdaptiveBucketing.cc @@ -276,6 +276,8 @@ void CCalendarComponentAdaptiveBucketing::refresh(const TFloatVec& oldEndpoints) static_cast<double>(oldCentres[l - 1])); largeErrorCount += w * oldLargeErrorCounts[l - 1]; count += w * w * CBasicStatistics::count(m_Values[l - 1]); + // Defend against 0 / 0: if CBasicStatistics::count(value) + // is zero then count must be too. double scale{count == CBasicStatistics::count(value) ? 1.0 : count / CBasicStatistics::count(value)};