Skip to content

Commit f798fdd

Browse files
authored
[7.8][ML] Improve bad input handling in distribution models (#1119)
Backport #1114.
1 parent 304090d commit f798fdd

File tree

7 files changed

+47
-18
lines changed

7 files changed

+47
-18
lines changed

docs/CHANGELOG.asciidoc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
(See {ml-pull}1111[#1111].)
3939
* Adds new `num_matches` and `preferred_to_categories` fields to category output.
4040
(See {ml-pull}1062[#1062].)
41+
* Improve robustness of anomaly detection to bad input data. (See {ml-pull}1114[#1114].)
4142

4243
== {es} version 7.7.0
4344

lib/maths/CGammaRateConjugate.cc

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -874,13 +874,15 @@ void CGammaRateConjugate::addSamples(const TDouble1Vec& samples,
874874
try {
875875
double shift = boost::math::digamma(m_LikelihoodShape);
876876
for (std::size_t i = 0u; i < samples.size(); ++i) {
877+
double x = samples[i] + m_Offset;
877878
double n = maths_t::countForUpdate(weights[i]);
878879
double varianceScale = maths_t::seasonalVarianceScale(weights[i]) *
879880
maths_t::countVarianceScale(weights[i]);
880881

881-
double x = samples[i] + m_Offset;
882-
if (!CMathsFuncs::isFinite(x) || x <= 0.0) {
883-
LOG_ERROR(<< "Discarding " << x << " it's not gamma");
882+
if (x <= 0.0 || !CMathsFuncs::isFinite(x) || !CMathsFuncs::isFinite(n) ||
883+
!CMathsFuncs::isFinite(varianceScale)) {
884+
LOG_ERROR(<< "Discarding sample = " << x << ", weight = " << n
885+
<< ", variance scale = " << varianceScale);
884886
continue;
885887
}
886888

lib/maths/CLogNormalMeanPrecConjugate.cc

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -576,12 +576,13 @@ class CLogSampleSquareDeviation : core::CNonCopyable {
576576
bool operator()(double x, double& result) const {
577577
result = 0.0;
578578
for (std::size_t i = 0u; i < m_Samples.size(); ++i) {
579-
double residual = m_Samples[i];
580-
if (residual <= 0.0) {
579+
double residual = m_Samples[i] + x;
580+
double n = maths_t::countForUpdate(m_Weights[i]);
581+
if (residual <= 0.0 || !CMathsFuncs::isFinite(residual) ||
582+
!CMathsFuncs::isFinite(n)) {
581583
continue;
582584
}
583-
double n = maths_t::countForUpdate(m_Weights[i]);
584-
residual = std::log(residual + x) - m_Mean;
585+
residual = std::log(residual) - m_Mean;
585586
result += n * CTools::pow2(residual);
586587
}
587588
return true;
@@ -779,10 +780,17 @@ void CLogNormalMeanPrecConjugate::addSamples(const TDouble1Vec& samples,
779780

780781
TMeanAccumulator logSamplesMean_;
781782
for (std::size_t i = 0u; i < samples.size(); ++i) {
783+
double x = samples[i] + m_Offset;
782784
double n = maths_t::countForUpdate(weights[i]);
783785
double varianceScale = maths_t::seasonalVarianceScale(weights[i]) *
784786
maths_t::countVarianceScale(weights[i]);
785-
double x = samples[i] + m_Offset;
787+
if (x <= 0.0 || !CMathsFuncs::isFinite(x) || !CMathsFuncs::isFinite(n) ||
788+
!CMathsFuncs::isFinite(varianceScale)) {
789+
LOG_ERROR(<< "Discarding sample = " << x << ", weight = " << n
790+
<< ", variance scale = " << varianceScale);
791+
continue;
792+
}
793+
786794
numberSamples += n;
787795
double t = varianceScale == 1.0
788796
? r
@@ -814,14 +822,17 @@ void CLogNormalMeanPrecConjugate::addSamples(const TDouble1Vec& samples,
814822
} else {
815823
TMeanVarAccumulator logSamplesMoments;
816824
for (std::size_t i = 0u; i < samples.size(); ++i) {
825+
double x = samples[i] + m_Offset;
817826
double n = maths_t::countForUpdate(weights[i]);
818827
double varianceScale = maths_t::seasonalVarianceScale(weights[i]) *
819828
maths_t::countVarianceScale(weights[i]);
820-
double x = samples[i] + m_Offset;
821-
if (x <= 0.0) {
822-
LOG_ERROR(<< "Discarding " << x << " it's not log-normal");
829+
if (x <= 0.0 || !CMathsFuncs::isFinite(x) || !CMathsFuncs::isFinite(n) ||
830+
!CMathsFuncs::isFinite(varianceScale)) {
831+
LOG_ERROR(<< "Discarding sample = " << x << ", weight = " << n
832+
<< ", variance scale = " << varianceScale);
823833
continue;
824834
}
835+
825836
numberSamples += n;
826837
double t = varianceScale == 1.0
827838
? r

lib/maths/CNormalMeanPrecConjugate.cc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -613,9 +613,16 @@ void CNormalMeanPrecConjugate::addSamples(const TDouble1Vec& samples,
613613
double numberSamples = 0.0;
614614
TMeanVarAccumulator sampleMoments;
615615
for (std::size_t i = 0u; i < samples.size(); ++i) {
616+
double x = samples[i];
616617
double n = maths_t::countForUpdate(weights[i]);
617618
double varianceScale = maths_t::seasonalVarianceScale(weights[i]) *
618619
maths_t::countVarianceScale(weights[i]);
620+
if (!CMathsFuncs::isFinite(x) || !CMathsFuncs::isFinite(n) ||
621+
!CMathsFuncs::isFinite(varianceScale)) {
622+
LOG_ERROR(<< "Discarding sample = " << x << ", weight = " << n
623+
<< ", variance scale = " << varianceScale);
624+
continue;
625+
}
619626
numberSamples += n;
620627
sampleMoments.add(samples[i], n / varianceScale);
621628
}

lib/maths/COneOfNPrior.cc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,11 @@ void COneOfNPrior::addSamples(const TDouble1Vec& samples,
310310
n = this->numberSamples() - n;
311311

312312
for (std::size_t i = 0; i < samples.size(); ++i) {
313-
m_SampleMoments.add(samples[i], maths_t::countForUpdate(weights[i]));
313+
double xi = samples[i];
314+
double ni = maths_t::countForUpdate(weights[i]);
315+
if (CMathsFuncs::isFinite(xi) && CMathsFuncs::isFinite(ni)) {
316+
m_SampleMoments.add(xi, ni);
317+
}
314318
}
315319

316320
// For this 1-of-n model we assume that all the data come from one

lib/maths/CPoissonMeanConjugate.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -332,10 +332,10 @@ void CPoissonMeanConjugate::addSamples(const TDouble1Vec& samples,
332332
double numberSamples = 0.0;
333333
double sampleSum = 0.0;
334334
for (std::size_t i = 0u; i < samples.size(); ++i) {
335-
double n = maths_t::countForUpdate(weights[i]);
336335
double x = samples[i] + m_Offset;
337-
if (!CMathsFuncs::isFinite(x) || x < 0.0) {
338-
LOG_ERROR(<< "Discarding " << x << " it's not Poisson");
336+
double n = maths_t::countForUpdate(weights[i]);
337+
if (x < 0.0 || !CMathsFuncs::isFinite(x) || !CMathsFuncs::isFinite(n)) {
338+
LOG_ERROR(<< "Discarding sample = " << x << ", weight = " << n);
339339
continue;
340340
}
341341
numberSamples += n;

lib/maths/CPrior.cc

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,11 +92,15 @@ double CPrior::offsetMargin() const {
9292
return 0.0;
9393
}
9494

95-
void CPrior::addSamples(const TDouble1Vec& /*samples*/, const TDoubleWeightsAry1Vec& weights) {
95+
void CPrior::addSamples(const TDouble1Vec& samples, const TDoubleWeightsAry1Vec& weights) {
9696
double n = 0.0;
9797
try {
98-
for (const auto& weight : weights) {
99-
n += maths_t::countForUpdate(weight);
98+
for (std::size_t i = 0; i < weights.size(); ++i) {
99+
double xi = samples[i];
100+
double ni = maths_t::countForUpdate(weights[i]);
101+
if (CMathsFuncs::isFinite(xi) && CMathsFuncs::isFinite(ni)) {
102+
n += ni;
103+
}
100104
}
101105
} catch (const std::exception& e) {
102106
LOG_ERROR(<< "Failed to extract sample counts: " << e.what());

0 commit comments

Comments
 (0)