Skip to content

Commit 18ebdd6

Browse files
author
Hendrik Muhs
authored
[ML] fix segfault caused by too few outliers and harden container usage (#96)
Do not re-weight outliers unless there is more than one, which prevents a crash downstream, and harden the order statistics accumulators so they can never be created with or resized to an empty container. Fixes #94
1 parent 27e07a5 commit 18ebdd6

File tree

2 files changed

+40
-25
lines changed

2 files changed

+40
-25
lines changed

include/maths/CBasicStatistics.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1241,6 +1241,9 @@ class MATHS_EXPORT CBasicStatistics {
12411241
class COrderStatisticsStack
12421242
: public COrderStatisticsImpl<T, boost::array<T, N>, LESS>,
12431243
private boost::addable<COrderStatisticsStack<T, N, LESS>> {
1244+
1245+
static_assert(N > 0, "N must be > 0");
1246+
12441247
private:
12451248
using TArray = boost::array<T, N>;
12461249
using TImpl = COrderStatisticsImpl<T, TArray, LESS>;
@@ -1327,10 +1330,18 @@ class MATHS_EXPORT CBasicStatistics {
13271330

13281331
public:
13291332
explicit COrderStatisticsHeap(std::size_t n, const LESS& less = LESS{})
1330-
: TImpl{std::vector<T>(n, T{}), less} {}
1333+
: TImpl{std::vector<T>(std::max(n, std::size_t(1)), T{}), less} {
1334+
if (n == 0) {
1335+
LOG_ERROR(<< "Invalid size of 0 for order statistics accumulator");
1336+
}
1337+
}
13311338

13321339
//! Reset the number of statistics to gather to \p n.
13331340
void resize(std::size_t n) {
1341+
if (n == 0) {
1342+
LOG_ERROR(<< "Invalid resize to 0 for order statistics accumulator");
1343+
n = 1;
1344+
}
13341345
this->clear();
13351346
this->statistics().resize(n);
13361347
}

lib/maths/CTimeSeriesDecompositionDetail.cc

Lines changed: 28 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1596,30 +1596,34 @@ void CTimeSeriesDecompositionDetail::CComponents::reweightOutliers(
15961596
})};
15971597
double numberOutliers{SEASONAL_OUTLIER_FRACTION * numberValues};
15981598

1599-
TMinAccumulator outliers{static_cast<std::size_t>(2.0 * numberOutliers)};
1600-
TMeanAccumulator meanDifference;
1601-
core_t::TTime time = startTime + dt / 2;
1602-
for (std::size_t i = 0; i < values.size(); ++i, time += dt) {
1603-
if (CBasicStatistics::count(values[i]) > 0.0) {
1604-
double difference{std::fabs(CBasicStatistics::mean(values[i]) - predictor(time))};
1605-
outliers.add({-difference, i});
1606-
meanDifference.add(difference);
1607-
}
1608-
}
1609-
outliers.sort();
1610-
TMeanAccumulator meanDifferenceOfOutliers;
1611-
for (std::size_t i = 0u; i < static_cast<std::size_t>(numberOutliers); ++i) {
1612-
meanDifferenceOfOutliers.add(-outliers[i].first);
1613-
}
1614-
meanDifference -= meanDifferenceOfOutliers;
1615-
for (std::size_t i = 0; i < outliers.count(); ++i) {
1616-
if (-outliers[i].first > SEASONAL_OUTLIER_DIFFERENCE_THRESHOLD *
1617-
CBasicStatistics::mean(meanDifference)) {
1618-
double weight{SEASONAL_OUTLIER_WEIGHT +
1619-
(1.0 - SEASONAL_OUTLIER_WEIGHT) *
1620-
CTools::logisticFunction(static_cast<double>(i) / numberOutliers,
1621-
0.1, 1.0)};
1622-
CBasicStatistics::count(values[outliers[i].second]) *= weight;
1599+
if (numberOutliers > 1.0) {
1600+
1601+
TMinAccumulator outliers{static_cast<std::size_t>(2.0 * numberOutliers)};
1602+
TMeanAccumulator meanDifference;
1603+
core_t::TTime time = startTime + dt / 2;
1604+
for (std::size_t i = 0; i < values.size(); ++i, time += dt) {
1605+
if (CBasicStatistics::count(values[i]) > 0.0) {
1606+
double difference{
1607+
std::fabs(CBasicStatistics::mean(values[i]) - predictor(time))};
1608+
outliers.add({-difference, i});
1609+
meanDifference.add(difference);
1610+
}
1611+
}
1612+
outliers.sort();
1613+
TMeanAccumulator meanDifferenceOfOutliers;
1614+
for (std::size_t i = 0u; i < static_cast<std::size_t>(numberOutliers); ++i) {
1615+
meanDifferenceOfOutliers.add(-outliers[i].first);
1616+
}
1617+
meanDifference -= meanDifferenceOfOutliers;
1618+
for (std::size_t i = 0; i < outliers.count(); ++i) {
1619+
if (-outliers[i].first > SEASONAL_OUTLIER_DIFFERENCE_THRESHOLD *
1620+
CBasicStatistics::mean(meanDifference)) {
1621+
double weight{SEASONAL_OUTLIER_WEIGHT +
1622+
(1.0 - SEASONAL_OUTLIER_WEIGHT) *
1623+
CTools::logisticFunction(static_cast<double>(i) / numberOutliers,
1624+
0.1, 1.0)};
1625+
CBasicStatistics::count(values[outliers[i].second]) *= weight;
1626+
}
16231627
}
16241628
}
16251629
}

0 commit comments

Comments
 (0)