Skip to content

Commit ec1fcd3

Browse files
Hendrik Muhs, tveasey
Hendrik Muhs
authored and committed
[ML] fix segfault caused by too few outliers and harden container usage (#96)
Do not re-weight outliers if there is at most one, which prevents a crash downstream, and harden the order statistics accumulators so that they can never hold an empty container. Fixes #94
1 parent 4387f53 commit ec1fcd3

File tree

2 files changed

+40
-25
lines changed

2 files changed

+40
-25
lines changed

include/maths/CBasicStatistics.h

+12-1
Original file line number | Diff line number | Diff line change
@@ -1241,6 +1241,9 @@ class MATHS_EXPORT CBasicStatistics {
12411241
class COrderStatisticsStack
12421242
: public COrderStatisticsImpl<T, boost::array<T, N>, LESS>,
12431243
private boost::addable<COrderStatisticsStack<T, N, LESS>> {
1244+
1245+
static_assert(N > 0, "N must be > 0");
1246+
12441247
private:
12451248
using TArray = boost::array<T, N>;
12461249
using TImpl = COrderStatisticsImpl<T, TArray, LESS>;
@@ -1327,10 +1330,18 @@ class MATHS_EXPORT CBasicStatistics {
13271330

13281331
public:
13291332
explicit COrderStatisticsHeap(std::size_t n, const LESS& less = LESS{})
1330-
: TImpl{std::vector<T>(n, T{}), less} {}
1333+
: TImpl{std::vector<T>(std::max(n, std::size_t(1)), T{}), less} {
1334+
if (n == 0) {
1335+
LOG_ERROR(<< "Invalid size of 0 for order statistics accumulator");
1336+
}
1337+
}
13311338

13321339
//! Reset the number of statistics to gather to \p n.
13331340
void resize(std::size_t n) {
1341+
if (n == 0) {
1342+
LOG_ERROR(<< "Invalid resize to 0 for order statistics accumulator");
1343+
n = 1;
1344+
}
13341345
this->clear();
13351346
this->statistics().resize(n);
13361347
}

lib/maths/CTimeSeriesDecompositionDetail.cc

+28-24
Original file line number | Diff line number | Diff line change
@@ -1595,30 +1595,34 @@ void CTimeSeriesDecompositionDetail::CComponents::reweightOutliers(
15951595
})};
15961596
double numberOutliers{SEASONAL_OUTLIER_FRACTION * numberValues};
15971597

1598-
TMinAccumulator outliers{static_cast<std::size_t>(2.0 * numberOutliers)};
1599-
TMeanAccumulator meanDifference;
1600-
core_t::TTime time = startTime + dt / 2;
1601-
for (std::size_t i = 0; i < values.size(); ++i, time += dt) {
1602-
if (CBasicStatistics::count(values[i]) > 0.0) {
1603-
double difference{std::fabs(CBasicStatistics::mean(values[i]) - predictor(time))};
1604-
outliers.add({-difference, i});
1605-
meanDifference.add(difference);
1606-
}
1607-
}
1608-
outliers.sort();
1609-
TMeanAccumulator meanDifferenceOfOutliers;
1610-
for (std::size_t i = 0u; i < static_cast<std::size_t>(numberOutliers); ++i) {
1611-
meanDifferenceOfOutliers.add(-outliers[i].first);
1612-
}
1613-
meanDifference -= meanDifferenceOfOutliers;
1614-
for (std::size_t i = 0; i < outliers.count(); ++i) {
1615-
if (-outliers[i].first > SEASONAL_OUTLIER_DIFFERENCE_THRESHOLD *
1616-
CBasicStatistics::mean(meanDifference)) {
1617-
double weight{SEASONAL_OUTLIER_WEIGHT +
1618-
(1.0 - SEASONAL_OUTLIER_WEIGHT) *
1619-
CTools::logisticFunction(static_cast<double>(i) / numberOutliers,
1620-
0.1, 1.0)};
1621-
CBasicStatistics::count(values[outliers[i].second]) *= weight;
1598+
if (numberOutliers > 1.0) {
1599+
1600+
TMinAccumulator outliers{static_cast<std::size_t>(2.0 * numberOutliers)};
1601+
TMeanAccumulator meanDifference;
1602+
core_t::TTime time = startTime + dt / 2;
1603+
for (std::size_t i = 0; i < values.size(); ++i, time += dt) {
1604+
if (CBasicStatistics::count(values[i]) > 0.0) {
1605+
double difference{
1606+
std::fabs(CBasicStatistics::mean(values[i]) - predictor(time))};
1607+
outliers.add({-difference, i});
1608+
meanDifference.add(difference);
1609+
}
1610+
}
1611+
outliers.sort();
1612+
TMeanAccumulator meanDifferenceOfOutliers;
1613+
for (std::size_t i = 0u; i < static_cast<std::size_t>(numberOutliers); ++i) {
1614+
meanDifferenceOfOutliers.add(-outliers[i].first);
1615+
}
1616+
meanDifference -= meanDifferenceOfOutliers;
1617+
for (std::size_t i = 0; i < outliers.count(); ++i) {
1618+
if (-outliers[i].first > SEASONAL_OUTLIER_DIFFERENCE_THRESHOLD *
1619+
CBasicStatistics::mean(meanDifference)) {
1620+
double weight{SEASONAL_OUTLIER_WEIGHT +
1621+
(1.0 - SEASONAL_OUTLIER_WEIGHT) *
1622+
CTools::logisticFunction(static_cast<double>(i) / numberOutliers,
1623+
0.1, 1.0)};
1624+
CBasicStatistics::count(values[outliers[i].second]) *= weight;
1625+
}
16221626
}
16231627
}
16241628
}

0 commit comments

Comments
 (0)