@@ -303,29 +303,28 @@ void project(const TFloatMeanAccumulatorVec& values,
303
303
304
304
// ! Calculate the number of non-empty buckets at each bucket offset in
305
305
// ! the period for the \p values in \p windows.
306
- TSizeVec calculateRepeats (const TSizeSizePr2Vec& windows,
307
- std::size_t period,
308
- const TFloatMeanAccumulatorVec& values) {
309
- TSizeVec result (std::min (period, length (windows[0 ])), 0 );
306
+ TDoubleVec calculateRepeats (const TSizeSizePr2Vec& windows,
307
+ std::size_t period,
308
+ const TFloatMeanAccumulatorVec& values) {
309
+ TDoubleVec result (std::min (period, length (windows[0 ])), 0 );
310
310
std::size_t n{values.size ()};
311
311
for (const auto & window : windows) {
312
312
std::size_t a{window.first };
313
313
std::size_t b{window.second };
314
314
for (std::size_t i = a; i < b; ++i) {
315
- if (CBasicStatistics::count (values[i % n]) > 0.0 ) {
316
- ++result[(i - a) % period];
317
- }
315
+ double count{CBasicStatistics::count (values[i % n])};
316
+ result[(i - a) % period] += std::min (count, 1.0 );
318
317
}
319
318
}
320
319
return result;
321
320
}
322
321
323
322
// ! Calculate the number of non-empty buckets at each bucket offset in
324
323
// ! the period for the \p values in \p windows.
325
- TSizeVec calculateRepeats (const TTimeTimePr2Vec& windows_,
326
- core_t ::TTime period,
327
- core_t ::TTime bucketLength,
328
- const TFloatMeanAccumulatorVec& values) {
324
+ TDoubleVec calculateRepeats (const TTimeTimePr2Vec& windows_,
325
+ core_t ::TTime period,
326
+ core_t ::TTime bucketLength,
327
+ const TFloatMeanAccumulatorVec& values) {
329
328
TSizeSizePr2Vec windows;
330
329
calculateIndexWindows (windows_, bucketLength, windows);
331
330
return calculateRepeats (windows, period / bucketLength, values);
@@ -344,53 +343,53 @@ void reweightOutliers(const std::vector<T>& trend,
344
343
using TMaxAccumulator =
345
344
CBasicStatistics::COrderStatisticsHeap<TDoubleSizePr, std::greater<TDoubleSizePr>>;
346
345
347
- if (values.size () > 0 ) {
346
+ std::size_t period{trend.size ()};
347
+ std::size_t numberOutliers{static_cast <std::size_t >([&period, &values] {
348
+ std::size_t count (std::count_if (
349
+ values.begin (), values.end (), [](const TFloatMeanAccumulator& value) {
350
+ return CBasicStatistics::count (value) > 0.0 ;
351
+ }));
352
+ return SEASONAL_OUTLIER_FRACTION *
353
+ static_cast <double >(count - std::min (count, period));
354
+ }())};
355
+ LOG_TRACE (<< " Number outliers = " << numberOutliers);
356
+
357
+ if (numberOutliers > 0 ) {
348
358
TSizeSizePr2Vec windows;
349
359
calculateIndexWindows (windows_, bucketLength, windows);
350
- std::size_t period{trend.size ()};
351
360
std::size_t n{values.size ()};
352
361
353
- TSizeVec repeats{calculateRepeats (windows, period, values)};
354
- double excess{std::accumulate (
355
- repeats.begin (), repeats.end (), 0.0 , [](double excess_, std::size_t repeat) {
356
- return excess_ + static_cast <double >(repeat > 1 ? repeat - 1 : 0 );
357
- })};
358
- std::size_t numberOutliers{static_cast <std::size_t >(SEASONAL_OUTLIER_FRACTION * excess)};
359
- LOG_TRACE (<< " Number outliers = " << numberOutliers);
360
-
361
- if (numberOutliers > 0 ) {
362
- TMaxAccumulator outliers{numberOutliers};
363
- TMeanAccumulator meanDifference;
364
- for (const auto & window : windows) {
365
- std::size_t a{window.first };
366
- std::size_t b{window.second };
367
- for (std::size_t j = a; j < b; ++j) {
368
- const TFloatMeanAccumulator& value{values[j % n]};
369
- if (CBasicStatistics::count (value) > 0.0 ) {
370
- std::size_t offset{(j - a) % period};
371
- double difference{std::fabs (CBasicStatistics::mean (value) -
372
- CBasicStatistics::mean (trend[offset]))};
373
- outliers.add ({difference, j});
374
- meanDifference.add (difference);
375
- }
362
+ TMaxAccumulator outliers{numberOutliers};
363
+ TMeanAccumulator meanDifference;
364
+ for (const auto & window : windows) {
365
+ std::size_t a{window.first };
366
+ std::size_t b{window.second };
367
+ for (std::size_t j = a; j < b; ++j) {
368
+ const TFloatMeanAccumulator& value{values[j % n]};
369
+ if (CBasicStatistics::count (value) > 0.0 ) {
370
+ std::size_t offset{(j - a) % period};
371
+ double difference{std::fabs (CBasicStatistics::mean (value) -
372
+ CBasicStatistics::mean (trend[offset]))};
373
+ outliers.add ({difference, j});
374
+ meanDifference.add (difference);
376
375
}
377
376
}
378
- TMeanAccumulator meanDifferenceOfOutliers;
379
- for ( const auto & outlier : outliers) {
380
- meanDifferenceOfOutliers. add (outlier. first );
381
- }
382
- meanDifference -= meanDifferenceOfOutliers;
383
- LOG_TRACE (<< " mean difference = " << CBasicStatistics::mean (meanDifference)) ;
384
- LOG_TRACE (<< " outliers = " << core::CContainerPrinter::print (outliers ));
385
-
386
- for ( const auto & outlier : outliers) {
387
- if ( outlier. first > SEASONAL_OUTLIER_DIFFERENCE_THRESHOLD *
388
- CBasicStatistics::mean (meanDifference)) {
389
- CBasicStatistics::count (values[outlier. second % n]) *= SEASONAL_OUTLIER_WEIGHT;
390
- }
377
+ }
378
+ TMeanAccumulator meanDifferenceOfOutliers;
379
+ for ( const auto & outlier : outliers) {
380
+ meanDifferenceOfOutliers. add (outlier. first );
381
+ }
382
+ meanDifference -= meanDifferenceOfOutliers ;
383
+ LOG_TRACE (<< " mean difference = " << CBasicStatistics::mean (meanDifference ));
384
+ LOG_TRACE (<< " outliers = " << core::CContainerPrinter::print (outliers));
385
+
386
+ for ( const auto & outlier : outliers) {
387
+ if (outlier. first > SEASONAL_OUTLIER_DIFFERENCE_THRESHOLD *
388
+ CBasicStatistics::mean (meanDifference)) {
389
+ CBasicStatistics::count (values[outlier. second % n]) *= SEASONAL_OUTLIER_WEIGHT;
391
390
}
392
- LOG_TRACE (<< " Values - outliers = " << core::CContainerPrinter::print (values));
393
391
}
392
+ LOG_TRACE (<< " Values - outliers = " << core::CContainerPrinter::print (values));
394
393
}
395
394
}
396
395
@@ -1839,10 +1838,12 @@ bool CPeriodicityHypothesisTests::testPeriodWithScaling(const TTimeTimePr2Vec& w
1839
1838
}
1840
1839
1841
1840
// Compute the degrees of freedom given the alternative hypothesis.
1842
- TSizeVec repeats (calculateRepeats (windows, period_, m_BucketLength, values));
1843
- double b{static_cast <double >(
1844
- std::count_if (repeats.begin (), repeats.end (),
1845
- [](std::size_t repeat) { return repeat > 0 ; }))};
1841
+ double b{[&windows, &period_, &values, this ] {
1842
+ TDoubleVec repeats (calculateRepeats (windows, period_, m_BucketLength, values));
1843
+ return static_cast <double >(
1844
+ std::count_if (repeats.begin (), repeats.end (),
1845
+ [](double repeat) { return repeat > 0.0 ; }));
1846
+ }()};
1846
1847
double df1{stats.s_NonEmptyBuckets - b - static_cast <double >(segmentation.size () - 2 )};
1847
1848
LOG_TRACE (<< " populated = " << b);
1848
1849
@@ -2055,14 +2056,10 @@ bool CPeriodicityHypothesisTests::testPartition(const TTimeTimePr2Vec& partition
2055
2056
// 3) The significance of the variance reduction, and
2056
2057
// 4) The amount of variance reduction.
2057
2058
2058
- auto calculateMeanRepeats = [&](const TTimeTimePr2Vec& w, core_t ::TTime p) {
2059
- TSizeVec repeats{calculateRepeats (w, p, m_BucketLength, values)};
2060
- return CBasicStatistics::mean (
2061
- std::accumulate (repeats.begin (), repeats.end (), TMeanAccumulator{},
2062
- [](TMeanAccumulator mean, std::size_t r) {
2063
- mean.add (static_cast <double >(r));
2064
- return mean;
2065
- }));
2059
+ auto calculateMeanRepeats = [&values, this ](const TTimeTimePr2Vec& w, core_t ::TTime p) {
2060
+ TMeanAccumulator result;
2061
+ result.add (calculateRepeats (w, p, m_BucketLength, values));
2062
+ return CBasicStatistics::mean (result);
2066
2063
};
2067
2064
2068
2065
double p{0.0 };
@@ -2143,13 +2140,11 @@ bool CPeriodicityHypothesisTests::testVariance(const TTimeTimePr2Vec& window,
2143
2140
LOG_TRACE (<< " autocorrelation = " << R);
2144
2141
LOG_TRACE (<< " autocorrelationThreshold = " << stats.s_AutocorrelationThreshold );
2145
2142
2146
- TSizeVec repeats{calculateRepeats (window, period_, m_BucketLength, buckets)};
2147
- meanRepeats = CBasicStatistics::mean (
2148
- std::accumulate (repeats.begin (), repeats.end (), TMeanAccumulator{},
2149
- [](TMeanAccumulator mean, std::size_t repeat) {
2150
- mean.add (static_cast <double >(repeat));
2151
- return mean;
2152
- }));
2143
+ meanRepeats = [&window, &period_, &buckets, this ] {
2144
+ TMeanAccumulator result;
2145
+ result.add (calculateRepeats (window, period_, m_BucketLength, buckets));
2146
+ return CBasicStatistics::mean (result);
2147
+ }();
2153
2148
LOG_TRACE (<< " mean repeats = " << meanRepeats);
2154
2149
2155
2150
// We're trading off:
0 commit comments