Skip to content

Commit b1eb036

Browse files
authored
make stdev calculation more numerically stable (#3984)
1 parent 36fc0ee commit b1eb036

File tree

2 files changed

+40
-8
lines changed

2 files changed

+40
-8
lines changed

ydb/core/mind/hive/hive.h

+22-8
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#pragma once
22
#include <bitset>
3+
#include <ranges>
34

45
#include <util/generic/queue.h>
56
#include <util/random/random.h>
@@ -201,20 +202,33 @@ TResourceNormalizedValues NormalizeRawValues(const TResourceRawValues& values, c
201202
NMetrics::EResource GetDominantResourceType(const TResourceRawValues& values, const TResourceRawValues& maximum);
202203
NMetrics::EResource GetDominantResourceType(const TResourceNormalizedValues& normValues);
203204

205+
// https://en.wikipedia.org/wiki/Kahan_summation_algorithm
206+
template<std::ranges::range TRange>
207+
std::ranges::range_value_t<TRange> StableSum(const TRange& values) {
208+
using TValue = std::ranges::range_value_t<TRange>;
209+
TValue sum{};
210+
TValue correction{};
211+
for (const auto& x : values) {
212+
TValue y = x - correction;
213+
TValue tmp = sum + y;
214+
correction = (tmp - sum) - y;
215+
sum = tmp;
216+
}
217+
return sum;
218+
}
219+
204220
template <typename... ResourceTypes>
205221
inline std::tuple<ResourceTypes...> GetStDev(const TVector<std::tuple<ResourceTypes...>>& values) {
206222
std::tuple<ResourceTypes...> sum;
207223
if (values.empty())
208224
return sum;
209-
for (const auto& v : values) {
210-
sum = sum + v;
211-
}
225+
sum = StableSum(values);
212226
auto mean = sum / values.size();
213-
sum = std::tuple<ResourceTypes...>();
214-
for (const auto& v : values) {
215-
auto diff = v - mean;
216-
sum = sum + diff * diff;
217-
}
227+
auto quadraticDev = [&] (const std::tuple<ResourceTypes...>& value) {
228+
auto diff = value - mean;
229+
return diff * diff;
230+
};
231+
sum = StableSum(values | std::views::transform(quadraticDev));
218232
auto div = sum / values.size();
219233
auto st_dev = sqrt(div);
220234
return tuple_cast<ResourceTypes...>::cast(st_dev);

ydb/core/mind/hive/hive_impl_ut.cpp

+18
Original file line numberDiff line numberDiff line change
@@ -193,4 +193,22 @@ Y_UNIT_TEST_SUITE(THiveImplTest) {
193193
Ctest << "HIVE_TABLET_BALANCE_STRATEGY_RANDOM" << Endl;
194194
CheckSpeedAndDistribution(allTablets, BalanceTablets<NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_RANDOM>, EResourceToBalance::Memory);
195195
}
196+
197+
Y_UNIT_TEST(TestStDev) {
    // Checks that GetStDev returns the right value for small, nearly equal
    // inputs and that the result does not depend on where the outlier sits.
    using TSingleResource = std::tuple<double>;

    // 99 samples of 50e-6 and a single sample of 51e-6.
    TVector<TSingleResource> values(100, 50.0 / 1'000'000);
    values.front() = 51.0 / 1'000'000;

    double stDev1 = std::get<0>(GetStDev(values));

    // Move the outlier from the first position to the last one.
    std::swap(values.front(), values.back());

    double stDev2 = std::get<0>(GetStDev(values));

    // In units of 1e-6: mean = 50.01, so the squared deviations are
    // 99 * 0.01^2 + 0.99^2 = 0.99 and the variance is 0.99 / 100 = 0.0099.
    double expectedStDev = sqrt(0.0099) / 1'000'000;

    // The expected value is about 1e-7, so the tolerance has to be far
    // below that for the comparison to be meaningful.
    UNIT_ASSERT_DOUBLES_EQUAL(expectedStDev, stDev1, 1e-12);
    UNIT_ASSERT_VALUES_EQUAL(stDev1, stDev2);
}
196214
}

0 commit comments

Comments
 (0)