Skip to content

Commit c305d5d

Browse files
authored
Merge 5d350b6 into d132765
2 parents d132765 + 5d350b6 commit c305d5d

File tree

5 files changed

+107
-7
lines changed

5 files changed

+107
-7
lines changed

ydb/core/mind/hive/hive_ut.cpp

+90
Original file line numberDiff line numberDiff line change
@@ -4507,6 +4507,96 @@ Y_UNIT_TEST_SUITE(THiveTest) {
45074507
UNIT_ASSERT_LE(movedToFirstNode, TABLETS_PER_NODE / 2);
45084508
}
45094509

4510+
// Verifies that Hive leaves tablets where they are when only small CPU and
// memory usage is reported for them: the per-node tablet distribution read
// before the metrics are sent must equal the one read afterwards, and the
// Counter metric returned in the domain stats must be 0.
Y_UNIT_TEST(TestHiveNoBalancingWithLowResourceUsage) {
4511+
static constexpr ui64 NUM_NODES = 5;
4512+
static constexpr ui64 NUM_TABLETS = 100;
4513+
TTestBasicRuntime runtime(NUM_NODES, false);
4514+
// NOTE(review): zero periods and a metrics window of 1 presumably make the
// reported metrics take effect without delay - confirm against HiveConfig.
Setup(runtime, true, 1, [](TAppPrepare& app) {
4515+
app.HiveConfig.SetTabletKickCooldownPeriod(0);
4516+
app.HiveConfig.SetResourceChangeReactionPeriod(0);
4517+
app.HiveConfig.SetMetricsWindowSize(1);
4518+
});
4519+
// Node id of runtime node 0; used below to turn a node id into an index
// into the per-node array.
const int nodeBase = runtime.GetNodeId(0);
4520+
TActorId senderA = runtime.AllocateEdgeActor();
4521+
const ui64 hiveTablet = MakeDefaultHiveID(0);
4522+
const ui64 testerTablet = MakeDefaultHiveID(1);
4523+
4524+
// Requests the tablet list from Hive and groups tablet ids by node index
// (reported node id minus nodeBase). Returns one vector of tablet ids per node.
auto getDistribution = [hiveTablet, nodeBase, senderA, &runtime]() -> std::array<std::vector<ui64>, NUM_NODES> {
4525+
std::array<std::vector<ui64>, NUM_NODES> nodeTablets = {};
4526+
{
4527+
runtime.SendToPipe(hiveTablet, senderA, new TEvHive::TEvRequestHiveInfo());
4528+
TAutoPtr<IEventHandle> handle;
4529+
TEvHive::TEvResponseHiveInfo* response = runtime.GrabEdgeEventRethrow<TEvHive::TEvResponseHiveInfo>(handle);
4530+
for (const NKikimrHive::TTabletInfo& tablet : response->Record.GetTablets()) {
4531+
// Guard the array index below: the node index must fall in [0, NUM_NODES).
UNIT_ASSERT_C(((int)tablet.GetNodeID() - nodeBase >= 0) && (tablet.GetNodeID() - nodeBase < NUM_NODES),
4532+
"nodeId# " << tablet.GetNodeID() << " nodeBase# " << nodeBase);
4533+
nodeTablets[tablet.GetNodeID() - nodeBase].push_back(tablet.GetTabletID());
4534+
}
4535+
}
4536+
return nodeTablets;
4537+
};
4538+
4539+
CreateTestBootstrapper(runtime, CreateTestTabletInfo(hiveTablet, TTabletTypes::Hive), &CreateDefaultHive);
4540+
4541+
// wait for creation of nodes
4542+
{
4543+
TDispatchOptions options;
4544+
// One EvStatus event is awaited per node (NUM_NODES in total).
options.FinalEvents.emplace_back(TEvLocal::EvStatus, NUM_NODES);
4545+
runtime.DispatchEvents(options);
4546+
}
4547+
4548+
// Create NUM_TABLETS tablets of type Dummy, each with its own owner index
// (100500 + i) and object id (i); MakeSureTabletIsUp is called for each one
// before the next is created.
TTabletTypes::EType tabletType = TTabletTypes::Dummy;
4549+
std::vector<ui64> tablets;
4550+
tablets.reserve(NUM_TABLETS);
4551+
for (size_t i = 0; i < NUM_TABLETS; ++i) {
4552+
THolder<TEvHive::TEvCreateTablet> ev(new TEvHive::TEvCreateTablet(testerTablet, 100500 + i, tabletType, BINDED_CHANNELS));
4553+
ev->Record.SetObjectId(i);
4554+
ui64 tabletId = SendCreateTestTablet(runtime, hiveTablet, testerTablet, std::move(ev), 0, true);
4555+
MakeSureTabletIsUp(runtime, tabletId, 0);
4556+
tablets.push_back(tabletId);
4557+
}
4558+
4559+
// Baseline placement, captured before any metrics are reported.
auto initialDistribution = getDistribution();
4560+
4561+
// report small metrics for some tablets
4562+
// Each tablet gets one of two fixed CPU/memory pairs, picked by the parity
// of the next generated number. NOTE(review): the fixed seed presumably
// keeps the split reproducible between runs - confirm.
auto rand = CreateDeterministicRandomProvider(777);
4563+
for (auto tablet : tablets) {
4564+
THolder<TEvHive::TEvTabletMetrics> metrics = MakeHolder<TEvHive::TEvTabletMetrics>();
4565+
NKikimrHive::TTabletMetrics* metric = metrics->Record.AddTabletMetrics();
4566+
metric->SetTabletID(tablet);
4567+
if (rand->GenRand() % 2) {
4568+
metric->MutableResourceUsage()->SetCPU(1001); // 1% core
4569+
metric->MutableResourceUsage()->SetMemory(150'000); // 150kb
4570+
} else {
4571+
metric->MutableResourceUsage()->SetCPU(999);
4572+
metric->MutableResourceUsage()->SetMemory(100'000);
4573+
}
4574+
4575+
runtime.SendToPipe(hiveTablet, senderA, metrics.Release());
4576+
}
4577+
4578+
// Dispatch events with EvBalancerOut as the final event and a 10 second
// limit. NOTE(review): presumably the call also returns when the limit
// expires without a balancer run - confirm DispatchEvents semantics.
{
4579+
TDispatchOptions options;
4580+
options.FinalEvents.emplace_back(NHive::TEvPrivate::EvBalancerOut);
4581+
runtime.DispatchEvents(options, TDuration::Seconds(10));
4582+
}
4583+
4584+
// Check that balancer moved no tablets
4585+
auto newDistribution = getDistribution();
4586+
4587+
UNIT_ASSERT_EQUAL(initialDistribution, newDistribution);
4588+
4589+
// Request domain stats with metrics included and check that the Counter
// metric of the first domain stats entry is 0.
{
4590+
auto request = std::make_unique<TEvHive::TEvRequestHiveDomainStats>();
4591+
request->Record.SetReturnMetrics(true);
4592+
runtime.SendToPipe(hiveTablet, senderA, request.release());
4593+
TAutoPtr<IEventHandle> handle;
4594+
TEvHive::TEvResponseHiveDomainStats* response = runtime.GrabEdgeEventRethrow<TEvHive::TEvResponseHiveDomainStats>(handle);
4595+
ui64 totalCounter = response->Record.GetDomainStats(0).GetMetrics().GetCounter();
4596+
UNIT_ASSERT_VALUES_EQUAL(totalCounter, 0);
4597+
}
4598+
}
4599+
45104600
Y_UNIT_TEST(TestHiveBalancerWithImmovableTablets) {
45114601
static constexpr ui64 TABLETS_PER_NODE = 10;
45124602
TTestBasicRuntime runtime(3, false);

ydb/core/mind/hive/tablet_info.cpp

+12-6
Original file line numberDiff line numberDiff line change
@@ -404,14 +404,21 @@ TResourceRawValues TTabletInfo::GetResourceMaximumValues() const {
404404
}
405405
}
406406

407-
i64 TTabletInfo::GetCounterValue(const NKikimrTabletBase::TMetrics& metrics, const TVector<i64>& allowedMetricIds) {
408-
if (Find(allowedMetricIds, NKikimrTabletBase::TMetrics::kCPUFieldNumber) != allowedMetricIds.end() && THive::IsValidMetricsCPU(metrics)) {
407+
i64 TTabletInfo::GetCounterValue() const {
408+
const auto& allowedMetricIds = GetTabletAllowedMetricIds();
409+
if (Find(allowedMetricIds, NKikimrTabletBase::TMetrics::kCPUFieldNumber) != allowedMetricIds.end()
410+
&& (ResourceMetricsAggregates.MaximumCPU.GetAllTimeMaximum() > 0
411+
|| ResourceValues.GetCPU() > 0)) {
409412
return 0;
410413
}
411-
if (Find(allowedMetricIds, NKikimrTabletBase::TMetrics::kMemoryFieldNumber) != allowedMetricIds.end() && THive::IsValidMetricsMemory(metrics)) {
414+
if (Find(allowedMetricIds, NKikimrTabletBase::TMetrics::kMemoryFieldNumber) != allowedMetricIds.end()
415+
&& (ResourceMetricsAggregates.MaximumMemory.GetAllTimeMaximum() > 0
416+
|| ResourceValues.GetMemory() > 0)) {
412417
return 0;
413418
}
414-
if (Find(allowedMetricIds, NKikimrTabletBase::TMetrics::kNetworkFieldNumber) != allowedMetricIds.end() && THive::IsValidMetricsNetwork(metrics)) {
419+
if (Find(allowedMetricIds, NKikimrTabletBase::TMetrics::kNetworkFieldNumber) != allowedMetricIds.end()
420+
&& (ResourceMetricsAggregates.MaximumNetwork.GetAllTimeMaximum() > 0
421+
|| ResourceValues.GetNetwork() > 0)) {
415422
return 0;
416423
}
417424
return 1;
@@ -452,8 +459,7 @@ void TTabletInfo::FilterRawValues(TResourceNormalizedValues& values) const {
452459
}
453460

454461
// Recomputes the counter value for this tablet and stores it in
// ResourceValues via SetCounter. In this diff view the rows with a minus
// gutter are the removed form, which passed ResourceValues and the allowed
// metric ids to GetCounterValue; the row with a plus gutter is the replacement,
// which calls the member GetCounterValue() with no arguments.
void TTabletInfo::ActualizeCounter() {
455-
auto value = GetCounterValue(ResourceValues, GetTabletAllowedMetricIds());
456-
ResourceValues.SetCounter(value);
462+
ResourceValues.SetCounter(GetCounterValue());
457463
}
458464

459465
const TNodeFilter& TTabletInfo::GetNodeFilter() const {

ydb/core/mind/hive/tablet_info.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ struct TTabletInfo {
230230
void UpdateResourceUsage(const NKikimrTabletBase::TMetrics& metrics);
231231
TResourceRawValues GetResourceCurrentValues() const;
232232
TResourceRawValues GetResourceMaximumValues() const;
233-
static i64 GetCounterValue(const NKikimrTabletBase::TMetrics& metrics, const TVector<i64>& allowedMetricIds);
233+
i64 GetCounterValue() const;
234234
void FilterRawValues(TResourceRawValues& values) const;
235235
void FilterRawValues(TResourceNormalizedValues& values) const;
236236
void ActualizeCounter();

ydb/core/protos/metrics.proto

+1
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@ package NKikimrMetricsProto;
33
// Serialized state of a maximum-value tracker; the C++ class
// TMaximumValueVariableWindowUI64 in ydb/core/util/metrics.h derives from it.
message TMaximumValueUI64 {
44
// Start of the current bucket; metrics.h reads it as milliseconds.
optional uint64 LastBucketStartTime = 1;
55
// NOTE(review): presumably one stored value per bucket - confirm in metrics.h.
repeated uint64 Values = 2;
6+
// Running maximum updated in SetValue as max(value, AllTimeMaximum), but only
// once AllTimeMaximum or MaximumValue is already above zero, so the initial
// value is not recorded.
optional uint64 AllTimeMaximum = 3;
67
}

ydb/core/util/metrics.h

+3
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,9 @@ class TMaximumValueVariableWindowUI64 : public NKikimrMetricsProto::TMaximumValu
395395
using TProto = NKikimrMetricsProto::TMaximumValueUI64;
396396

397397
void SetValue(TType value, TInstant now = TInstant::Now()) {
398+
if (TProto::GetAllTimeMaximum() > 0 || MaximumValue > 0) { // ignoring initial value
399+
TProto::SetAllTimeMaximum(std::max(value, TProto::GetAllTimeMaximum()));
400+
}
398401
TDuration elapsedCurrentBucket = now - TInstant::MilliSeconds(TProto::GetLastBucketStartTime());
399402
if (TProto::ValuesSize() == 0 || elapsedCurrentBucket >= BucketDuration) {
400403
size_t bucketsPassed = 0;

0 commit comments

Comments
 (0)