Skip to content

Commit 2fc250f

Browse files
committed
add health check overload shard hint (ydb-platform#14416)
1 parent 65b8f4e commit 2fc250f

File tree

2 files changed

+31
-21
lines changed

2 files changed

+31
-21
lines changed

ydb/core/health_check/health_check.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -714,7 +714,7 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
714714

715715
THashMap<TString, THintOverloadedShard> OverloadedShardHints;
716716
static constexpr size_t MAX_OVERLOADED_SHARDS_HINTS = 10;
717-
static constexpr double OVERLOADED_SHARDS_CPU_CORES = 0.01; /* HACK */
717+
static constexpr double OVERLOADED_SHARDS_CPU_CORES = 0.75;
718718

719719
struct TTabletRequestsState {
720720
struct TTabletState {
@@ -992,7 +992,7 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
992992
[[nodiscard]] TRequestResponse<TEvSysView::TEvGetPartitionStatsResult> RequestPartitionStats(TTabletId schemeShardId, TSubDomainKey subDomainKey) {
993993
THolder<TEvSysView::TEvGetPartitionStats> request = MakeHolder<TEvSysView::TEvGetPartitionStats>();
994994
NKikimrSysView::TEvGetPartitionStats& record = request->Record;
995-
record.MutableFilter()->MutableNotLess()->SetCPUCores(OVERLOADED_SHARDS_CPU_CORES); //
995+
record.MutableFilter()->MutableNotLess()->SetCPUCores(OVERLOADED_SHARDS_CPU_CORES);
996996
record.SetDomainKeyOwnerId(subDomainKey.GetSchemeShard());
997997
record.SetDomainKeyPathId(subDomainKey.GetPathId());
998998
return RequestTabletPipe<TEvSysView::TEvGetPartitionStatsResult>(schemeShardId, request.Release(), TTabletRequestsState::RequestGetPartitionStats);
@@ -1455,7 +1455,7 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
14551455

14561456
bool CheckOverloadedShardHint(const TEvSchemeShard::TEvDescribeSchemeResult& response, THintOverloadedShard& hint) {
14571457
const auto& policy(response.GetRecord().GetPathDescription().GetTable().GetPartitionConfig().GetPartitioningPolicy());
1458-
if (/*HACK ! */policy.GetSplitByLoadSettings().GetEnabled()) {
1458+
if (!policy.GetSplitByLoadSettings().GetEnabled()) {
14591459
hint.Message = "Split by load is disabled on the table"; // do not change without changing the logic in the UI
14601460
return true;
14611461
}

ydb/core/sys_view/partition_stats/partition_stats.cpp

Lines changed: 28 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -309,6 +309,27 @@ class TPartitionStatsCollector : public TActorBootstrapped<TPartitionStatsCollec
309309
}
310310

311311
bool includePathColumn = !record.HasIncludePathColumn() || record.GetIncludePathColumn();
312+
bool hasFilter = record.HasFilter();
313+
auto matchesFilter = [&](const NKikimrSysView::TPartitionStats& stats) {
314+
const auto& filter = record.GetFilter();
315+
if (filter.HasNotLess()) {
316+
if (filter.GetNotLess().HasCPUCores() && stats.GetCPUCores() < filter.GetNotLess().GetCPUCores()) {
317+
return false;
318+
}
319+
}
320+
return true;
321+
};
322+
auto addStats = [&](const auto& pathId, const auto& tableStats, ui64 partIdx) {
323+
auto stats = result->Record.AddStats();
324+
auto* key = stats->MutableKey();
325+
key->SetOwnerId(pathId.OwnerId);
326+
key->SetPathId(pathId.LocalPathId);
327+
key->SetPartIdx(partIdx);
328+
if (includePathColumn) {
329+
stats->SetPath(tableStats.Path);
330+
}
331+
return stats;
332+
};
312333

313334
auto matchesFilter = [&](const NKikimrSysView::TPartitionStats& stats) {
314335
if (record.HasFilter()) {
@@ -335,35 +356,24 @@ class TPartitionStatsCollector : public TActorBootstrapped<TPartitionStatsCollec
335356

336357
for (ui64 partIdx = startPartIdx; partIdx < end; ++partIdx) {
337358
NKikimrSysView::TPartitionStatsResult* stats = nullptr;
359+
if (!hasFilter) {
360+
stats = addStats(pathId, tableStats, partIdx);
361+
}
338362
auto shardIdx = tableStats.ShardIndices[partIdx];
339363
auto part = tableStats.Partitions.find(shardIdx);
340364
if (part != tableStats.Partitions.end()) {
341365
for (const auto& followerStat : part->second.FollowerStats) {
342-
if (!matchesFilter(followerStat.second)) {
366+
if (hasFilter && !matchesFilter(followerStat.second)) {
343367
continue;
344368
}
345-
if (stats == nullptr) {
346-
stats = result->Record.AddStats();
369+
if (!stats) {
370+
stats = addStats(pathId, tableStats, partIdx);
347371
}
348372
*stats->AddStats() = followerStat.second;
349373
}
350374
}
351375

352-
if (!stats) {
353-
continue;
354-
}
355-
356-
auto* key = stats->MutableKey();
357-
358-
key->SetOwnerId(pathId.OwnerId);
359-
key->SetPathId(pathId.LocalPathId);
360-
key->SetPartIdx(partIdx);
361-
362-
if (includePathColumn) {
363-
stats->SetPath(tableStats.Path);
364-
}
365-
366-
if (++count == BatchSize) {
376+
if (stats && ++count == BatchSize) {
367377
auto* next = result->Record.MutableNext();
368378
next->SetOwnerId(pathId.OwnerId);
369379
next->SetPathId(pathId.LocalPathId);

0 commit comments

Comments
 (0)