Skip to content

Commit a1b9f20

Browse files
committed
add NumberOfCpus to tenantinfo handler
1 parent e8eb65c commit a1b9f20

File tree

8 files changed

+116
-33
lines changed

8 files changed

+116
-33
lines changed

ydb/core/base/pool_stats_collector.cpp

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,14 +51,27 @@ class TStatsCollectingActor : public NActors::TStatsCollectingActor {
5151
};
5252

5353
void OnWakeup(const TActorContext &ctx) override {
54+
static auto whiteboardId = NNodeWhiteboard::MakeNodeWhiteboardServiceId(ctx.SelfID.NodeId());
5455
MiniKQLPoolStats.Update();
5556

56-
TVector<std::tuple<TString, double, ui32, ui32>> pools;
57+
auto systemUpdate = std::make_unique<NNodeWhiteboard::TEvWhiteboard::TEvSystemStateUpdate>();
58+
ui32 coresTotal = 0;
59+
double coresUsed = 0;
5760
for (const auto& pool : PoolCounters) {
58-
pools.emplace_back(pool.Name, pool.Usage, pool.Threads, pool.LimitThreads);
61+
auto& pb = *systemUpdate->Record.AddPoolStats();
62+
pb.SetName(pool.Name);
63+
pb.SetUsage(pool.Usage);
64+
pb.SetThreads(static_cast<ui32>(pool.Threads));
65+
pb.SetLimit(static_cast<ui32>(pool.LimitThreads));
66+
if (pool.Name != "IO") {
67+
coresTotal += static_cast<ui32>(pool.DefaultThreads);
68+
}
69+
coresUsed += pool.Usage * pool.LimitThreads;
5970
}
71+
systemUpdate->Record.SetCoresTotal(coresTotal);
72+
systemUpdate->Record.SetCoresUsed(coresUsed);
6073

61-
ctx.Send(NNodeWhiteboard::MakeNodeWhiteboardServiceId(ctx.SelfID.NodeId()), new NNodeWhiteboard::TEvWhiteboard::TEvSystemStateUpdate(pools));
74+
ctx.Send(whiteboardId, systemUpdate.release());
6275
}
6376

6477
private:

ydb/core/protos/node_whiteboard.proto

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,8 @@ message TNodeStateInfo {
9797
optional uint64 ChangeTime = 4;
9898
optional uint32 OutputQueueSize = 5 [(InsignificantChangeAmount) = 1048576]; // 1Mb
9999
optional EFlag ConnectStatus = 6;
100+
optional uint32 PeerNodeId = 7;
101+
optional int64 ClockSkewUs = 8;
100102
optional uint32 Count = 13; // filled during group count
101103
}
102104

@@ -341,6 +343,8 @@ message TSystemStateInfo {
341343
optional uint32 TotalSessions = 36 [(DefaultField) = true];
342344
optional string NodeName = 37 [(DefaultField) = true];
343345
optional NKikimrMemory.TMemoryStats MemoryStats = 38;
346+
optional double CoresUsed = 39;
347+
optional uint32 CoresTotal = 40;
344348
}
345349

346350
message TEvSystemStateRequest {

ydb/core/viewer/json_handlers_viewer.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ void InitViewerStorageUsageJsonHandler(TJsonHandlers &handlers) {
204204
}
205205

206206
// Registers a TJsonHandler<TJsonCluster> (constructed with TJsonCluster::GetSwagger())
// under the path "/viewer/cluster". The diff below changes the trailing AddHandler
// argument from 4 to 5.
// NOTE(review): the trailing integer is presumably the handler version — confirm
// against TJsonHandlers::AddHandler.
void InitViewerClusterJsonHandler(TJsonHandlers& handlers) {
207-
handlers.AddHandler("/viewer/cluster", new TJsonHandler<TJsonCluster>(TJsonCluster::GetSwagger()), 4);
207+
handlers.AddHandler("/viewer/cluster", new TJsonHandler<TJsonCluster>(TJsonCluster::GetSwagger()), 5);
208208
}
209209

210210
void InitViewerLabeledCountersJsonHandler(TJsonHandlers &handlers) {
@@ -220,7 +220,7 @@ void InitViewerHiveStatsJsonHandler(TJsonHandlers& handlers) {
220220
}
221221

222222
// Registers a TJsonHandler<TJsonTenantInfo> (constructed with
// TJsonTenantInfo::GetSwagger()) under the path "/viewer/tenantinfo". The diff
// below changes the trailing AddHandler argument from 2 to 3.
// NOTE(review): the trailing integer is presumably the handler version — confirm
// against TJsonHandlers::AddHandler.
void InitViewerTenantInfoJsonHandler(TJsonHandlers &handlers) {
223-
handlers.AddHandler("/viewer/tenantinfo", new TJsonHandler<TJsonTenantInfo>(TJsonTenantInfo::GetSwagger()), 2);
223+
handlers.AddHandler("/viewer/tenantinfo", new TJsonHandler<TJsonTenantInfo>(TJsonTenantInfo::GetSwagger()), 3);
224224
}
225225

226226
void InitViewerWhoAmIJsonHandler(TJsonHandlers& handlers) {
@@ -244,7 +244,7 @@ void InitViewerHealthCheckJsonHandler(TJsonHandlers& handlers) {
244244
}
245245

246246
// Registers a TJsonHandler<TJsonNodes> (constructed with TJsonNodes::GetSwagger())
// under the path "/viewer/nodes". The diff below changes the trailing AddHandler
// argument from 10 to 11.
// NOTE(review): the trailing integer is presumably the handler version — confirm
// against TJsonHandlers::AddHandler.
void InitViewerNodesJsonHandler(TJsonHandlers& handlers) {
247-
handlers.AddHandler("/viewer/nodes", new TJsonHandler<TJsonNodes>(TJsonNodes::GetSwagger()), 10);
247+
handlers.AddHandler("/viewer/nodes", new TJsonHandler<TJsonNodes>(TJsonNodes::GetSwagger()), 11);
248248
}
249249

250250
void InitViewerACLJsonHandler(TJsonHandlers &jsonHandlers) {

ydb/core/viewer/protos/viewer.proto

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,7 @@ message TClusterInfo {
322322
uint32 NodesTotal = 10;
323323
uint32 NodesAlive = 11;
324324
uint32 NumberOfCpus = 20;
325+
uint32 CoresTotal = 24;
325326
double CoresUsed = 21;
326327
double LoadAverage = 22;
327328
repeated NKikimrWhiteboard.TSystemStateInfo.TPoolStats PoolStats = 23;
@@ -358,7 +359,7 @@ message TStorageUsage {
358359
SSD = 2;
359360
}
360361
EType Type = 1;
361-
uint64 Size = 2;
362+
optional uint64 Size = 2;
362363
uint64 Limit = 3;
363364
uint64 SoftQuota = 4;
364365
uint64 HardQuota = 5;
@@ -394,6 +395,7 @@ message TTenant {
394395
Ydb.Cms.DatabaseQuotas DatabaseQuotas = 42;
395396
repeated TStorageUsage TablesStorage = 44;
396397
repeated TStorageUsage DatabaseStorage = 45;
398+
uint32 CoresTotal = 50;
397399
}
398400

399401
message TTenants {

ydb/core/viewer/viewer_cluster.h

Lines changed: 48 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -342,8 +342,12 @@ class TJsonCluster : public TViewerPipeClient {
342342
}
343343

344344
// Populates `request` with the TSystemStateInfo field numbers to be fetched.
// Removed ('-') lines: the previous body left the request untouched (Y_UNUSED).
// Added ('+') lines: every number in the literal list is added via AddFieldsRequired
// (the source annotates the list as the node_whiteboard.proto default fields),
// followed by the MemoryStats, CoresUsed and CoresTotal field numbers taken from the
// generated k...FieldNumber constants.
// NOTE(review): the same literal list also appears in viewer_tenantinfo.h
// (SendWhiteboardSystemStateRequest); the two copies must be kept in sync by hand.
void InitSystemWhiteboardRequest(NKikimrWhiteboard::TEvSystemStateRequest* request) {
345-
//request->AddFieldsRequired(-1);
346-
Y_UNUSED(request);
345+
for (auto field : {1, 2, 4, 5, 6, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 31, 36, 37}) { // node_whiteboard.proto default fields
346+
request->AddFieldsRequired(field);
347+
}
348+
request->AddFieldsRequired(NKikimrWhiteboard::TSystemStateInfo::kMemoryStatsFieldNumber);
349+
request->AddFieldsRequired(NKikimrWhiteboard::TSystemStateInfo::kCoresUsedFieldNumber);
350+
request->AddFieldsRequired(NKikimrWhiteboard::TSystemStateInfo::kCoresTotalFieldNumber);
347351
}
348352

349353
void InitTabletWhiteboardRequest(NKikimrWhiteboard::TEvTabletStateRequest* request) {
@@ -462,23 +466,40 @@ class TJsonCluster : public TViewerPipeClient {
462466
}
463467
}
464468

469+
struct TMemoryStats {
470+
ui64 Total = 0;
471+
ui64 Limit = 0;
472+
};
473+
474+
std::unordered_set<TString> hostPassed;
475+
std::unordered_map<TString, TMemoryStats> memoryStats;
476+
465477
for (TNode& node : NodeData) {
466478
const NKikimrWhiteboard::TSystemStateInfo& systemState = node.SystemState;
467479
(*ClusterInfo.MutableMapDataCenters())[node.DataCenter]++;
468-
if (systemState.HasNumberOfCpus()) {
480+
if (hostPassed.insert(systemState.GetHost()).second) {
469481
ClusterInfo.SetNumberOfCpus(ClusterInfo.GetNumberOfCpus() + systemState.GetNumberOfCpus());
470-
}
471-
if (systemState.LoadAverageSize() > 0) {
472-
ClusterInfo.SetLoadAverage(ClusterInfo.GetLoadAverage() + systemState.GetLoadAverage(0));
482+
if (systemState.LoadAverageSize() > 0) {
483+
ClusterInfo.SetLoadAverage(ClusterInfo.GetLoadAverage() + systemState.GetLoadAverage(0));
484+
}
473485
}
474486
if (systemState.HasVersion()) {
475487
(*ClusterInfo.MutableMapVersions())[systemState.GetVersion()]++;
476488
}
477489
if (systemState.HasClusterName() && !ClusterInfo.GetName()) {
478490
ClusterInfo.SetName(systemState.GetClusterName());
479491
}
480-
ClusterInfo.SetMemoryTotal(ClusterInfo.GetMemoryTotal() + systemState.GetMemoryLimit());
481492
ClusterInfo.SetMemoryUsed(ClusterInfo.GetMemoryUsed() + systemState.GetMemoryUsed());
493+
if (systemState.HasMemoryStats()) {
494+
TMemoryStats& stats = memoryStats[systemState.GetHost()];
495+
if (systemState.GetMemoryLimit() > 0) {
496+
stats.Limit += systemState.GetMemoryLimit();
497+
} else {
498+
stats.Total = systemState.GetMemoryStats().GetMemTotal();
499+
}
500+
} else {
501+
ClusterInfo.SetMemoryTotal(ClusterInfo.GetMemoryTotal() + systemState.GetMemoryLimit());
502+
}
482503
if (!node.Disconnected && node.SystemState.HasSystemState()) {
483504
ClusterInfo.SetNodesAlive(ClusterInfo.GetNodesAlive() + 1);
484505
}
@@ -500,14 +521,32 @@ class TJsonCluster : public TViewerPipeClient {
500521
targetPoolStat->SetName(poolName);
501522
}
502523
double poolUsage = targetPoolStat->GetUsage() * targetPoolStat->GetThreads();
503-
poolUsage += poolStat.GetUsage() * poolStat.GetThreads();
524+
ui32 usageThreads = poolStat.GetLimit() ? poolStat.GetLimit() : poolStat.GetThreads();
525+
poolUsage += poolStat.GetUsage() * usageThreads;
504526
ui32 poolThreads = targetPoolStat->GetThreads() + poolStat.GetThreads();
505527
if (poolThreads != 0) {
506528
double threadUsage = poolUsage / poolThreads;
507529
targetPoolStat->SetUsage(threadUsage);
508530
targetPoolStat->SetThreads(poolThreads);
509531
}
510-
ClusterInfo.SetCoresUsed(ClusterInfo.GetCoresUsed() + poolStat.GetUsage() * poolStat.GetThreads());
532+
if (systemState.GetCoresTotal() == 0) {
533+
ClusterInfo.SetCoresUsed(ClusterInfo.GetCoresUsed() + poolStat.GetUsage() * usageThreads);
534+
if (poolStat.GetName() != "IO") {
535+
ClusterInfo.SetCoresTotal(ClusterInfo.GetCoresTotal() + poolStat.GetThreads());
536+
}
537+
}
538+
}
539+
if (systemState.GetCoresTotal() != 0) {
540+
ClusterInfo.SetCoresUsed(ClusterInfo.GetCoresUsed() + systemState.GetCoresUsed());
541+
ClusterInfo.SetCoresTotal(ClusterInfo.GetCoresTotal() + systemState.GetCoresTotal());
542+
}
543+
}
544+
545+
for (const auto& memStats : memoryStats) {
546+
if (memStats.second.Total > 0) {
547+
ClusterInfo.SetMemoryTotal(ClusterInfo.GetMemoryTotal() + memStats.second.Total);
548+
} else {
549+
ClusterInfo.SetMemoryTotal(ClusterInfo.GetMemoryTotal() + memStats.second.Limit);
511550
}
512551
}
513552

ydb/core/viewer/viewer_nodes.h

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -261,11 +261,18 @@ class TJsonNodes : public TViewerPipeClient {
261261
}
262262

263263
void CalcCpuUsage() {
264-
float usage = 0;
265-
int threads = 0;
266-
for (const auto& pool : SystemState.GetPoolStats()) {
267-
usage += pool.GetUsage() * pool.GetThreads();
268-
threads += pool.GetThreads();
264+
float usage = SystemState.GetCoresUsed();
265+
int threads = SystemState.GetCoresTotal();
266+
if (threads == 0) {
267+
for (const auto& pool : SystemState.GetPoolStats()) {
268+
ui32 usageThreads = pool.GetLimit() ? pool.GetLimit() : pool.GetThreads();
269+
usage += pool.GetUsage() * usageThreads;
270+
if (pool.GetName() != "IO") {
271+
threads += pool.GetThreads();
272+
}
273+
}
274+
SystemState.SetCoresUsed(usage);
275+
SystemState.SetCoresTotal(threads);
269276
}
270277
CpuUsage = usage / threads;
271278
}
@@ -1724,7 +1731,7 @@ class TJsonNodes : public TViewerPipeClient {
17241731
request->AddFieldsRequired(field);
17251732
}
17261733
if (FieldsRequired.test(+ENodeFields::MemoryDetailed)) {
1727-
request->AddFieldsRequired(38);
1734+
request->AddFieldsRequired(NKikimrWhiteboard::TSystemStateInfo::kMemoryStatsFieldNumber);
17281735
}
17291736
}
17301737
}

ydb/core/viewer/viewer_tenantinfo.h

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,13 @@ class TJsonTenantInfo : public TViewerPipeClient {
262262
// Records `nodeId` in Subscribers and, if SystemStateResponse has no entry for that
// node yet, issues a whiteboard system-state request and stores the result of
// MakeWhiteboardRequest under nodeId.
// Removed ('-') line: the request was sent with no explicit field list.
// Added ('+') lines: the request lists the literal field numbers (annotated in the
// source as the node_whiteboard.proto default fields) plus the CoresUsed and
// CoresTotal field numbers from the generated k...FieldNumber constants.
// NOTE(review): request.release() passes a raw pointer to MakeWhiteboardRequest,
// which presumably takes ownership — confirm.
void SendWhiteboardSystemStateRequest(const TNodeId nodeId) {
263263
Subscribers.insert(nodeId);
264264
if (SystemStateResponse.count(nodeId) == 0) {
265-
SystemStateResponse.emplace(nodeId, MakeWhiteboardRequest(nodeId, new TEvWhiteboard::TEvSystemStateRequest()));
265+
auto request = std::make_unique<NNodeWhiteboard::TEvWhiteboard::TEvSystemStateRequest>();
266+
for (auto field : {1, 2, 4, 5, 6, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 31, 36, 37}) { // node_whiteboard.proto default fields
267+
request->Record.AddFieldsRequired(field);
268+
}
269+
request->Record.AddFieldsRequired(NKikimrWhiteboard::TSystemStateInfo::kCoresUsedFieldNumber);
270+
request->Record.AddFieldsRequired(NKikimrWhiteboard::TSystemStateInfo::kCoresTotalFieldNumber);
271+
SystemStateResponse.emplace(nodeId, MakeWhiteboardRequest(nodeId, request.release()));
266272
}
267273
}
268274

@@ -731,7 +737,8 @@ class TJsonTenantInfo : public TViewerPipeClient {
731737

732738
if (tablesStorageByType.empty() && entry.DomainDescription->Description.HasDiskSpaceUsage()) {
733739
tablesStorageByType[GuessStorageType(entry.DomainDescription->Description)] =
734-
entry.DomainDescription->Description.GetDiskSpaceUsage().GetTables().GetTotalSize();
740+
entry.DomainDescription->Description.GetDiskSpaceUsage().GetTables().GetTotalSize()
741+
+ entry.DomainDescription->Description.GetDiskSpaceUsage().GetTopics().GetDataSize();
735742
}
736743

737744
if (storageQuotasByType.empty()) {
@@ -784,14 +791,24 @@ class TJsonTenantInfo : public TViewerPipeClient {
784791
targetPoolStat->SetName(poolName);
785792
}
786793
double poolUsage = targetPoolStat->GetUsage() * targetPoolStat->GetThreads();
787-
poolUsage += poolStat.GetUsage() * poolStat.GetThreads();
794+
ui32 usageThreads = poolStat.GetLimit() ? poolStat.GetLimit() : poolStat.GetThreads();
795+
poolUsage += poolStat.GetUsage() * usageThreads;
788796
ui32 poolThreads = targetPoolStat->GetThreads() + poolStat.GetThreads();
789797
if (poolThreads != 0) {
790798
double threadUsage = poolUsage / poolThreads;
791799
targetPoolStat->SetUsage(threadUsage);
792800
targetPoolStat->SetThreads(poolThreads);
793801
}
794-
tenant.SetCoresUsed(tenant.GetCoresUsed() + poolStat.GetUsage() * poolStat.GetThreads());
802+
if (nodeInfo.GetCoresTotal() == 0) {
803+
tenant.SetCoresUsed(tenant.GetCoresUsed() + poolStat.GetUsage() * usageThreads);
804+
if (poolStat.GetName() != "IO") {
805+
tenant.SetCoresTotal(tenant.GetCoresTotal() + poolStat.GetThreads());
806+
}
807+
}
808+
}
809+
if (nodeInfo.GetCoresTotal() > 0) {
810+
tenant.SetCoresUsed(tenant.GetCoresUsed() + nodeInfo.GetCoresUsed());
811+
tenant.SetCoresTotal(tenant.GetCoresTotal() + nodeInfo.GetCoresTotal());
795812
}
796813
if (nodeInfo.HasMemoryUsed()) {
797814
tenant.SetMemoryUsed(tenant.GetMemoryUsed() + nodeInfo.GetMemoryUsed());

ydb/library/actors/helpers/pool_stats_collector.h

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,7 @@ class TStatsCollectingActor : public TActorBootstrapped<TStatsCollectingActor> {
215215
TString Name;
216216
double Threads;
217217
double LimitThreads;
218+
double DefaultThreads;
218219

219220
void Init(NMonitoring::TDynamicCounters* group, const TString& poolName, ui32 threads) {
220221
LastElapsedSeconds = 0;
@@ -223,6 +224,7 @@ class TStatsCollectingActor : public TActorBootstrapped<TStatsCollectingActor> {
223224
Name = poolName;
224225
Threads = threads;
225226
LimitThreads = threads;
227+
DefaultThreads = threads;
226228

227229
PoolGroup = group->GetSubgroup("execpool", poolName);
228230

@@ -292,14 +294,15 @@ class TStatsCollectingActor : public TActorBootstrapped<TStatsCollectingActor> {
292294

293295
void Set(const TExecutorPoolStats& poolStats, const TExecutorThreadStats& stats) {
294296
#ifdef ACTORSLIB_COLLECT_EXEC_STATS
297+
double elapsedSeconds = ::NHPTimer::GetSeconds(stats.ElapsedTicks);
295298
*SentEvents = stats.SentEvents;
296299
*ReceivedEvents = stats.ReceivedEvents;
297300
*PreemptedEvents = stats.PreemptedEvents;
298301
*NonDeliveredEvents = stats.NonDeliveredEvents;
299302
*DestroyedActors = stats.PoolDestroyedActors;
300303
*EmptyMailboxActivation = stats.EmptyMailboxActivation;
301304
*CpuMicrosec = stats.CpuUs;
302-
*ElapsedMicrosec = ::NHPTimer::GetSeconds(stats.ElapsedTicks)*1000000;
305+
*ElapsedMicrosec = elapsedSeconds*1000000;
303306
*ParkedMicrosec = ::NHPTimer::GetSeconds(stats.ParkedTicks)*1000000;
304307
*ActorRegistrations = stats.PoolActorRegistrations;
305308
*ActorsAlive = stats.PoolActorRegistrations - stats.PoolDestroyedActors;
@@ -364,19 +367,17 @@ class TStatsCollectingActor : public TActorBootstrapped<TStatsCollectingActor> {
364367
double seconds = UsageTimer.PassedReset();
365368

366369
// TODO[serxa]: It doesn't account for contention. Use 1 - parkedTicksDelta / seconds / numThreads KIKIMR-11916
367-
const double currentThreadCount = poolStats.PotentialMaxThreadCount;
368-
const double elapsed = NHPTimer::GetSeconds(stats.ElapsedTicks);
369-
const double currentUsage = currentThreadCount > 0 ? ((elapsed - LastElapsedSeconds) / seconds / currentThreadCount) : 0;
370-
LastElapsedSeconds = elapsed;
370+
Threads = poolStats.CurrentThreadCount;
371+
LimitThreads = poolStats.PotentialMaxThreadCount;
372+
const double currentUsage = LimitThreads > 0 ? ((elapsedSeconds - LastElapsedSeconds) / seconds / LimitThreads) : 0;
371373

372374
// update usage factor according to smoothness
373375
const double smoothness = 0.5;
374376
Usage = currentUsage * smoothness + Usage * (1.0 - smoothness);
377+
LastElapsedSeconds = elapsedSeconds;
375378
#else
376379
Y_UNUSED(stats);
377380
#endif
378-
Threads = poolStats.CurrentThreadCount;
379-
LimitThreads = poolStats.PotentialMaxThreadCount;
380381
}
381382
};
382383

0 commit comments

Comments
 (0)