2
2
3
3
#include < ydb/core/base/appdata.h>
4
4
#include < ydb/core/base/counters.h>
5
+ #include < ydb/core/graph/api/events.h>
6
+ #include < ydb/core/graph/api/service.h>
5
7
#include < ydb/library/services/services.pb.h>
6
8
#include < ydb/library/actors/core/actor_bootstrapped.h>
7
9
@@ -13,18 +15,26 @@ class TExtCountersUpdaterActor
13
15
: public TActorBootstrapped<TExtCountersUpdaterActor>
14
16
{
15
17
using TCounterPtr = ::NMonitoring::TDynamicCounters::TCounterPtr;
18
+ using THistogramPtr = ::NMonitoring::THistogramPtr;
19
+ using THistogramSnapshotPtr = ::NMonitoring::IHistogramSnapshotPtr;
16
20
17
21
const TExtCountersConfig Config;
18
22
19
23
TCounterPtr MemoryUsedBytes;
20
24
TCounterPtr MemoryLimitBytes;
25
+ TCounterPtr StorageUsedBytes;
21
26
TVector<TCounterPtr> CpuUsedCorePercents;
22
27
TVector<TCounterPtr> CpuLimitCorePercents;
28
+ THistogramPtr ExecuteLatencyMs;
23
29
24
30
TCounterPtr AnonRssSize;
25
31
TCounterPtr CGroupMemLimit;
26
32
TVector<TCounterPtr> PoolElapsedMicrosec;
27
33
TVector<TCounterPtr> PoolCurrentThreadCount;
34
+ TVector<ui64> PoolElapsedMicrosecPrevValue;
35
+ TVector<ui64> ExecuteLatencyMsValues;
36
+ TVector<ui64> ExecuteLatencyMsPrevValues;
37
+ TVector<ui64> ExecuteLatencyMsBounds;
28
38
29
39
public:
30
40
static constexpr NKikimrServices::TActivity::EType ActorActivityType () {
@@ -42,6 +52,8 @@ class TExtCountersUpdaterActor
42
52
" resources.memory.used_bytes" , false );
43
53
MemoryLimitBytes = ydbGroup->GetNamedCounter (" name" ,
44
54
" resources.memory.limit_bytes" , false );
55
+ StorageUsedBytes = ydbGroup->GetNamedCounter (" name" ,
56
+ " resources.storage.used_bytes" , false );
45
57
46
58
auto poolCount = Config.Pools .size ();
47
59
CpuUsedCorePercents.resize (poolCount);
@@ -55,6 +67,8 @@ class TExtCountersUpdaterActor
55
67
" resources.cpu.limit_core_percents" , false );
56
68
}
57
69
70
+ ExecuteLatencyMs = ydbGroup->FindNamedHistogram (" name" , " table.query.execution.latency_milliseconds" );
71
+
58
72
Schedule (TDuration::Seconds (1 ), new TEvents::TEvWakeup);
59
73
Become (&TThis::StateWork);
60
74
}
@@ -69,29 +83,41 @@ class TExtCountersUpdaterActor
69
83
70
84
PoolElapsedMicrosec.resize (Config.Pools .size ());
71
85
PoolCurrentThreadCount.resize (Config.Pools .size ());
86
+ PoolElapsedMicrosecPrevValue.resize (Config.Pools .size ());
72
87
for (size_t i = 0 ; i < Config.Pools .size (); ++i) {
73
88
auto poolGroup = utilsGroup->FindSubgroup (" execpool" , Config.Pools [i].Name );
74
89
if (poolGroup) {
75
90
PoolElapsedMicrosec[i] = poolGroup->FindCounter (" ElapsedMicrosec" );
76
91
PoolCurrentThreadCount[i] = poolGroup->FindCounter (" CurrentThreadCount" );
92
+ if (PoolElapsedMicrosec[i]) {
93
+ PoolElapsedMicrosecPrevValue[i] = PoolElapsedMicrosec[i]->Val ();
94
+ }
77
95
}
78
96
}
79
97
}
80
98
}
81
99
82
100
void Transform () {
83
101
Initialize ();
84
-
102
+ auto metrics (MakeHolder<NGraph::TEvGraph::TEvSendMetrics>());
85
103
if (AnonRssSize) {
86
104
MemoryUsedBytes->Set (AnonRssSize->Val ());
105
+ metrics->AddMetric (" resources.memory.used_bytes" , AnonRssSize->Val ());
87
106
}
88
107
if (CGroupMemLimit) {
89
108
MemoryLimitBytes->Set (CGroupMemLimit->Val ());
90
109
}
110
+ metrics->AddMetric (" resources.storage.used_bytes" , StorageUsedBytes->Val ());
111
+ double cpuUsage = 0 ;
91
112
for (size_t i = 0 ; i < Config.Pools .size (); ++i) {
92
113
if (PoolElapsedMicrosec[i]) {
93
- double usedCore = PoolElapsedMicrosec[i]->Val () / 10000 .;
114
+ auto elapsedMs = PoolElapsedMicrosec[i]->Val ();
115
+ double usedCore = elapsedMs / 10000 .;
94
116
CpuUsedCorePercents[i]->Set (usedCore);
117
+ if (PoolElapsedMicrosecPrevValue[i] != 0 ) {
118
+ cpuUsage += (elapsedMs - PoolElapsedMicrosecPrevValue[i]) / 1000000 .;
119
+ }
120
+ PoolElapsedMicrosecPrevValue[i] = elapsedMs;
95
121
}
96
122
if (PoolCurrentThreadCount[i] && PoolCurrentThreadCount[i]->Val ()) {
97
123
double limitCore = PoolCurrentThreadCount[i]->Val () * 100 ;
@@ -101,6 +127,47 @@ class TExtCountersUpdaterActor
101
127
CpuLimitCorePercents[i]->Set (limitCore);
102
128
}
103
129
}
130
+ metrics->AddMetric (" resources.cpu.usage" , cpuUsage);
131
+ if (ExecuteLatencyMs) {
132
+ THistogramSnapshotPtr snapshot = ExecuteLatencyMs->Snapshot ();
133
+ ui32 count = snapshot->Count ();
134
+ if (ExecuteLatencyMsValues.empty ()) {
135
+ ExecuteLatencyMsValues.resize (count);
136
+ ExecuteLatencyMsPrevValues.resize (count);
137
+ ExecuteLatencyMsBounds.resize (count);
138
+ }
139
+ ui64 total = 0 ;
140
+ for (ui32 n = 0 ; n < count; ++n) {
141
+ ui64 value = snapshot->Value (n);;
142
+ ui64 diff = value - ExecuteLatencyMsPrevValues[n];
143
+ total += diff;
144
+ ExecuteLatencyMsValues[n] = diff;
145
+ ExecuteLatencyMsPrevValues[n] = value;
146
+ if (ExecuteLatencyMsBounds[n] == 0 ) {
147
+ ExecuteLatencyMsBounds[n] = snapshot->UpperBound (n);
148
+ }
149
+ }
150
+ metrics->AddMetric (" queries.requests" , total);
151
+ if (total != 0 ) {
152
+ double p50 = NGraph::GetTimingForPercentile (50 , ExecuteLatencyMsValues, ExecuteLatencyMsBounds, total);
153
+ if (!isnan (p50)) {
154
+ metrics->AddMetric (" queries.latencies.p50" , p50);
155
+ }
156
+ double p75 = NGraph::GetTimingForPercentile (75 , ExecuteLatencyMsValues, ExecuteLatencyMsBounds, total);
157
+ if (!isnan (p75)) {
158
+ metrics->AddMetric (" queries.latencies.p75" , p75);
159
+ }
160
+ double p90 = NGraph::GetTimingForPercentile (90 , ExecuteLatencyMsValues, ExecuteLatencyMsBounds, total);
161
+ if (!isnan (p90)) {
162
+ metrics->AddMetric (" queries.latencies.p90" , p90);
163
+ }
164
+ double p99 = NGraph::GetTimingForPercentile (99 , ExecuteLatencyMsValues, ExecuteLatencyMsBounds, total);
165
+ if (!isnan (p99)) {
166
+ metrics->AddMetric (" queries.latencies.p99" , p99);
167
+ }
168
+ }
169
+ }
170
+ Send (NGraph::MakeGraphServiceId (), metrics.Release ());
104
171
}
105
172
106
173
void HandleWakeup () {
0 commit comments