1
1
#include " aggregator_impl.h"
2
2
3
3
#include < ydb/core/engine/minikql/flat_local_tx_factory.h>
4
+ #include < ydb/core/statistics/stat_service.h>
4
5
5
6
#include < library/cpp/monlib/service/pages/templates.h>
6
7
7
8
namespace NKikimr ::NStat {
8
9
9
- TStatisticsAggregator::TStatisticsAggregator (const NActors::TActorId& tablet, TTabletStorageInfo* info)
10
+ TStatisticsAggregator::TStatisticsAggregator (const NActors::TActorId& tablet, TTabletStorageInfo* info, bool forTests )
10
11
: TActor(&TThis::StateInit)
11
12
, TTabletExecutedFlat(info, tablet, new NMiniKQL::TMiniKQLFactory)
12
- {}
13
+ {
14
+ PropagateInterval = forTests ? TDuration::Seconds (5 ) : TDuration::Minutes (3 );
15
+
16
+ auto seed = std::random_device{}();
17
+ RandomGenerator.seed (seed);
18
+ }
13
19
14
20
void TStatisticsAggregator::OnDetach (const TActorContext& ctx) {
15
21
Die (ctx);
@@ -29,8 +35,241 @@ void TStatisticsAggregator::DefaultSignalTabletActive(const TActorContext& ctx)
29
35
Y_UNUSED (ctx);
30
36
}
31
37
32
- void TStatisticsAggregator::Handle (TEvPrivate::TEvProcess::TPtr&) {
33
- SA_LOG_D (" [" << TabletID () << " ] Handle TEvPrivate::TEvProcess" );
38
+ void TStatisticsAggregator::Handle (TEvTabletPipe::TEvServerConnected::TPtr &ev) {
39
+ auto pipeServerId = ev->Get ()->ServerId ;
40
+
41
+ SA_LOG_D (" [" << TabletID () << " ] EvServerConnected"
42
+ << " , pipe server id = " << pipeServerId);
43
+ }
44
+
45
+ void TStatisticsAggregator::Handle (TEvTabletPipe::TEvServerDisconnected::TPtr &ev) {
46
+ auto pipeServerId = ev->Get ()->ServerId ;
47
+
48
+ SA_LOG_D (" [" << TabletID () << " ] EvServerDisconnected"
49
+ << " , pipe server id = " << pipeServerId);
50
+
51
+ auto itNodeServer = NodePipes.find (pipeServerId);
52
+ if (itNodeServer != NodePipes.end ()) {
53
+ auto nodeId = itNodeServer->second ;
54
+ auto itNode = Nodes.find (nodeId);
55
+ if (itNode != Nodes.end ()) {
56
+ --itNode->second ;
57
+ if (itNode->second == 0 ) {
58
+ Nodes.erase (itNode);
59
+ }
60
+ }
61
+ NodePipes.erase (itNodeServer);
62
+ return ;
63
+ }
64
+
65
+ auto itShardServer = SchemeShardPipes.find (pipeServerId);
66
+ if (itShardServer != SchemeShardPipes.end ()) {
67
+ auto ssId = itShardServer->second ;
68
+ auto itShard = SchemeShards.find (ssId);
69
+ if (itShard != SchemeShards.end ()) {
70
+ --itShard->second ;
71
+ if (itShard->second == 0 ) {
72
+ SchemeShards.erase (itShard);
73
+ }
74
+ }
75
+ SchemeShardPipes.erase (itShardServer);
76
+ return ;
77
+ }
78
+ }
79
+
80
+ void TStatisticsAggregator::Handle (TEvStatistics::TEvConnectNode::TPtr& ev) {
81
+ const auto & record = ev->Get ()->Record ;
82
+ const TNodeId nodeId = record.GetNodeId ();
83
+ auto pipeServerId = ev->Recipient ;
84
+
85
+ SA_LOG_D (" [" << TabletID () << " ] EvConnectNode"
86
+ << " , pipe server id = " << pipeServerId
87
+ << " , node id = " << nodeId
88
+ << " , have schemeshards count = " << record.HaveSchemeShardsSize ()
89
+ << " , need schemeshards count = " << record.NeedSchemeShardsSize ());
90
+
91
+ if (NodePipes.find (pipeServerId) == NodePipes.end ()) {
92
+ NodePipes[pipeServerId] = nodeId;
93
+ ++Nodes[nodeId];
94
+ }
95
+
96
+ for (const auto & ssEntry : record.GetHaveSchemeShards ()) {
97
+ RequestedSchemeShards.insert (ssEntry.GetSchemeShardId ());
98
+ }
99
+
100
+ if (!IsPropagateInFlight) {
101
+ Schedule (PropagateInterval, new TEvPrivate::TEvPropagate ());
102
+ IsPropagateInFlight = true ;
103
+ }
104
+
105
+ std::vector<TSSId> ssIds;
106
+ ssIds.reserve (record.NeedSchemeShardsSize ());
107
+ for (const auto & ssId : record.GetNeedSchemeShards ()) {
108
+ ssIds.push_back (ssId);
109
+ RequestedSchemeShards.insert (ssId);
110
+ }
111
+
112
+ ProcessRequests (nodeId, ssIds);
113
+ }
114
+
115
+ void TStatisticsAggregator::Handle (TEvStatistics::TEvRequestStats::TPtr& ev) {
116
+ const auto & record = ev->Get ()->Record ;
117
+ const auto nodeId = record.GetNodeId ();
118
+
119
+ SA_LOG_D (" [" << TabletID () << " ] EvRequestStats"
120
+ << " , node id = " << nodeId
121
+ << " , schemeshard count = " << record.NeedSchemeShardsSize ());
122
+
123
+ std::vector<TSSId> ssIds;
124
+ ssIds.reserve (record.NeedSchemeShardsSize ());
125
+ for (const auto & ssId : record.GetNeedSchemeShards ()) {
126
+ ssIds.push_back (ssId);
127
+ }
128
+
129
+ ProcessRequests (nodeId, ssIds);
130
+ }
131
+
132
+ void TStatisticsAggregator::Handle (TEvStatistics::TEvConnectSchemeShard::TPtr& ev) {
133
+ const auto & record = ev->Get ()->Record ;
134
+ const TSSId schemeShardId = record.GetSchemeShardId ();
135
+ auto pipeServerId = ev->Recipient ;
136
+
137
+ if (SchemeShardPipes.find (pipeServerId) == SchemeShardPipes.end ()) {
138
+ SchemeShardPipes[pipeServerId] = schemeShardId;
139
+ ++SchemeShards[schemeShardId];
140
+ }
141
+
142
+ SA_LOG_D (" [" << TabletID () << " ] EvConnectSchemeShard"
143
+ << " , pipe server id = " << pipeServerId
144
+ << " , schemeshard id = " << schemeShardId);
145
+ }
146
+
147
+ void TStatisticsAggregator::Handle (TEvPrivate::TEvFastPropagateCheck::TPtr&) {
148
+ SA_LOG_D (" [" << TabletID () << " ] EvFastPropagateCheck" );
149
+
150
+ PropagateFastStatistics ();
151
+
152
+ FastCheckInFlight = false ;
153
+ FastCounter = StatsOptimizeFirstNodesCount;
154
+ FastNodes.clear ();
155
+ FastSchemeShards.clear ();
156
+ }
157
+
158
+ void TStatisticsAggregator::Handle (TEvPrivate::TEvPropagate::TPtr&) {
159
+ SA_LOG_D (" [" << TabletID () << " ] EvPropagate" );
160
+
161
+ PropagateStatistics ();
162
+
163
+ Schedule (PropagateInterval, new TEvPrivate::TEvPropagate ());
164
+ }
165
+
166
+ void TStatisticsAggregator::ProcessRequests (TNodeId nodeId, const std::vector<TSSId>& ssIds) {
167
+ if (FastCounter > 0 ) {
168
+ --FastCounter;
169
+ SendStatisticsToNode (nodeId, ssIds);
170
+ } else {
171
+ FastNodes.insert (nodeId);
172
+ for (const auto & ssId : ssIds) {
173
+ FastSchemeShards.insert (ssId);
174
+ }
175
+ if (!FastCheckInFlight) {
176
+ Schedule (TDuration::MilliSeconds (100 ), new TEvPrivate::TEvFastPropagateCheck ());
177
+ FastCheckInFlight = true ;
178
+ }
179
+ }
180
+ }
181
+
182
+ void TStatisticsAggregator::SendStatisticsToNode (TNodeId nodeId, const std::vector<TSSId>& ssIds) {
183
+ SA_LOG_D (" [" << TabletID () << " ] SendStatisticsToNode()"
184
+ << " , node id = " << nodeId
185
+ << " , schemeshard count = " << ssIds.size ());
186
+
187
+ std::vector<TNodeId> nodeIds;
188
+ nodeIds.push_back (nodeId);
189
+
190
+ PropagateStatisticsImpl (nodeIds, ssIds);
191
+ }
192
+
193
+ void TStatisticsAggregator::PropagateStatistics () {
194
+ SA_LOG_D (" [" << TabletID () << " ] PropagateStatistics()"
195
+ << " , node count = " << Nodes.size ()
196
+ << " , schemeshard count = " << RequestedSchemeShards.size ());
197
+
198
+ if (Nodes.empty () || RequestedSchemeShards.empty ()) {
199
+ return ;
200
+ }
201
+
202
+ std::vector<TNodeId> nodeIds;
203
+ nodeIds.reserve (Nodes.size ());
204
+ for (const auto & [nodeId, _] : Nodes) {
205
+ nodeIds.push_back (nodeId);
206
+ }
207
+ std::shuffle (std::begin (nodeIds), std::end (nodeIds), RandomGenerator);
208
+
209
+ std::vector<TSSId> ssIds;
210
+ ssIds.reserve (RequestedSchemeShards.size ());
211
+ for (const auto & ssId : RequestedSchemeShards) {
212
+ ssIds.push_back (ssId);
213
+ }
214
+
215
+ PropagateStatisticsImpl (nodeIds, ssIds);
216
+ }
217
+
218
+ void TStatisticsAggregator::PropagateFastStatistics () {
219
+ SA_LOG_D (" [" << TabletID () << " ] PropagateFastStatistics()"
220
+ << " , node count = " << FastNodes.size ()
221
+ << " , schemeshard count = " << FastSchemeShards.size ());
222
+
223
+ if (FastNodes.empty () || FastSchemeShards.empty ()) {
224
+ return ;
225
+ }
226
+
227
+ std::vector<TNodeId> nodeIds;
228
+ nodeIds.reserve (FastNodes.size ());
229
+ for (const auto & nodeId : FastNodes) {
230
+ nodeIds.push_back (nodeId);
231
+ }
232
+ std::shuffle (std::begin (nodeIds), std::end (nodeIds), RandomGenerator);
233
+
234
+ std::vector<TSSId> ssIds;
235
+ ssIds.reserve (FastSchemeShards.size ());
236
+ for (const auto & ssId : FastSchemeShards) {
237
+ ssIds.push_back (ssId);
238
+ }
239
+
240
+ PropagateStatisticsImpl (nodeIds, ssIds);
241
+ }
242
+
243
+ void TStatisticsAggregator::PropagateStatisticsImpl (
244
+ const std::vector<TNodeId>& nodeIds, const std::vector<TSSId>& ssIds)
245
+ {
246
+ TNodeId leadingNodeId = nodeIds[0 ];
247
+
248
+ for (size_t index = 0 ; index < ssIds.size (); ) {
249
+ auto propagate = std::make_unique<TEvStatistics::TEvPropagateStatistics>();
250
+ auto * record = propagate->MutableRecord ();
251
+ record->MutableNodeIds ()->Reserve (nodeIds.size () - 1 );
252
+ for (size_t i = 1 ; i < nodeIds.size (); ++i) {
253
+ record->AddNodeIds (nodeIds[i]);
254
+ }
255
+ for (size_t size = 0 ; index < ssIds.size (); ++index ) {
256
+ auto ssId = ssIds[index ];
257
+ auto * entry = record->AddEntries ();
258
+ entry->SetSchemeShardId (ssId);
259
+ auto itStats = BaseStats.find (ssId);
260
+ if (itStats != BaseStats.end ()) {
261
+ entry->SetStats (itStats->second );
262
+ size += itStats->second .size ();
263
+ } else {
264
+ entry->SetStats (TString ()); // stats are not sent from SA yet
265
+ }
266
+ if (size >= StatsSizeLimitBytes) {
267
+ ++index ;
268
+ break ;
269
+ }
270
+ }
271
+ Send (NStat::MakeStatServiceID (leadingNodeId), propagate.release ());
272
+ }
34
273
}
35
274
36
275
void TStatisticsAggregator::PersistSysParam (NIceDb::TNiceDb& db, ui64 id, const TString& value) {
0 commit comments