Skip to content

Commit 3cd00af

Browse files
added issue
1 parent 1d225a1 commit 3cd00af

File tree

1 file changed

+7
-2
lines changed

1 file changed

+7
-2
lines changed

ydb/core/health_check/health_check.cpp

+7 -2
Original file line number | Diff line number | Diff line change
@@ -1248,9 +1248,13 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
12481248
}
12491249
}
12501250

1251-
void FillComputeNodeStatus(TNodeId nodeId, Ydb::Monitoring::ComputeNodeStatus& computeNodeStatus, TSelfCheckContext context) {
1251+
void FillComputeNodeStatus(TDatabaseState& databaseState,TNodeId nodeId, Ydb::Monitoring::ComputeNodeStatus& computeNodeStatus, TSelfCheckContext context) {
12521252
FillNodeInfo(nodeId, context.Location.mutable_compute()->mutable_node());
12531253

1254+
if (databaseState.RestartsPerPeriod[nodeId] > 30) {
1255+
context.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "Node is restarting too often", ETags::NodeState);
1256+
}
1257+
12541258
auto itNodeSystemState = MergedNodeSystemState.find(nodeId);
12551259
if (itNodeSystemState != MergedNodeSystemState.end()) {
12561260
const NKikimrWhiteboard::TSystemStateInfo& nodeSystemState(*itNodeSystemState->second);
@@ -1308,8 +1312,9 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
13081312
}
13091313
for (TNodeId nodeId : *computeNodeIds) {
13101314
auto& computeNode = *computeStatus.add_nodes();
1311-
FillComputeNodeStatus(nodeId, computeNode, {&context, "COMPUTE_NODE"});
1315+
FillComputeNodeStatus(databaseState, nodeId, computeNode, {&context, "COMPUTE_NODE"});
13121316
}
1317+
context.ReportWithMaxChildStatus("Some nodes are restarting too often", ETags::ComputeState, {ETags::NodeState});
13131318
context.ReportWithMaxChildStatus("Compute is overloaded", ETags::ComputeState, {ETags::OverloadState});
13141319
Ydb::Monitoring::StatusFlag::Status tabletsStatus = Ydb::Monitoring::StatusFlag::GREEN;
13151320
computeNodeIds->push_back(0); // for tablets without node

0 commit comments

Comments
 (0)