Commit c7ef100

healthcheck config (#14860)
Co-authored-by: Ilnaz Nizametdinov <[email protected]>
1 parent: 67fc041 · commit: c7ef100

8 files changed (+197, −10 lines)

ydb/core/base/appdata.cpp

Lines changed: 2 additions & 0 deletions

@@ -70,6 +70,7 @@ struct TAppData::TImpl {
     NKikimrReplication::TReplicationDefaults ReplicationConfig;
     NKikimrProto::TDataIntegrityTrailsConfig DataIntegrityTrailsConfig;
     NKikimrConfig::TDataErasureConfig DataErasureConfig;
+    NKikimrConfig::THealthCheckConfig HealthCheckConfig;
 };

 TAppData::TAppData(
@@ -127,6 +128,7 @@ TAppData::TAppData(
     , ReplicationConfig(Impl->ReplicationConfig)
     , DataIntegrityTrailsConfig(Impl->DataIntegrityTrailsConfig)
     , DataErasureConfig(Impl->DataErasureConfig)
+    , HealthCheckConfig(Impl->HealthCheckConfig)
     , KikimrShouldContinue(kikimrShouldContinue)
     , TracingConfigurator(MakeIntrusive<NJaegerTracing::TSamplingThrottlingConfigurator>(TimeProvider, RandomProvider))
 {}

ydb/core/base/appdata_fwd.h

Lines changed: 2 additions & 0 deletions

@@ -73,6 +73,7 @@ namespace NKikimrConfig {
     class TMetadataCacheConfig;
     class TMemoryControllerConfig;
     class TFeatureFlags;
+    class THealthCheckConfig;
 }

 namespace NKikimrReplication {
@@ -242,6 +243,7 @@ struct TAppData {
     NKikimrReplication::TReplicationDefaults& ReplicationConfig;
     NKikimrProto::TDataIntegrityTrailsConfig& DataIntegrityTrailsConfig;
     NKikimrConfig::TDataErasureConfig& DataErasureConfig;
+    NKikimrConfig::THealthCheckConfig& HealthCheckConfig;
     bool EnforceUserTokenRequirement = false;
     bool EnforceUserTokenCheckRequirement = false; // check token if it was specified
     bool AllowHugeKeyValueDeletes = true; // delete when all clients limit deletes per request

ydb/core/cms/console/configs_dispatcher.cpp

Lines changed: 1 addition & 0 deletions

@@ -67,6 +67,7 @@ const THashSet<ui32> DYNAMIC_KINDS({
     (ui32)NKikimrConsole::TConfigItem::BlobStorageConfigItem,
     (ui32)NKikimrConsole::TConfigItem::MetadataCacheConfigItem,
     (ui32)NKikimrConsole::TConfigItem::MemoryControllerConfigItem,
+    (ui32)NKikimrConsole::TConfigItem::HealthCheckConfigItem,
 });

 const THashSet<ui32> NON_YAML_KINDS({

ydb/core/driver_lib/run/run.cpp

Lines changed: 4 additions & 0 deletions

@@ -1205,6 +1205,10 @@ void TKikimrRunner::InitializeAppData(const TKikimrRunConfig& runConfig)
         AppData->ReplicationConfig = runConfig.AppConfig.GetReplicationConfig();
     }

+    if (runConfig.AppConfig.HasHealthCheckConfig()) {
+        AppData->HealthCheckConfig = runConfig.AppConfig.GetHealthCheckConfig();
+    }
+
     // setup resource profiles
     AppData->ResourceProfiles = new TResourceProfiles;
     if (runConfig.AppConfig.GetBootstrapConfig().ResourceProfilesSize())

ydb/core/health_check/health_check.cpp

Lines changed: 26 additions & 10 deletions

@@ -16,6 +16,7 @@
 #include <ydb/core/base/path.h>
 #include <ydb/core/base/statestorage.h>
 #include <ydb/core/base/tablet_pipe.h>
+#include <ydb/core/cms/console/configs_dispatcher.h>
 #include <ydb/core/mon/mon.h>
 #include <ydb/core/base/nameservice.h>
 #include <ydb/core/blobstorage/base/blobstorage_events.h>
@@ -28,6 +29,7 @@
 #include <ydb/core/util/tuples.h>

 #include <ydb/core/protos/blobstorage_distributed_config.pb.h>
+#include <ydb/core/protos/config.pb.h>
 #include <ydb/core/sys_view/common/events.h>

 #include <ydb/public/api/grpc/ydb_monitoring_v1.grpc.pb.h>
@@ -121,11 +123,12 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
     ui64 Cookie;
     NWilson::TSpan Span;

-    TSelfCheckRequest(const TActorId& sender, THolder<TEvSelfCheckRequest> request, ui64 cookie, NWilson::TTraceId&& traceId)
+    TSelfCheckRequest(const TActorId& sender, THolder<TEvSelfCheckRequest> request, ui64 cookie, NWilson::TTraceId&& traceId, const NKikimrConfig::THealthCheckConfig& config)
         : Sender(sender)
         , Request(std::move(request))
         , Cookie(cookie)
         , Span(TComponentTracingLevels::TTablet::Basic, std::move(traceId), "health_check", NWilson::EFlags::AUTO_END)
+        , HealthCheckConfig(config)
     {}

     using TGroupId = ui32;
@@ -163,7 +166,7 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
     struct TNodeTabletState {
         struct TTabletStateSettings {
             TInstant AliveBarrier;
-            ui32 MaxRestartsPerPeriod = 30; // per hour
+            ui32 MaxRestartsPerPeriod; // per hour
             ui32 MaxTabletIdsStored = 10;
             bool ReportGoodTabletsIds = false;
         };
@@ -647,6 +650,8 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
     std::optional<TRequestResponse<TEvStateStorage::TEvBoardInfo>> DatabaseBoardInfo;
     THashSet<TNodeId> UnknownStaticGroups;

+    const NKikimrConfig::THealthCheckConfig& HealthCheckConfig;
+
     std::vector<TNodeId> SubscribedNodeIds;
     THashSet<TNodeId> StorageNodeIds;
     THashSet<TNodeId> ComputeNodeIds;
@@ -1504,6 +1509,7 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
         for (const auto& [hiveId, hiveResponse] : HiveInfo) {
             if (hiveResponse.IsOk()) {
                 settings.AliveBarrier = TInstant::MilliSeconds(hiveResponse->Record.GetResponseTimestamp()) - TDuration::Minutes(5);
+                settings.MaxRestartsPerPeriod = HealthCheckConfig.GetTabletsRestartsPerPeriodOrangeThreshold();
                 for (const NKikimrHive::TTabletInfo& hiveTablet : hiveResponse->Record.GetTablets()) {
                     TSubDomainKey tenantId = TSubDomainKey(hiveTablet.GetObjectDomain());
                     auto itDomain = FilterDomainKey.find(tenantId);
@@ -1729,9 +1735,9 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
         FillNodeInfo(nodeId, context.Location.mutable_compute()->mutable_node());

         TSelfCheckContext rrContext(&context, "NODE_UPTIME");
-        if (databaseState.NodeRestartsPerPeriod[nodeId] >= 30) {
+        if (databaseState.NodeRestartsPerPeriod[nodeId] >= HealthCheckConfig.GetNodeRestartsPerPeriodOrangeThreshold()) {
             rrContext.ReportStatus(Ydb::Monitoring::StatusFlag::ORANGE, "Node is restarting too often", ETags::Uptime);
-        } else if (databaseState.NodeRestartsPerPeriod[nodeId] >= 10) {
+        } else if (databaseState.NodeRestartsPerPeriod[nodeId] >= HealthCheckConfig.GetNodeRestartsPerPeriodYellowThreshold()) {
             rrContext.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "The number of node restarts has increased", ETags::Uptime);
         } else {
             rrContext.ReportStatus(Ydb::Monitoring::StatusFlag::GREEN);
@@ -1769,9 +1775,9 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
         long timeDifferenceUs = nodeSystemState.GetMaxClockSkewWithPeerUs();
         TDuration timeDifferenceDuration = TDuration::MicroSeconds(abs(timeDifferenceUs));
         Ydb::Monitoring::StatusFlag::Status status;
-        if (timeDifferenceDuration > MAX_CLOCKSKEW_ORANGE_ISSUE_TIME) {
+        if (timeDifferenceDuration > TDuration::MicroSeconds(HealthCheckConfig.GetNodesTimeDifferenceUsOrangeThreshold())) {
             status = Ydb::Monitoring::StatusFlag::ORANGE;
-        } else if (timeDifferenceDuration > MAX_CLOCKSKEW_YELLOW_ISSUE_TIME) {
+        } else if (timeDifferenceDuration > TDuration::MicroSeconds(HealthCheckConfig.GetNodesTimeDifferenceUsYellowThreshold())) {
             status = Ydb::Monitoring::StatusFlag::YELLOW;
         } else {
             status = Ydb::Monitoring::StatusFlag::GREEN;
@@ -2921,9 +2927,6 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
         }
     }

-    const TDuration MAX_CLOCKSKEW_ORANGE_ISSUE_TIME = TDuration::MicroSeconds(25000);
-    const TDuration MAX_CLOCKSKEW_YELLOW_ISSUE_TIME = TDuration::MicroSeconds(5000);
-
     void FillResult(TOverallStateContext context) {
         if (IsSpecificDatabaseFilter()) {
             FillDatabaseResult(context, FilterDatabase, DatabaseState[FilterDatabase]);
@@ -3252,12 +3255,16 @@ void TNodeCheckRequest<NMon::TEvHttpInfo>::Bootstrap() {
 class THealthCheckService : public TActorBootstrapped<THealthCheckService> {
 public:
     static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::MONITORING_SERVICE; }
+    NKikimrConfig::THealthCheckConfig HealthCheckConfig;

     THealthCheckService()
     {
     }

     void Bootstrap() {
+        HealthCheckConfig.CopyFrom(AppData()->HealthCheckConfig);
+        Send(NConsole::MakeConfigsDispatcherID(SelfId().NodeId()),
+             new NConsole::TEvConfigsDispatcher::TEvSetConfigSubscriptionRequest({NKikimrConsole::TConfigItem::HealthCheckConfigItem}));
         TMon* mon = AppData()->Mon;
         if (mon) {
             mon->RegisterActorPage({
@@ -3270,8 +3277,16 @@ class THealthCheckService : public TActorBootstrapped<THealthCheckService> {
         Become(&THealthCheckService::StateWork);
     }

+    void Handle(NConsole::TEvConsole::TEvConfigNotificationRequest::TPtr& ev) {
+        const auto& record = ev->Get()->Record;
+        if (record.GetConfig().HasHealthCheckConfig()) {
+            HealthCheckConfig.CopyFrom(record.GetConfig().GetHealthCheckConfig());
+        }
+        Send(ev->Sender, new NConsole::TEvConsole::TEvConfigNotificationResponse(record), 0, ev->Cookie);
+    }
+
     void Handle(TEvSelfCheckRequest::TPtr& ev) {
-        Register(new TSelfCheckRequest(ev->Sender, ev.Get()->Release(), ev->Cookie, std::move(ev->TraceId)));
+        Register(new TSelfCheckRequest(ev->Sender, ev.Get()->Release(), ev->Cookie, std::move(ev->TraceId), HealthCheckConfig));
     }

     std::shared_ptr<NYdbGrpc::TGRpcClientLow> GRpcClientLow;
@@ -3299,6 +3314,7 @@ class THealthCheckService : public TActorBootstrapped<THealthCheckService> {
             hFunc(TEvSelfCheckRequest, Handle);
             hFunc(TEvNodeCheckRequest, Handle);
             hFunc(NMon::TEvHttpInfo, Handle);
+            hFunc(NConsole::TEvConsole::TEvConfigNotificationRequest, Handle);
             cFunc(TEvents::TSystem::PoisonPill, PassAway);
         }
     }
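Taken together, the health_check.cpp changes replace the hard-coded node-restart, clock-skew, and tablet-restart limits with values read from THealthCheckConfig: the service snapshots AppData()->HealthCheckConfig in Bootstrap, keeps it current through the ConfigsDispatcher subscription, and passes a reference to every TSelfCheckRequest. A minimal sketch (not part of the commit; the helper name is hypothetical) of how the NODE_UPTIME check maps a restart count to a status under the new thresholds:

// Sketch only: mirrors the NODE_UPTIME branch above. Orange takes precedence
// over yellow, and both comparisons are inclusive (>=).
Ydb::Monitoring::StatusFlag::Status RestartsToStatus(
        ui32 restartsPerPeriod, const NKikimrConfig::THealthCheckConfig& cfg) {
    if (restartsPerPeriod >= cfg.GetNodeRestartsPerPeriodOrangeThreshold()) {
        return Ydb::Monitoring::StatusFlag::ORANGE;   // default threshold: 30
    }
    if (restartsPerPeriod >= cfg.GetNodeRestartsPerPeriodYellowThreshold()) {
        return Ydb::Monitoring::StatusFlag::YELLOW;   // default threshold: 10
    }
    return Ydb::Monitoring::StatusFlag::GREEN;
}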

ydb/core/health_check/health_check_ut.cpp

Lines changed: 152 additions & 0 deletions

@@ -6,6 +6,7 @@
 #include <ydb/core/mind/hive/hive_events.h>
 #include <ydb/core/node_whiteboard/node_whiteboard.h>
 #include <ydb/core/blobstorage/base/blobstorage_events.h>
+#include <ydb/core/protos/config.pb.h>
 #include <ydb/core/tx/schemeshard/schemeshard.h>
 #include "health_check.cpp"

@@ -1961,5 +1962,156 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {

         UNIT_ASSERT(HasDeadTabletIssue(result));
     }
+
+    void SendHealthCheckConfigUpdate(TTestActorRuntime &runtime, const TActorId& sender, const NKikimrConfig::THealthCheckConfig &cfg) {
+        auto *event = new NConsole::TEvConsole::TEvConfigureRequest;
+
+        event->Record.AddActions()->MutableRemoveConfigItems()->MutableCookieFilter()->AddCookies("cookie");
+
+        auto &item = *event->Record.AddActions()->MutableAddConfigItem()->MutableConfigItem();
+        item.MutableConfig()->MutableHealthCheckConfig()->CopyFrom(cfg);
+        item.SetCookie("cookie");
+
+        runtime.SendToPipe(MakeConsoleID(), sender, event, 0, GetPipeConfigWithRetries());
+
+        TAutoPtr<IEventHandle> handle;
+        auto record = runtime.GrabEdgeEvent<NConsole::TEvConsole::TEvConfigureResponse>(handle)->Record;
+        UNIT_ASSERT_VALUES_EQUAL(record.MutableStatus()->GetCode(), Ydb::StatusIds::SUCCESS);
+    }
+
+    void ChangeNodeRestartsPerPeriod(TTestActorRuntime &runtime, const TActorId& sender, const ui32 restartsYellow, const ui32 restartsOrange) {
+        NKikimrConfig::TAppConfig ext;
+        auto &cfg = *ext.MutableHealthCheckConfig();
+        cfg.SetNodeRestartsPerPeriodYellowThreshold(restartsYellow);
+        cfg.SetNodeRestartsPerPeriodOrangeThreshold(restartsOrange);
+        SendHealthCheckConfigUpdate(runtime, sender, cfg);
+    }
+
+    void TestConfigUpdateNodeRestartsPerPeriod(TTestActorRuntime &runtime, const TActorId& sender, const ui32 restartsYellow, const ui32 restartsOrange, const ui32 nodeId, Ydb::Monitoring::StatusFlag::Status expectedStatus) {
+        ChangeNodeRestartsPerPeriod(runtime, sender, restartsYellow, restartsOrange);
+
+        TAutoPtr<IEventHandle> handle;
+        auto *request = new NHealthCheck::TEvSelfCheckRequest;
+        request->Request.set_return_verbose_status(true);
+        request->Database = "/Root/database";
+
+        runtime.Send(new IEventHandle(NHealthCheck::MakeHealthCheckID(), sender, request, 0));
+        auto result = runtime.GrabEdgeEvent<NHealthCheck::TEvSelfCheckResult>(handle)->Result;
+        Ctest << result.ShortDebugString() << Endl;
+
+        const auto &database_status = result.database_status(0);
+        UNIT_ASSERT_VALUES_EQUAL(database_status.name(), "/Root/database");
+        UNIT_ASSERT_VALUES_EQUAL(database_status.compute().overall(), expectedStatus);
+        UNIT_ASSERT_VALUES_EQUAL(database_status.compute().nodes()[0].id(), ToString(nodeId));
+    }
+
+    Y_UNIT_TEST(HealthCheckConfigUpdate) {
+        TPortManager tp;
+        ui16 port = tp.GetPort(2134);
+        ui16 grpcPort = tp.GetPort(2135);
+        auto settings = TServerSettings(port)
+                .SetNodeCount(1)
+                .SetDynamicNodeCount(1)
+                .SetUseRealThreads(false)
+                .SetDomainName("Root");
+
+        TServer server(settings);
+        server.EnableGRpc(grpcPort);
+        TClient client(settings);
+        TTestActorRuntime& runtime = *server.GetRuntime();
+        TActorId sender = runtime.AllocateEdgeActor();
+
+        const ui32 nodeRestarts = 10;
+        const ui32 nodeId = runtime.GetNodeId(1);
+        auto observerFunc = [&](TAutoPtr<IEventHandle>& ev) {
+            switch (ev->GetTypeRewrite()) {
+                case NConsole::TEvConsole::EvGetTenantStatusResponse: {
+                    auto *x = reinterpret_cast<NConsole::TEvConsole::TEvGetTenantStatusResponse::TPtr*>(&ev);
+                    ChangeGetTenantStatusResponse(x, "/Root/database");
+                    break;
+                }
+                case TEvTxProxySchemeCache::EvNavigateKeySetResult: {
+                    auto *x = reinterpret_cast<TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr*>(&ev);
+                    TSchemeCacheNavigate::TEntry& entry((*x)->Get()->Request->ResultSet.front());
+                    const TString path = CanonizePath(entry.Path);
+                    if (path == "/Root/database" || entry.TableId.PathId == SUBDOMAIN_KEY) {
+                        entry.Status = TSchemeCacheNavigate::EStatus::Ok;
+                        entry.Kind = TSchemeCacheNavigate::EKind::KindExtSubdomain;
+                        entry.Path = {"Root", "database"};
+                        entry.DomainInfo = MakeIntrusive<TDomainInfo>(SUBDOMAIN_KEY, SUBDOMAIN_KEY);
+                        auto domains = runtime.GetAppData().DomainsInfo;
+                        ui64 hiveId = domains->GetHive();
+                        entry.DomainInfo->Params.SetHive(hiveId);
+                    }
+                    break;
+                }
+                case TEvHive::EvResponseHiveNodeStats: {
+                    auto *x = reinterpret_cast<TEvHive::TEvResponseHiveNodeStats::TPtr*>(&ev);
+                    auto &record = (*x)->Get()->Record;
+                    record.ClearNodeStats();
+                    auto *nodeStats = record.MutableNodeStats()->Add();
+                    nodeStats->SetNodeId(nodeId);
+                    nodeStats->SetRestartsPerPeriod(nodeRestarts);
+                    nodeStats->MutableNodeDomain()->SetSchemeShard(SUBDOMAIN_KEY.OwnerId);
+                    nodeStats->MutableNodeDomain()->SetPathId(SUBDOMAIN_KEY.LocalPathId);
+                    break;
+                }
+                case TEvSchemeShard::EvDescribeSchemeResult: {
+                    auto *x = reinterpret_cast<NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult::TPtr*>(&ev);
+                    auto record = (*x)->Get()->MutableRecord();
+                    if (record->path() == "/Root/database") {
+                        record->set_status(NKikimrScheme::StatusSuccess);
+                        // no pools
+                    }
+                    break;
+                }
+                case TEvBlobStorage::EvControllerConfigResponse: {
+                    auto *x = reinterpret_cast<TEvBlobStorage::TEvControllerConfigResponse::TPtr*>(&ev);
+                    AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, TVDisks(1));
+                    break;
+                }
+                case NSysView::TEvSysView::EvGetVSlotsResponse: {
+                    auto* x = reinterpret_cast<NSysView::TEvSysView::TEvGetVSlotsResponse::TPtr*>(&ev);
+                    AddVSlotsToSysViewResponse(x, 1, TVDisks(1));
+                    break;
+                }
+                case NSysView::TEvSysView::EvGetGroupsResponse: {
+                    auto* x = reinterpret_cast<NSysView::TEvSysView::TEvGetGroupsResponse::TPtr*>(&ev);
+                    AddGroupsToSysViewResponse(x);
+                    break;
+                }
+                case NSysView::TEvSysView::EvGetStoragePoolsResponse: {
+                    auto* x = reinterpret_cast<NSysView::TEvSysView::TEvGetStoragePoolsResponse::TPtr*>(&ev);
+                    AddStoragePoolsToSysViewResponse(x);
+                    break;
+                }
+                case TEvWhiteboard::EvSystemStateResponse: {
+                    auto *x = reinterpret_cast<TEvWhiteboard::TEvSystemStateResponse::TPtr*>(&ev);
+                    ClearLoadAverage(x);
+                    break;
+                }
+                case TEvInterconnect::EvNodesInfo: {
+                    auto *x = reinterpret_cast<TEvInterconnect::TEvNodesInfo::TPtr*>(&ev);
+                    auto nodes = MakeIntrusive<TIntrusiveVector<TEvInterconnect::TNodeInfo>>((*x)->Get()->Nodes);
+                    if (!nodes->empty()) {
+                        nodes->erase(nodes->begin() + 1, nodes->end());
+                        nodes->begin()->NodeId = nodeId;
+                    }
+                    auto newEv = IEventHandle::Downcast<TEvInterconnect::TEvNodesInfo>(
+                        new IEventHandle((*x)->Recipient, (*x)->Sender, new TEvInterconnect::TEvNodesInfo(nodes))
+                    );
+                    x->Swap(newEv);
+                    break;
+                }
+            }
+
+            return TTestActorRuntime::EEventAction::PROCESS;
+        };
+        runtime.SetObserverFunc(observerFunc);
+
+        TestConfigUpdateNodeRestartsPerPeriod(runtime, sender, nodeRestarts + 5, nodeRestarts + 10, nodeId, Ydb::Monitoring::StatusFlag::GREEN);
+        TestConfigUpdateNodeRestartsPerPeriod(runtime, sender, nodeRestarts / 2, nodeRestarts + 5, nodeId, Ydb::Monitoring::StatusFlag::YELLOW);
+        TestConfigUpdateNodeRestartsPerPeriod(runtime, sender, nodeRestarts / 5, nodeRestarts / 2, nodeId, Ydb::Monitoring::StatusFlag::ORANGE);
+    }
 }
 }

ydb/core/protos/config.proto

Lines changed: 9 additions & 0 deletions

@@ -1779,6 +1779,14 @@ message THiveConfig {
     optional uint64 NodeRestartsForPenalty = 85 [default = 3];
 }

+message THealthCheckConfig {
+    optional uint32 NodeRestartsPerPeriodYellowThreshold = 1 [default = 10];
+    optional uint32 NodeRestartsPerPeriodOrangeThreshold = 2 [default = 30];
+    optional uint64 NodesTimeDifferenceUsYellowThreshold = 3 [default = 5000];
+    optional uint64 NodesTimeDifferenceUsOrangeThreshold = 4 [default = 25000];
+    optional uint32 TabletsRestartsPerPeriodOrangeThreshold = 5 [default = 30];
+}
+
 message TBlobCacheConfig {
     optional uint64 MaxSizeBytes = 1 [default = 1073741824];
 }
@@ -2270,6 +2278,7 @@ message TAppConfig {
     optional TSelfManagementConfig SelfManagementConfig = 86;
    optional NKikimrProto.TDataIntegrityTrailsConfig DataIntegrityTrailsConfig = 87;
     optional TDataErasureConfig DataErasureConfig = 88;
+    optional THealthCheckConfig HealthCheckConfig = 89;

     repeated TNamedConfig NamedConfigs = 100;
     optional string ClusterYamlConfig = 101;
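The defaults above preserve the behaviour that was previously hard-coded in health_check.cpp: 10/30 node restarts, 5 ms / 25 ms clock skew, and 30 tablet restarts per period, so an absent or empty HealthCheckConfig keeps the old thresholds. A minimal sketch of setting the thresholds through the generated protobuf API — only the message and field names come from this diff; the helper function and the chosen override values are hypothetical:

#include <ydb/core/protos/config.pb.h>

// Sketch: assemble an app config that tightens the node-restart thresholds.
// Set*/Mutable* accessors are generated from the config.proto message above.
NKikimrConfig::TAppConfig BuildAppConfigWithHealthCheck() {
    NKikimrConfig::TAppConfig appConfig;
    auto& hc = *appConfig.MutableHealthCheckConfig();
    hc.SetNodeRestartsPerPeriodYellowThreshold(5);       // default 10
    hc.SetNodeRestartsPerPeriodOrangeThreshold(15);      // default 30
    hc.SetNodesTimeDifferenceUsYellowThreshold(5000);    // 5 ms (default)
    hc.SetNodesTimeDifferenceUsOrangeThreshold(25000);   // 25 ms (default)
    hc.SetTabletsRestartsPerPeriodOrangeThreshold(30);   // default
    return appConfig;
}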

ydb/core/protos/console_config.proto

Lines changed: 1 addition & 0 deletions

@@ -143,6 +143,7 @@ message TConfigItem {
     GroupedMemoryLimiterConfig = 82;
     ReplicationConfigItem = 83;
     CompPrioritiesConfig = 85;
+    HealthCheckConfigItem = 89;

     NamedConfigsItem = 100;
     ClusterYamlConfigItem = 101;
