Skip to content

Commit ae12905

Browse files
authored
Health check for exclusive dynamic nodes KIKIMR-20818 (#1223)
* Health check for exclusive dynamic nodes KIKIMR-20818
1 parent 59147eb commit ae12905

File tree

3 files changed

+919
-22
lines changed

3 files changed

+919
-22
lines changed

ydb/core/health_check/health_check.cpp

Lines changed: 30 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -228,12 +228,13 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
228228
TTabletId HiveId = {};
229229
TPathId ResourcePathId = {};
230230
TVector<TNodeId> ComputeNodeIds;
231-
TVector<TString> StoragePoolNames;
231+
THashSet<TString> StoragePoolNames;
232232
THashMap<std::pair<TTabletId, NNodeWhiteboard::TFollowerId>, const NKikimrHive::TTabletInfo*> MergedTabletState;
233233
THashMap<TNodeId, TNodeTabletState> MergedNodeTabletState;
234234
THashMap<TNodeId, ui32> NodeRestartsPerPeriod;
235235
ui64 StorageQuota;
236236
ui64 StorageUsage;
237+
TMaybeServerlessComputeResourcesMode ServerlessComputeResourcesMode;
237238
};
238239

239240
struct TSelfCheckResult {
@@ -514,7 +515,7 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
514515
TDuration Timeout = TDuration::MilliSeconds(20000);
515516
static constexpr TStringBuf STATIC_STORAGE_POOL_NAME = "static";
516517

517-
bool IsSpecificDatabaseFilter() {
518+
bool IsSpecificDatabaseFilter() const {
518519
return FilterDatabase && FilterDatabase != DomainPath;
519520
}
520521

@@ -593,7 +594,7 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
593594
StoragePoolState[storagePoolName].Groups.emplace(group.groupid());
594595

595596
if (!IsSpecificDatabaseFilter()) {
596-
DatabaseState[DomainPath].StoragePoolNames.emplace_back(storagePoolName);
597+
DatabaseState[DomainPath].StoragePoolNames.emplace(storagePoolName);
597598
}
598599
}
599600
}
@@ -869,12 +870,12 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
869870
TDatabaseState& state(DatabaseState[path]);
870871
for (const auto& storagePool : ev->Get()->GetRecord().pathdescription().domaindescription().storagepools()) {
871872
TString storagePoolName = storagePool.name();
872-
state.StoragePoolNames.emplace_back(storagePoolName);
873+
state.StoragePoolNames.emplace(storagePoolName);
873874
StoragePoolState[storagePoolName].Kind = storagePool.kind();
874875
RequestSelectGroups(storagePoolName);
875876
}
876877
if (path == DomainPath) {
877-
state.StoragePoolNames.emplace_back(STATIC_STORAGE_POOL_NAME);
878+
state.StoragePoolNames.emplace(STATIC_STORAGE_POOL_NAME);
878879
}
879880
state.StorageUsage = ev->Get()->GetRecord().pathdescription().domaindescription().diskspaceusage().tables().totalsize();
880881
state.StorageQuota = ev->Get()->GetRecord().pathdescription().domaindescription().databasequotas().data_size_hard_quota();
@@ -888,12 +889,19 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
888889
if (ev->Get()->Request->ResultSet.size() == 1 && ev->Get()->Request->ResultSet.begin()->Status == NSchemeCache::TSchemeCacheNavigate::EStatus::Ok) {
889890
auto domainInfo = ev->Get()->Request->ResultSet.begin()->DomainInfo;
890891
TString path = CanonizePath(ev->Get()->Request->ResultSet.begin()->Path);
891-
892-
if (domainInfo->DomainKey != domainInfo->ResourcesDomainKey) {
893-
if (SharedDatabases.emplace(domainInfo->ResourcesDomainKey, path).second) {
894-
RequestSchemeCacheNavigate(domainInfo->ResourcesDomainKey);
892+
if (domainInfo->IsServerless()) {
893+
if (NeedHealthCheckForServerless(domainInfo)) {
894+
if (SharedDatabases.emplace(domainInfo->ResourcesDomainKey, path).second) {
895+
RequestSchemeCacheNavigate(domainInfo->ResourcesDomainKey);
896+
}
897+
DatabaseState[path].ResourcePathId = domainInfo->ResourcesDomainKey;
898+
DatabaseState[path].ServerlessComputeResourcesMode = domainInfo->ServerlessComputeResourcesMode;
899+
} else {
900+
DatabaseState.erase(path);
901+
DatabaseStatusByPath.erase(path);
902+
RequestDone("TEvNavigateKeySetResult");
903+
return;
895904
}
896-
DatabaseState[path].ResourcePathId = domainInfo->ResourcesDomainKey;
897905
}
898906
TTabletId hiveId = domainInfo->Params.GetHive();
899907
if (hiveId) {
@@ -918,6 +926,11 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
918926
RequestDone("TEvNavigateKeySetResult");
919927
}
920928

929+
// Decides whether a serverless database should take part in the health check.
// Returns true when IsSpecificDatabaseFilter() holds, or when the domain's
// ServerlessComputeResourcesMode equals EServerlessComputeResourcesModeExclusive.
// In the visible caller, a false result makes the handler erase the path from
// DatabaseState and DatabaseStatusByPath and finish the request early.
bool NeedHealthCheckForServerless(TIntrusivePtr<NSchemeCache::TDomainInfo> domainInfo) const {
930+
return IsSpecificDatabaseFilter()
931+
|| domainInfo->ServerlessComputeResourcesMode == NKikimrSubDomains::EServerlessComputeResourcesModeExclusive;
932+
}
933+
921934
void Handle(TEvHive::TEvResponseHiveDomainStats::TPtr& ev) {
922935
TTabletId hiveId = TabletRequests.CompleteRequest(ev->Cookie);
923936
for (const NKikimrHive::THiveDomainStats& hiveStat : ev->Get()->Record.GetDomainStats()) {
@@ -951,15 +964,9 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
951964
Ydb::Cms::GetDatabaseStatusResult getTenantStatusResult;
952965
operation.result().UnpackTo(&getTenantStatusResult);
953966
TString path = getTenantStatusResult.path();
954-
955-
bool ignoreServerlessDatabases = !IsSpecificDatabaseFilter(); // we don't ignore sl database if it was exactly specified
956-
if (getTenantStatusResult.has_serverless_resources() && ignoreServerlessDatabases) {
957-
DatabaseState.erase(path);
958-
} else {
959-
DatabaseStatusByPath[path] = std::move(getTenantStatusResult);
960-
DatabaseState[path];
961-
RequestSchemeCacheNavigate(path);
962-
}
967+
DatabaseStatusByPath[path] = std::move(getTenantStatusResult);
968+
DatabaseState[path];
969+
RequestSchemeCacheNavigate(path);
963970
}
964971
RequestDone("TEvGetTenantStatusResponse");
965972
}
@@ -1298,7 +1305,9 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
12981305

12991306
void FillCompute(TDatabaseState& databaseState, Ydb::Monitoring::ComputeStatus& computeStatus, TSelfCheckContext context) {
13001307
TVector<TNodeId>* computeNodeIds = &databaseState.ComputeNodeIds;
1301-
if (databaseState.ResourcePathId) {
1308+
if (databaseState.ResourcePathId
1309+
&& databaseState.ServerlessComputeResourcesMode != NKikimrSubDomains::EServerlessComputeResourcesModeExclusive)
1310+
{
13021311
auto itDatabase = FilterDomainKey.find(TSubDomainKey(databaseState.ResourcePathId.OwnerId, databaseState.ResourcePathId.LocalPathId));
13031312
if (itDatabase != FilterDomainKey.end()) {
13041313
const TString& sharedDatabaseName = itDatabase->second;
@@ -2124,7 +2133,7 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
21242133
TDatabaseState unknownDatabase;
21252134
for (auto& [name, pool] : StoragePoolState) {
21262135
if (StoragePoolSeen.count(name) == 0) {
2127-
unknownDatabase.StoragePoolNames.push_back(name);
2136+
unknownDatabase.StoragePoolNames.insert(name);
21282137
}
21292138
}
21302139
if (!unknownDatabase.StoragePoolNames.empty()) {

0 commit comments

Comments
 (0)