Skip to content

Commit 81472fb

Browse files
Merge 926f823 into 5d11c54
2 parents 5d11c54 + 926f823 commit 81472fb

File tree

2 files changed

+134
-211
lines changed

2 files changed

+134
-211
lines changed

ydb/core/health_check/health_check.cpp

+38-90
Original file line numberDiff line numberDiff line change
@@ -219,21 +219,20 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
219219
};
220220

221221
struct TStoragePoolState {
222-
TString Kind;
222+
TString Name;
223223
THashSet<TGroupId> Groups;
224-
THashSet<TGroupId> AuthenticGroups;
225224
};
226225

227226
struct TDatabaseState {
228227
TTabletId HiveId = {};
229228
TPathId ResourcePathId = {};
230229
TVector<TNodeId> ComputeNodeIds;
231-
THashSet<TString> StoragePoolNames;
230+
THashSet<ui64> StoragePools;
232231
THashMap<std::pair<TTabletId, NNodeWhiteboard::TFollowerId>, const NKikimrHive::TTabletInfo*> MergedTabletState;
233232
THashMap<TNodeId, TNodeTabletState> MergedNodeTabletState;
234233
THashMap<TNodeId, ui32> NodeRestartsPerPeriod;
235-
ui64 StorageQuota;
236-
ui64 StorageUsage;
234+
ui64 StorageQuota = 0;
235+
ui64 StorageUsage = 0;
237236
TMaybeServerlessComputeResourcesMode ServerlessComputeResourcesMode;
238237
};
239238

@@ -431,6 +430,7 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
431430
TTabletId RootHiveId;
432431
THashMap<TString, TTenantInfo> TenantByPath;
433432
THashMap<TString, THolder<NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult>> DescribeByPath;
433+
THashMap<TString, THashSet<TString>> PathsByPoolName;
434434
THashMap<TString, Ydb::Cms::GetDatabaseStatusResult> DatabaseStatusByPath;
435435
THashMap<TString, THolder<NTenantSlotBroker::TEvTenantSlotBroker::TEvTenantState>> TenantStateByPath;
436436
THashMap<TString, THolder<NSchemeCache::TSchemeCacheNavigate>> NavigateResult;
@@ -442,7 +442,6 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
442442
THolder<TEvBlobStorage::TEvControllerConfigResponse> BaseConfig;
443443

444444
THashSet<TNodeId> NodeIds;
445-
THashSet<TNodeId> StorageNodeIds;
446445
THashSet<TNodeId> ComputeNodeIds;
447446
std::unordered_map<std::pair<TNodeId, int>, ui32> NodeRetries;
448447
ui32 MaxRetries = 20;
@@ -463,8 +462,8 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
463462
std::unordered_set<TString> ValidPDisks;
464463
std::unordered_set<TGroupId> ValidGroups;
465464

466-
THashMap<TString, TStoragePoolState> StoragePoolState;
467-
THashSet<TString> StoragePoolSeen;
465+
THashMap<ui64, TStoragePoolState> StoragePoolState;
466+
THashSet<ui64> StoragePoolSeen;
468467

469468
THashSet<TNodeId> UnavailableComputeNodes;
470469

@@ -585,37 +584,10 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
585584

586585
Send(GetNameserviceActorId(), new TEvInterconnect::TEvListNodes());
587586
++Requests;
588-
Send(MakeBlobStorageNodeWardenID(SelfId().NodeId()), new TEvNodeWardenQueryStorageConfig(false));
589-
++Requests;
590587

591588
Become(&TThis::StateWait, Timeout, new TEvents::TEvWakeup());
592589
}
593590

594-
void Handle(TEvNodeWardenStorageConfig::TPtr ev) {
595-
if (const NKikimrBlobStorage::TStorageConfig& config = *ev->Get()->Config; config.HasBlobStorageConfig()) {
596-
if (const auto& bsConfig = config.GetBlobStorageConfig(); bsConfig.HasServiceSet()) {
597-
const auto& staticConfig = bsConfig.GetServiceSet();
598-
for (const NKikimrBlobStorage::TNodeWardenServiceSet_TPDisk& pDisk : staticConfig.pdisks()) {
599-
RequestStorageNode(pDisk.GetNodeID());
600-
}
601-
for (const NKikimrBlobStorage::TGroupInfo& group : staticConfig.groups()) {
602-
ValidGroups.emplace(group.GetGroupID());
603-
TString storagePoolName = group.GetStoragePoolName();
604-
if (!storagePoolName) {
605-
storagePoolName = STATIC_STORAGE_POOL_NAME;
606-
}
607-
StoragePoolState[storagePoolName].Groups.emplace(group.groupid());
608-
609-
if (!IsSpecificDatabaseFilter()) {
610-
DatabaseState[DomainPath].StoragePoolNames.emplace(storagePoolName);
611-
}
612-
}
613-
}
614-
}
615-
616-
RequestDone("TEvNodeWardenStorageConfig");
617-
}
618-
619591
STATEFN(StateWait) {
620592
switch (ev->GetTypeRewrite()) {
621593
hFunc(TEvents::TEvUndelivered, Handle);
@@ -627,15 +599,13 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
627599
hFunc(TEvHive::TEvResponseHiveInfo, Handle);
628600
hFunc(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult, Handle);
629601
hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle)
630-
hFunc(TEvBlobStorage::TEvControllerSelectGroupsResult, Handle);
631602
hFunc(TEvBlobStorage::TEvControllerConfigResponse, Handle);
632603
hFunc(NNodeWhiteboard::TEvWhiteboard::TEvSystemStateResponse, Handle);
633604
hFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected);
634605
hFunc(TEvTabletPipe::TEvClientDestroyed, Handle);
635606
hFunc(TEvTabletPipe::TEvClientConnected, Handle);
636607
hFunc(TEvPrivate::TEvRetryNodeWhiteboard, Handle);
637608
cFunc(TEvents::TSystem::Wakeup, HandleTimeout);
638-
hFunc(TEvNodeWardenStorageConfig, Handle);
639609
}
640610
}
641611

@@ -702,16 +672,10 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
702672
RequestTabletPipe(ConsoleId, "TEvListTenantsRequest", request.Release());
703673
}
704674

705-
void RequestSelectGroups(const TString& storagePoolName) {
706-
THolder<TEvBlobStorage::TEvControllerSelectGroups> request = MakeHolder<TEvBlobStorage::TEvControllerSelectGroups>();
707-
request->Record.SetReturnAllMatchingGroups(true);
708-
request->Record.AddGroupParameters()->MutableStoragePoolSpecifier()->SetName(storagePoolName);
709-
RequestTabletPipe(BsControllerId, "TEvControllerSelectGroups:" + storagePoolName, request.Release());
710-
}
711-
712675
void RequestConfig() {
713676
THolder<TEvBlobStorage::TEvControllerConfigRequest> request = MakeHolder<TEvBlobStorage::TEvControllerConfigRequest>();
714677
request->Record.MutableRequest()->AddCommand()->MutableQueryBaseConfig();
678+
request->Record.MutableRequest()->AddCommand()->MutableReadStoragePool()->SetBoxId(Max<ui64>());
715679
RequestTabletPipe(BsControllerId, "TEvControllerConfigRequest", request.Release());
716680
}
717681

@@ -758,12 +722,6 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
758722
}
759723
}
760724

761-
void RequestStorageNode(TNodeId nodeId) {
762-
if (StorageNodeIds.emplace(nodeId).second) {
763-
RequestGenericNode(nodeId);
764-
}
765-
}
766-
767725
void Handle(TEvPrivate::TEvRetryNodeWhiteboard::TPtr& ev) {
768726
switch (ev->Get()->EventId) {
769727
case NNodeWhiteboard::TEvWhiteboard::EvSystemStateRequest:
@@ -854,41 +812,24 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
854812
if (pbRecord.HasResponse() && pbRecord.GetResponse().StatusSize() > 0) {
855813
const NKikimrBlobStorage::TConfigResponse::TStatus& pbStatus(pbRecord.GetResponse().GetStatus(0));
856814
if (pbStatus.HasBaseConfig()) {
857-
const NKikimrBlobStorage::TBaseConfig& pbConfig(pbStatus.GetBaseConfig());
858-
for (const NKikimrBlobStorage::TBaseConfig::TPDisk& pDisk : pbConfig.GetPDisk()) {
859-
RequestStorageNode(pDisk.GetNodeId());
860-
}
861815
BaseConfig = ev->Release();
862816
}
863817
}
864818
RequestDone("TEvControllerConfigResponse");
865819
}
866820

867-
void Handle(TEvBlobStorage::TEvControllerSelectGroupsResult::TPtr& ev) {
868-
TabletRequests.CompleteRequest(ev->Cookie);
869-
for (const auto& matchingGroups : ev->Get()->Record.matchinggroups()) {
870-
for (const auto& group : matchingGroups.groups()) {
871-
TString storagePoolName = group.storagepoolname();
872-
StoragePoolState[storagePoolName].Groups.emplace(group.groupid());
873-
StoragePoolState[storagePoolName].AuthenticGroups.emplace(group.groupid());
874-
}
875-
}
876-
RequestDone("TEvControllerSelectGroupsResult");
877-
}
878-
879821
void Handle(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult::TPtr& ev) {
880822
TabletRequests.CompleteRequest(ev->Cookie);
881823
if (ev->Get()->GetRecord().status() == NKikimrScheme::StatusSuccess) {
882824
TString path = ev->Get()->GetRecord().path();
883825
TDatabaseState& state(DatabaseState[path]);
884826
for (const auto& storagePool : ev->Get()->GetRecord().pathdescription().domaindescription().storagepools()) {
885827
TString storagePoolName = storagePool.name();
886-
state.StoragePoolNames.emplace(storagePoolName);
887-
StoragePoolState[storagePoolName].Kind = storagePool.kind();
888-
RequestSelectGroups(storagePoolName);
828+
PathsByPoolName[storagePoolName].emplace(path); // no poolId in TEvDescribeSchemeResult, so it's necessary to keep pool names instead
889829
}
890830
if (path == DomainPath) {
891-
state.StoragePoolNames.emplace(STATIC_STORAGE_POOL_NAME);
831+
state.StoragePools.emplace(0); // static group has poolId = 0
832+
StoragePoolState[0].Name = STATIC_STORAGE_POOL_NAME;
892833
}
893834
state.StorageUsage = ev->Get()->GetRecord().pathdescription().domaindescription().diskspaceusage().tables().totalsize();
894835
state.StorageQuota = ev->Get()->GetRecord().pathdescription().domaindescription().databasequotas().data_size_hard_quota();
@@ -1106,14 +1047,26 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
11061047
}
11071048
for (const NKikimrBlobStorage::TBaseConfig::TGroup& group : pbConfig.GetGroup()) {
11081049
auto groupId = group.GetGroupId();
1050+
auto poolId = group.GetStoragePoolId();
11091051
ValidGroups.emplace(groupId);
11101052
BSConfigGroups.emplace(groupId, &group);
1053+
StoragePoolState[poolId].Groups.emplace(group.groupid());
11111054
}
11121055
for (const NKikimrBlobStorage::TBaseConfig::TNode& node : pbConfig.GetNode()) {
11131056
auto nodeId = node.GetNodeId();
11141057
BSConfigNodes.emplace(nodeId, &node);
11151058
}
11161059
}
1060+
const NKikimrBlobStorage::TConfigResponse::TStatus& spStatus(pbRecord.GetResponse().GetStatus(1));
1061+
for (const NKikimrBlobStorage::TDefineStoragePool& pool : spStatus.GetStoragePool()) { // there is no specific pool for static group here
1062+
ui64 poolId = pool.GetStoragePoolId();
1063+
TString storagePoolName = pool.GetName();
1064+
StoragePoolState[poolId].Name = storagePoolName;
1065+
1066+
for (const TString& path : PathsByPoolName[storagePoolName]) {
1067+
DatabaseState[path].StoragePools.emplace(poolId);
1068+
}
1069+
}
11171070
}
11181071
for (auto itPDisk = BSConfigPDisks.begin(); itPDisk != BSConfigPDisks.end();) {
11191072
if (ValidPDisks.count(itPDisk->first)) {
@@ -1380,10 +1333,6 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
13801333
return TStringBuilder() << vSlot.vslotid().nodeid() << "-" << vSlot.vslotid().pdiskid();
13811334
}
13821335

1383-
static TString GetPDiskId(const NKikimrBlobStorage::TNodeWardenServiceSet_TPDisk& pDisk) {
1384-
return TStringBuilder() << pDisk.nodeid() << "-" << pDisk.pdiskid();
1385-
}
1386-
13871336
void FillPDiskStatus(const TString& pDiskId, Ydb::Monitoring::StoragePDiskStatus& storagePDiskStatus, TSelfCheckContext context) {
13881337
context.Location.clear_database(); // PDisks are shared between databases
13891338
context.Location.mutable_storage()->mutable_pool()->clear_name(); // PDisks are shared between pools
@@ -1943,9 +1892,9 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
19431892
mergeContext.FillRecords(records);
19441893
}
19451894

1946-
void FillPoolStatus(const TString& poolName, const TStoragePoolState& pool, Ydb::Monitoring::StoragePoolStatus& storagePoolStatus, TSelfCheckContext context) {
1947-
context.Location.mutable_storage()->mutable_pool()->set_name(poolName);
1948-
storagePoolStatus.set_id(poolName);
1895+
void FillPoolStatus(const TStoragePoolState& pool, Ydb::Monitoring::StoragePoolStatus& storagePoolStatus, TSelfCheckContext context) {
1896+
context.Location.mutable_storage()->mutable_pool()->set_name(pool.Name);
1897+
storagePoolStatus.set_id(pool.Name);
19491898
for (auto groupId : pool.Groups) {
19501899
FillGroupStatus(groupId, *storagePoolStatus.add_groups(), {&context, "STORAGE_GROUP"});
19511900
}
@@ -1970,17 +1919,16 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
19701919
}
19711920

19721921
void FillStorage(TDatabaseState& databaseState, Ydb::Monitoring::StorageStatus& storageStatus, TSelfCheckContext context) {
1973-
if (databaseState.StoragePoolNames.empty()) {
1922+
if (!BaseConfig) {
1923+
context.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "System tablet BSC didn't provide information", ETags::StorageState);
1924+
} else if (databaseState.StoragePools.empty()) {
19741925
context.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "There are no storage pools", ETags::StorageState);
19751926
} else {
1976-
for (const TString& poolName : databaseState.StoragePoolNames) {
1977-
auto itStoragePoolState = StoragePoolState.find(poolName);
1927+
for (const ui64 poolId : databaseState.StoragePools) {
1928+
auto itStoragePoolState = StoragePoolState.find(poolId);
19781929
if (itStoragePoolState != StoragePoolState.end()) {
1979-
if (!itStoragePoolState->second.AuthenticGroups.empty()) {
1980-
itStoragePoolState->second.Groups = itStoragePoolState->second.AuthenticGroups;
1981-
}
1982-
FillPoolStatus(poolName, itStoragePoolState->second, *storageStatus.add_pools(), {&context, "STORAGE_POOL"});
1983-
StoragePoolSeen.emplace(poolName);
1930+
FillPoolStatus(itStoragePoolState->second, *storageStatus.add_pools(), {&context, "STORAGE_POOL"});
1931+
StoragePoolSeen.emplace(poolId);
19841932
}
19851933
}
19861934
switch (context.GetOverallStatus()) {
@@ -2142,14 +2090,14 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
21422090
context.UpdateMaxStatus(tabletContext.GetOverallStatus());
21432091
}
21442092
}
2145-
if (!FilterDatabase) {
2093+
if (!FilterDatabase && BaseConfig) {
21462094
TDatabaseState unknownDatabase;
2147-
for (auto& [name, pool] : StoragePoolState) {
2148-
if (StoragePoolSeen.count(name) == 0) {
2149-
unknownDatabase.StoragePoolNames.insert(name);
2095+
for (auto& [id, pool] : StoragePoolState) {
2096+
if (StoragePoolSeen.count(id) == 0) {
2097+
unknownDatabase.StoragePools.insert(id);
21502098
}
21512099
}
2152-
if (!unknownDatabase.StoragePoolNames.empty()) {
2100+
if (!unknownDatabase.StoragePools.empty()) {
21532101
Ydb::Monitoring::DatabaseStatus& databaseStatus(*context.Result->add_database_status());
21542102
TSelfCheckResult storageContext;
21552103
FillStorage(unknownDatabase, *databaseStatus.mutable_storage(), {&storageContext, "STORAGE"});

0 commit comments

Comments
 (0)