Skip to content

Commit b9128ab

Browse files
alexvru authored and StekPerepolnen committed
Support LayoutCorrect fields for SysView (ydb-platform#15006)
1 parent e39f39c commit b9128ab

13 files changed

+136
-18
lines changed

ydb/core/mind/bscontroller/bsc.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
#include "self_heal.h"
44
#include "sys_view.h"
55
#include "console_interaction.h"
6+
#include "group_geometry_info.h"
7+
#include "group_layout_checker.h"
68

79
#include <library/cpp/streams/zstd/zstd.h>
810

@@ -82,6 +84,25 @@ void TBlobStorageController::TGroupInfo::CalculateGroupStatus() {
8284
}
8385
}
8486

87+
// Recomputes LayoutCorrect for this group from the current placement of its VDisks.
//
//   self     -- controller whose HostRecords provide the location of every disk's node
//   topology -- group topology the layout checker is built from; dereferenced only when the group has VDisks
//   getGeom  -- lazily yields the group geometry; invoked at most once and only when the group has VDisks,
//               so callers dealing with disk-less groups may pass an empty function
//
// A group without VDisks is reported as correctly laid out.
void TBlobStorageController::TGroupInfo::CalculateLayoutStatus(TBlobStorageController *self,
        TBlobStorageGroupInfo::TTopology *topology, const std::function<TGroupGeometryInfo()>& getGeom) {
    LayoutCorrect = true;
    if (!VDisksInGroup) {
        return; // nothing is placed anywhere, so there is nothing that can be misplaced
    }

    NLayoutChecker::TGroupLayout layout(*topology);
    NLayoutChecker::TDomainMapper mapper;

    // Invoking an empty std::function throws std::bad_function_call; fall back to the default geometry
    // instead, the same value the callers' own lambdas return when the storage pool cannot be found.
    auto geom = getGeom ? getGeom() : TGroupGeometryInfo();

    // Register every VDisk of the group under its position in VDisksInGroup.
    for (size_t index = 0; index < VDisksInGroup.size(); ++index) {
        const TVSlotInfo *slot = VDisksInGroup[index];
        const TPDiskId pdiskId = slot->VSlotId.ComprisingPDiskId();
        const auto& location = self->HostRecords->GetLocation(pdiskId.NodeId);
        layout.AddDisk({mapper, location, pdiskId, geom}, index);
    }

    LayoutCorrect = layout.IsCorrect();
}
105+
85106
NKikimrBlobStorage::TGroupStatus::E TBlobStorageController::DeriveStatus(const TBlobStorageGroupInfo::TTopology *topology,
86107
const TBlobStorageGroupInfo::TGroupVDisks& failed) {
87108
auto& checker = *topology->QuorumChecker;

ydb/core/mind/bscontroller/config_fit_groups.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -621,6 +621,14 @@ namespace NKikimr {
621621

622622
groupInfo->FinishVDisksInGroup();
623623
groupInfo->CalculateGroupStatus();
624+
groupInfo->CalculateLayoutStatus(&State.Self, groupInfo->Topology.get(), [&] {
625+
const auto& pools = State.StoragePools.Get();
626+
if (const auto it = pools.find(groupInfo->StoragePoolId); it != pools.end()) {
627+
return TGroupGeometryInfo(groupInfo->Topology->GType, it->second.GetGroupGeometry());
628+
}
629+
Y_DEBUG_ABORT(); // this can't normally happen
630+
return TGroupGeometryInfo();
631+
});
624632

625633
return res;
626634
}

ydb/core/mind/bscontroller/group_geometry_info.h

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,18 @@ namespace NKikimr::NBsController {
1111
struct TExFitGroupError : yexception {};
1212

1313
class TGroupGeometryInfo {
14-
const TBlobStorageGroupType Type;
15-
ui32 NumFailRealms;
16-
ui32 NumFailDomainsPerFailRealm;
17-
ui32 NumVDisksPerFailDomain;
18-
ui32 RealmLevelBegin;
19-
ui32 RealmLevelEnd;
20-
ui32 DomainLevelBegin;
21-
ui32 DomainLevelEnd;
14+
TBlobStorageGroupType Type;
15+
ui32 NumFailRealms = 0;
16+
ui32 NumFailDomainsPerFailRealm = 0;
17+
ui32 NumVDisksPerFailDomain = 0;
18+
ui32 RealmLevelBegin = 0;
19+
ui32 RealmLevelEnd = 0;
20+
ui32 DomainLevelBegin = 0;
21+
ui32 DomainLevelEnd = 0;
2222

2323
public:
24+
explicit TGroupGeometryInfo() = default;
25+
2426
TGroupGeometryInfo(TBlobStorageGroupType type, NKikimrBlobStorage::TGroupGeometry g)
2527
: Type(type)
2628
, NumFailRealms(g.GetNumFailRealms())

ydb/core/mind/bscontroller/group_layout_checker.h

Lines changed: 51 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,8 @@ namespace NKikimr::NBsController {
177177

178178
THashMap<TEntityId, ui32> NumDisksPerDevice;
179179

180+
bool Correct = true;
181+
180182
TGroupLayout(const TBlobStorageGroupInfo::TTopology& topology)
181183
: Topology(topology)
182184
, NumDisksInRealm(Topology.GetTotalFailRealmsNum())
@@ -187,17 +189,19 @@ namespace NKikimr::NBsController {
187189

188190
void UpdateDisk(const TPDiskLayoutPosition& pos, ui32 orderNumber, ui32 value) {
189191
NumDisks += value;
190-
NumDisksPerRealmGroup[pos.RealmGroup] += value;
192+
const ui32 z = NumDisksPerRealmGroup[pos.RealmGroup] += value;
191193
const TVDiskIdShort vdisk = Topology.GetVDiskId(orderNumber);
192-
NumDisksInRealm[vdisk.FailRealm] += value;
193-
NumDisksPerRealm[vdisk.FailRealm][pos.Realm] += value;
194-
NumDisksPerRealmTotal[pos.Realm] += value;
194+
const ui32 x1 = NumDisksInRealm[vdisk.FailRealm] += value;
195+
const ui32 x2 = NumDisksPerRealm[vdisk.FailRealm][pos.Realm] += value;
196+
const ui32 x3 = NumDisksPerRealmTotal[pos.Realm] += value;
195197
const ui32 domainIdx = Topology.GetFailDomainOrderNumber(vdisk);
196-
NumDisksInDomain[domainIdx] += value;
197-
NumDisksPerDomain[domainIdx][pos.Domain] += value;
198-
NumDisksPerDomainTotal[pos.Domain] += value;
198+
const ui32 y1 = NumDisksInDomain[domainIdx] += value;
199+
const ui32 y2 = NumDisksPerDomain[domainIdx][pos.Domain] += value;
200+
const ui32 y3 = NumDisksPerDomainTotal[pos.Domain] += value;
199201

200202
NumDisksPerDevice[pos.Device] += value;
203+
204+
Correct = Correct && x1 == x2 && x2 == x3 && y1 == y2 && y2 == y3 && z == NumDisks;
201205
}
202206

203207
void AddDisk(const TPDiskLayoutPosition& pos, ui32 orderNumber) {
@@ -233,6 +237,46 @@ namespace NKikimr::NBsController {
233237
AddDisk(pos, orderNumber);
234238
return score;
235239
}
240+
241+
bool IsCorrect() const {
242+
#ifdef NDEBUG
243+
return Correct;
244+
#endif
245+
246+
if (NumDisksPerRealmGroup.size() != 1) { // all disks must reside in the same realm group
247+
Y_DEBUG_ABORT_UNLESS(!Correct);
248+
return false;
249+
}
250+
251+
for (size_t i = 0, num = NumDisksInRealm.size(); i < num; ++i) {
252+
for (const auto& [entityId, numDisks] : NumDisksPerRealm[i]) {
253+
Y_DEBUG_ABORT_UNLESS(NumDisksPerRealmTotal.contains(entityId));
254+
if (numDisks != NumDisksInRealm[i] || numDisks != NumDisksPerRealmTotal.at(entityId)) {
255+
// the first case is when group realm contains disks from different real-world realms (DC's)
256+
// -- this is not as bad as it seems, but breaks strict failure model; the second one is a bit
257+
// worse, it means that disks from this real-world realm (DC) are in several realms, which
258+
// may lead to unavailability when DC goes down
259+
Y_DEBUG_ABORT_UNLESS(!Correct);
260+
return false;
261+
}
262+
}
263+
}
264+
265+
// the same code goes for domains
266+
for (size_t j = 0, num = NumDisksInDomain.size(); j < num; ++j) {
267+
for (const auto& [entityId, numDisks] : NumDisksPerDomain[j]) {
268+
Y_DEBUG_ABORT_UNLESS(NumDisksPerDomainTotal.contains(entityId));
269+
if (numDisks != NumDisksInDomain[j] || numDisks != NumDisksPerDomainTotal.at(entityId)) {
270+
Y_DEBUG_ABORT_UNLESS(!Correct);
271+
return false;
272+
}
273+
274+
}
275+
}
276+
277+
Y_DEBUG_ABORT_UNLESS(Correct);
278+
return true;
279+
}
236280
};
237281

238282
} // NLayoutChecker

ydb/core/mind/bscontroller/impl.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ namespace NKikimr {
2020

2121
namespace NBsController {
2222

23+
class TGroupGeometryInfo;
24+
2325
using NTabletFlatExecutor::TTabletExecutedFlat;
2426
using NTabletFlatExecutor::ITransaction;
2527
using NTabletFlatExecutor::TTransactionBase;
@@ -618,6 +620,12 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
618620
// be recalculated too
619621
void CalculateGroupStatus();
620622

623+
// group layout status: whether it is positioned correctly
624+
bool LayoutCorrect = false;
625+
626+
void CalculateLayoutStatus(TBlobStorageController *self, TBlobStorageGroupInfo::TTopology *topology,
627+
const std::function<TGroupGeometryInfo()>& getGeom);
628+
621629
template<typename T>
622630
static void Apply(TBlobStorageController* /*controller*/, T&& callback) {
623631
static TTableAdapter<Table, TGroupInfo,

ydb/core/mind/bscontroller/load_everything.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "impl.h"
22
#include "console_interaction.h"
3+
#include "group_geometry_info.h"
34

45
#include <ydb/library/yaml_config/yaml_config.h>
56

@@ -518,9 +519,23 @@ class TBlobStorageController::TTxLoadEverything : public TTransactionBase<TBlobS
518519
}
519520
}
520521

522+
THashMap<TBoxStoragePoolId, TGroupGeometryInfo> cache;
523+
521524
// calculate group status for all groups
522525
for (auto& [id, group] : Self->GroupMap) {
523526
group->CalculateGroupStatus();
527+
528+
group->CalculateLayoutStatus(Self, group->Topology.get(), [&] {
529+
const auto [it, inserted] = cache.try_emplace(group->StoragePoolId);
530+
if (inserted) {
531+
if (const auto jt = Self->StoragePools.find(it->first); jt != Self->StoragePools.end()) {
532+
it->second = TGroupGeometryInfo(group->Topology->GType, jt->second.GetGroupGeometry());
533+
} else {
534+
Y_DEBUG_ABORT();
535+
}
536+
}
537+
return it->second;
538+
});
524539
}
525540

526541
return true;

ydb/core/mind/bscontroller/monitoring.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1388,6 +1388,7 @@ void TBlobStorageController::RenderGroupTable(IOutputStream& out, std::function<
13881388
TAG_ATTRS(TTableH, {{"title", "PutUserData Latency"}}) { out << "PutUserData<br/>Latency"; }
13891389
TAG_ATTRS(TTableH, {{"title", "GetFast Latency"}}) { out << "GetFast<br/>Latency"; }
13901390
TABLEH() { out << "Seen operational"; }
1391+
TABLEH() { out << "Layout correct"; }
13911392
TABLEH() { out << "Operating<br/>status"; }
13921393
TABLEH() { out << "Expected<br/>status"; }
13931394
TABLEH() { out << "Donors"; }
@@ -1448,6 +1449,7 @@ void TBlobStorageController::RenderGroupRow(IOutputStream& out, const TGroupInfo
14481449
renderLatency(group.LatencyStats.PutUserData);
14491450
renderLatency(group.LatencyStats.GetFast);
14501451
TABLED() { out << (group.SeenOperational ? "YES" : ""); }
1452+
TABLED() { out << (group.LayoutCorrect ? "" : "NO"); }
14511453

14521454
const auto& status = group.Status;
14531455
TABLED() { out << NKikimrBlobStorage::TGroupStatus::E_Name(status.OperatingStatus); }

ydb/core/mind/bscontroller/sys_view.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,8 @@ void CopyInfo(NKikimrSysView::TGroupInfo* info, const THolder<TBlobStorageContro
399399
if (latencyStats.GetFast) {
400400
info->SetGetFastLatency(latencyStats.GetFast->MicroSeconds());
401401
}
402+
403+
info->SetLayoutCorrect(groupInfo->LayoutCorrect);
402404
}
403405

404406
void CopyInfo(NKikimrSysView::TStoragePoolInfo* info, const TBlobStorageController::TStoragePoolInfo& poolInfo) {

ydb/core/mind/bscontroller/virtual_group.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "impl.h"
22
#include "config.h"
3+
#include "group_geometry_info.h"
34

45
namespace NKikimr::NBsController {
56

@@ -89,6 +90,7 @@ namespace NKikimr::NBsController {
8990

9091
GroupFailureModelChanged.insert(group->ID);
9192
group->CalculateGroupStatus();
93+
group->CalculateLayoutStatus(&Self, group->Topology.get(), {});
9294

9395
NKikimrBlobDepot::TBlobDepotConfig config;
9496
config.SetVirtualGroupId(group->ID.GetRawId());
@@ -255,6 +257,14 @@ namespace NKikimr::NBsController {
255257
State->DeleteExistingGroup(group->ID);
256258
}
257259
group->CalculateGroupStatus();
260+
group->CalculateLayoutStatus(Self, group->Topology.get(), [&] {
261+
const auto& pools = State->StoragePools.Get();
262+
if (const auto it = pools.find(group->StoragePoolId); it != pools.end()) {
263+
return TGroupGeometryInfo(group->Topology->GType, it->second.GetGroupGeometry());
264+
}
265+
Y_DEBUG_ABORT();
266+
return TGroupGeometryInfo();
267+
});
258268
TString error;
259269
if (State->Changed() && !Self->CommitConfigUpdates(*State, true, true, true, txc, &error)) {
260270
STLOG(PRI_ERROR, BS_CONTROLLER, BSCVG08, "failed to commit update", (VirtualGroupId, GroupId), (Error, error));

ydb/core/protos/sys_view.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,7 @@ message TGroupInfo {
265265
// desired disk categories ?
266266
// down/persisted down ?
267267
// metrics ?
268+
optional bool LayoutCorrect = 16; // is the group layout correct?
268269
}
269270

270271
message TGroupEntry {

ydb/core/sys_view/common/schema.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,7 @@ struct Schema : NIceDb::Schema {
306306
struct PutTabletLogLatency : Column<13, NScheme::NTypeIds::Interval> {};
307307
struct PutUserDataLatency : Column<14, NScheme::NTypeIds::Interval> {};
308308
struct GetFastLatency : Column<15, NScheme::NTypeIds::Interval> {};
309+
struct LayoutCorrect : Column<16, NScheme::NTypeIds::Bool> {};
309310

310311
using TKey = TableKey<GroupId>;
311312
using TColumns = TableColumns<
@@ -321,7 +322,8 @@ struct Schema : NIceDb::Schema {
321322
SeenOperational,
322323
PutTabletLogLatency,
323324
PutUserDataLatency,
324-
GetFastLatency>;
325+
GetFastLatency,
326+
LayoutCorrect>;
325327
};
326328

327329
struct StoragePools : Table<7> {

ydb/core/sys_view/storage/groups.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ class TGroupsScan : public TStorageScanBase<TGroupsScan, TEvSysView::TEvGetGroup
3636
{T::PutTabletLogLatency::ColumnId, {E::kInfoFieldNumber, V::kPutTabletLogLatencyFieldNumber}},
3737
{T::PutUserDataLatency::ColumnId, {E::kInfoFieldNumber, V::kPutUserDataLatencyFieldNumber}},
3838
{T::GetFastLatency::ColumnId, {E::kInfoFieldNumber, V::kGetFastLatencyFieldNumber}},
39+
{T::LayoutCorrect::ColumnId, {E::kInfoFieldNumber, V::kLayoutCorrectFieldNumber}},
3940
};
4041
return fieldMap;
4142
}

ydb/core/sys_view/ut_kqp.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1074,7 +1074,8 @@ Y_UNIT_TEST_SUITE(SystemView) {
10741074
LifeCyclePhase,
10751075
PutTabletLogLatency,
10761076
PutUserDataLatency,
1077-
StoragePoolId
1077+
StoragePoolId,
1078+
LayoutCorrect
10781079
FROM `/Root/.sys/ds_groups` WHERE GroupId >= 0x80000000;
10791080
)").GetValueSync();
10801081

@@ -1090,7 +1091,7 @@ Y_UNIT_TEST_SUITE(SystemView) {
10901091
}
10911092
}
10921093

1093-
TYsonFieldChecker check(ysonString, 12);
1094+
TYsonFieldChecker check(ysonString, 13);
10941095

10951096
check.Uint64(0u); // AllocatedSize
10961097
check.Uint64GreaterOrEquals(0u); // AvailableSize
@@ -1104,6 +1105,7 @@ Y_UNIT_TEST_SUITE(SystemView) {
11041105
check.Null(); // PutTabletLogLatency
11051106
check.Null(); // PutUserDataLatency
11061107
check.Uint64(2u); // StoragePoolId
1108+
check.Bool(true); // LayoutCorrect
11071109
}
11081110

11091111
Y_UNIT_TEST(StoragePoolsFields) {

0 commit comments

Comments
 (0)