Skip to content

Support LayoutCorrect fields for SysView #15006

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions ydb/core/mind/bscontroller/bsc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
#include "self_heal.h"
#include "sys_view.h"
#include "console_interaction.h"
#include "group_geometry_info.h"
#include "group_layout_checker.h"

#include <library/cpp/streams/zstd/zstd.h>

Expand Down Expand Up @@ -82,6 +84,25 @@ void TBlobStorageController::TGroupInfo::CalculateGroupStatus() {
}
}

// Recomputes LayoutCorrect for this group.
//
// The flag is first reset to true. If the group has no VDisks it stays true and
// getGeom is never invoked. Otherwise a TGroupLayout is built over *topology,
// the PDisk position of every VDisk is added to it (the VDisk's index in
// VDisksInGroup is passed as its order number), and the flag takes the value of
// layout.IsCorrect().
//
//   self     -- controller whose HostRecords supply the location of each node
//   topology -- topology of the group; dereferenced only when VDisks are present
//   getGeom  -- produces the geometry used to build every disk position; called
//               at most once per invocation
void TBlobStorageController::TGroupInfo::CalculateLayoutStatus(TBlobStorageController *self,
        TBlobStorageGroupInfo::TTopology *topology, const std::function<TGroupGeometryInfo()>& getGeom) {
    LayoutCorrect = true;
    if (!VDisksInGroup) {
        return; // nothing to place, so nothing to check
    }

    NLayoutChecker::TGroupLayout groupLayout(*topology);
    NLayoutChecker::TDomainMapper domainMapper;
    auto geometry = getGeom();

    size_t orderNumber = 0;
    for (const TVSlotInfo *vslot : VDisksInGroup) {
        TPDiskId pdiskId = vslot->VSlotId.ComprisingPDiskId();
        const auto& nodeLocation = self->HostRecords->GetLocation(pdiskId.NodeId);
        groupLayout.AddDisk({domainMapper, nodeLocation, pdiskId, geometry}, orderNumber);
        ++orderNumber;
    }

    LayoutCorrect = groupLayout.IsCorrect();
}

NKikimrBlobStorage::TGroupStatus::E TBlobStorageController::DeriveStatus(const TBlobStorageGroupInfo::TTopology *topology,
const TBlobStorageGroupInfo::TGroupVDisks& failed) {
auto& checker = *topology->QuorumChecker;
Expand Down
8 changes: 8 additions & 0 deletions ydb/core/mind/bscontroller/config_fit_groups.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -621,6 +621,14 @@ namespace NKikimr {

groupInfo->FinishVDisksInGroup();
groupInfo->CalculateGroupStatus();
groupInfo->CalculateLayoutStatus(&State.Self, groupInfo->Topology.get(), [&] {
const auto& pools = State.StoragePools.Get();
if (const auto it = pools.find(groupInfo->StoragePoolId); it != pools.end()) {
return TGroupGeometryInfo(groupInfo->Topology->GType, it->second.GetGroupGeometry());
}
Y_DEBUG_ABORT(); // this can't normally happen
return TGroupGeometryInfo();
});

return res;
}
Expand Down
18 changes: 10 additions & 8 deletions ydb/core/mind/bscontroller/group_geometry_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,18 @@ namespace NKikimr::NBsController {
struct TExFitGroupError : yexception {};

class TGroupGeometryInfo {
const TBlobStorageGroupType Type;
ui32 NumFailRealms;
ui32 NumFailDomainsPerFailRealm;
ui32 NumVDisksPerFailDomain;
ui32 RealmLevelBegin;
ui32 RealmLevelEnd;
ui32 DomainLevelBegin;
ui32 DomainLevelEnd;
TBlobStorageGroupType Type;
ui32 NumFailRealms = 0;
ui32 NumFailDomainsPerFailRealm = 0;
ui32 NumVDisksPerFailDomain = 0;
ui32 RealmLevelBegin = 0;
ui32 RealmLevelEnd = 0;
ui32 DomainLevelBegin = 0;
ui32 DomainLevelEnd = 0;

public:
explicit TGroupGeometryInfo() = default;

TGroupGeometryInfo(TBlobStorageGroupType type, NKikimrBlobStorage::TGroupGeometry g)
: Type(type)
, NumFailRealms(g.GetNumFailRealms())
Expand Down
58 changes: 51 additions & 7 deletions ydb/core/mind/bscontroller/group_layout_checker.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,8 @@ namespace NKikimr::NBsController {

THashMap<TEntityId, ui32> NumDisksPerDevice;

bool Correct = true;

TGroupLayout(const TBlobStorageGroupInfo::TTopology& topology)
: Topology(topology)
, NumDisksInRealm(Topology.GetTotalFailRealmsNum())
Expand All @@ -187,17 +189,19 @@ namespace NKikimr::NBsController {

void UpdateDisk(const TPDiskLayoutPosition& pos, ui32 orderNumber, ui32 value) {
NumDisks += value;
NumDisksPerRealmGroup[pos.RealmGroup] += value;
const ui32 z = NumDisksPerRealmGroup[pos.RealmGroup] += value;
const TVDiskIdShort vdisk = Topology.GetVDiskId(orderNumber);
NumDisksInRealm[vdisk.FailRealm] += value;
NumDisksPerRealm[vdisk.FailRealm][pos.Realm] += value;
NumDisksPerRealmTotal[pos.Realm] += value;
const ui32 x1 = NumDisksInRealm[vdisk.FailRealm] += value;
const ui32 x2 = NumDisksPerRealm[vdisk.FailRealm][pos.Realm] += value;
const ui32 x3 = NumDisksPerRealmTotal[pos.Realm] += value;
const ui32 domainIdx = Topology.GetFailDomainOrderNumber(vdisk);
NumDisksInDomain[domainIdx] += value;
NumDisksPerDomain[domainIdx][pos.Domain] += value;
NumDisksPerDomainTotal[pos.Domain] += value;
const ui32 y1 = NumDisksInDomain[domainIdx] += value;
const ui32 y2 = NumDisksPerDomain[domainIdx][pos.Domain] += value;
const ui32 y3 = NumDisksPerDomainTotal[pos.Domain] += value;

NumDisksPerDevice[pos.Device] += value;

Correct = Correct && x1 == x2 && x2 == x3 && y1 == y2 && y2 == y3 && z == NumDisks;
}

void AddDisk(const TPDiskLayoutPosition& pos, ui32 orderNumber) {
Expand Down Expand Up @@ -233,6 +237,46 @@ namespace NKikimr::NBsController {
AddDisk(pos, orderNumber);
return score;
}

// Reports whether the layout is correct.
//
// With NDEBUG defined this simply returns the incrementally maintained Correct
// flag. Otherwise the answer is recomputed from the per-entity counters, and
// every exit point asserts that the recomputed answer matches the flag.
//
// NOTE(review): if no disk was ever added, NumDisksPerRealmGroup is presumably
// empty, so the non-NDEBUG path takes the "size() != 1" branch while Correct is
// still true -- confirm that callers never query an empty layout.
bool IsCorrect() const {
#ifdef NDEBUG
    return Correct;
#else
    // all disks must reside in the same realm group
    if (NumDisksPerRealmGroup.size() != 1) {
        Y_DEBUG_ABORT_UNLESS(!Correct);
        return false;
    }

    const size_t realmCount = NumDisksInRealm.size();
    for (size_t realmIdx = 0; realmIdx != realmCount; ++realmIdx) {
        for (const auto& [entityId, disksHere] : NumDisksPerRealm[realmIdx]) {
            Y_DEBUG_ABORT_UNLESS(NumDisksPerRealmTotal.contains(entityId));
            // A mismatch with NumDisksInRealm means this group realm mixes disks from different
            // real-world realms (DC's) -- not as bad as it seems, but it breaks the strict failure
            // model. A mismatch with NumDisksPerRealmTotal is a bit worse: disks of one real-world
            // realm (DC) are spread over several group realms, which may lead to unavailability
            // when that DC goes down.
            const bool realmConsistent = disksHere == NumDisksInRealm[realmIdx]
                && disksHere == NumDisksPerRealmTotal.at(entityId);
            if (!realmConsistent) {
                Y_DEBUG_ABORT_UNLESS(!Correct);
                return false;
            }
        }
    }

    // fail domains are validated the same way as realms
    const size_t domainCount = NumDisksInDomain.size();
    for (size_t domainIdx = 0; domainIdx != domainCount; ++domainIdx) {
        for (const auto& [entityId, disksHere] : NumDisksPerDomain[domainIdx]) {
            Y_DEBUG_ABORT_UNLESS(NumDisksPerDomainTotal.contains(entityId));
            const bool domainConsistent = disksHere == NumDisksInDomain[domainIdx]
                && disksHere == NumDisksPerDomainTotal.at(entityId);
            if (!domainConsistent) {
                Y_DEBUG_ABORT_UNLESS(!Correct);
                return false;
            }
        }
    }

    Y_DEBUG_ABORT_UNLESS(Correct);
    return true;
#endif
}
};

} // NLayoutChecker
Expand Down
8 changes: 8 additions & 0 deletions ydb/core/mind/bscontroller/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ namespace NKikimr {

namespace NBsController {

class TGroupGeometryInfo;

using NTabletFlatExecutor::TTabletExecutedFlat;
using NTabletFlatExecutor::ITransaction;
using NTabletFlatExecutor::TTransactionBase;
Expand Down Expand Up @@ -618,6 +620,12 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
// be recalculated too
void CalculateGroupStatus();

// group layout status: whether it is positioned correctly
bool LayoutCorrect = false;

void CalculateLayoutStatus(TBlobStorageController *self, TBlobStorageGroupInfo::TTopology *topology,
const std::function<TGroupGeometryInfo()>& getGeom);

template<typename T>
static void Apply(TBlobStorageController* /*controller*/, T&& callback) {
static TTableAdapter<Table, TGroupInfo,
Expand Down
15 changes: 15 additions & 0 deletions ydb/core/mind/bscontroller/load_everything.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "impl.h"
#include "console_interaction.h"
#include "group_geometry_info.h"

#include <ydb/library/yaml_config/yaml_config.h>

Expand Down Expand Up @@ -515,9 +516,23 @@ class TBlobStorageController::TTxLoadEverything : public TTransactionBase<TBlobS
}
}

THashMap<TBoxStoragePoolId, TGroupGeometryInfo> cache;

// calculate group status for all groups
for (auto& [id, group] : Self->GroupMap) {
group->CalculateGroupStatus();

group->CalculateLayoutStatus(Self, group->Topology.get(), [&] {
const auto [it, inserted] = cache.try_emplace(group->StoragePoolId);
if (inserted) {
if (const auto jt = Self->StoragePools.find(it->first); jt != Self->StoragePools.end()) {
it->second = TGroupGeometryInfo(group->Topology->GType, jt->second.GetGroupGeometry());
} else {
Y_DEBUG_ABORT();
}
}
return it->second;
});
}

return true;
Expand Down
2 changes: 2 additions & 0 deletions ydb/core/mind/bscontroller/monitoring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1388,6 +1388,7 @@ void TBlobStorageController::RenderGroupTable(IOutputStream& out, std::function<
TAG_ATTRS(TTableH, {{"title", "PutUserData Latency"}}) { out << "PutUserData<br/>Latency"; }
TAG_ATTRS(TTableH, {{"title", "GetFast Latency"}}) { out << "GetFast<br/>Latency"; }
TABLEH() { out << "Seen operational"; }
TABLEH() { out << "Layout correct"; }
TABLEH() { out << "Operating<br/>status"; }
TABLEH() { out << "Expected<br/>status"; }
TABLEH() { out << "Donors"; }
Expand Down Expand Up @@ -1448,6 +1449,7 @@ void TBlobStorageController::RenderGroupRow(IOutputStream& out, const TGroupInfo
renderLatency(group.LatencyStats.PutUserData);
renderLatency(group.LatencyStats.GetFast);
TABLED() { out << (group.SeenOperational ? "YES" : ""); }
TABLED() { out << (group.LayoutCorrect ? "" : "NO"); }

const auto& status = group.Status;
TABLED() { out << NKikimrBlobStorage::TGroupStatus::E_Name(status.OperatingStatus); }
Expand Down
2 changes: 2 additions & 0 deletions ydb/core/mind/bscontroller/sys_view.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,8 @@ void CopyInfo(NKikimrSysView::TGroupInfo* info, const THolder<TBlobStorageContro
if (latencyStats.GetFast) {
info->SetGetFastLatency(latencyStats.GetFast->MicroSeconds());
}

info->SetLayoutCorrect(groupInfo->LayoutCorrect);
}

void CopyInfo(NKikimrSysView::TStoragePoolInfo* info, const TBlobStorageController::TStoragePoolInfo& poolInfo) {
Expand Down
10 changes: 10 additions & 0 deletions ydb/core/mind/bscontroller/virtual_group.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "impl.h"
#include "config.h"
#include "group_geometry_info.h"

namespace NKikimr::NBsController {

Expand Down Expand Up @@ -89,6 +90,7 @@ namespace NKikimr::NBsController {

GroupFailureModelChanged.insert(group->ID);
group->CalculateGroupStatus();
group->CalculateLayoutStatus(&Self, group->Topology.get(), {});

NKikimrBlobDepot::TBlobDepotConfig config;
config.SetVirtualGroupId(group->ID.GetRawId());
Expand Down Expand Up @@ -255,6 +257,14 @@ namespace NKikimr::NBsController {
State->DeleteExistingGroup(group->ID);
}
group->CalculateGroupStatus();
group->CalculateLayoutStatus(Self, group->Topology.get(), [&] {
const auto& pools = State->StoragePools.Get();
if (const auto it = pools.find(group->StoragePoolId); it != pools.end()) {
return TGroupGeometryInfo(group->Topology->GType, it->second.GetGroupGeometry());
}
Y_DEBUG_ABORT();
return TGroupGeometryInfo();
});
TString error;
if (State->Changed() && !Self->CommitConfigUpdates(*State, true, true, true, txc, &error)) {
STLOG(PRI_ERROR, BS_CONTROLLER, BSCVG08, "failed to commit update", (VirtualGroupId, GroupId), (Error, error));
Expand Down
1 change: 1 addition & 0 deletions ydb/core/protos/sys_view.proto
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,7 @@ message TGroupInfo {
// desired disk categories ?
// down/persisted down ?
// metrics ?
optional bool LayoutCorrect = 16; // is the group layout correct?
}

message TGroupEntry {
Expand Down
4 changes: 3 additions & 1 deletion ydb/core/sys_view/common/schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,7 @@ struct Schema : NIceDb::Schema {
struct PutTabletLogLatency : Column<13, NScheme::NTypeIds::Interval> {};
struct PutUserDataLatency : Column<14, NScheme::NTypeIds::Interval> {};
struct GetFastLatency : Column<15, NScheme::NTypeIds::Interval> {};
struct LayoutCorrect : Column<16, NScheme::NTypeIds::Bool> {};

using TKey = TableKey<GroupId>;
using TColumns = TableColumns<
Expand All @@ -321,7 +322,8 @@ struct Schema : NIceDb::Schema {
SeenOperational,
PutTabletLogLatency,
PutUserDataLatency,
GetFastLatency>;
GetFastLatency,
LayoutCorrect>;
};

struct StoragePools : Table<7> {
Expand Down
1 change: 1 addition & 0 deletions ydb/core/sys_view/storage/groups.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class TGroupsScan : public TStorageScanBase<TGroupsScan, TEvSysView::TEvGetGroup
{T::PutTabletLogLatency::ColumnId, {E::kInfoFieldNumber, V::kPutTabletLogLatencyFieldNumber}},
{T::PutUserDataLatency::ColumnId, {E::kInfoFieldNumber, V::kPutUserDataLatencyFieldNumber}},
{T::GetFastLatency::ColumnId, {E::kInfoFieldNumber, V::kGetFastLatencyFieldNumber}},
{T::LayoutCorrect::ColumnId, {E::kInfoFieldNumber, V::kLayoutCorrectFieldNumber}},
};
return fieldMap;
}
Expand Down
6 changes: 4 additions & 2 deletions ydb/core/sys_view/ut_kqp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1058,7 +1058,8 @@ Y_UNIT_TEST_SUITE(SystemView) {
LifeCyclePhase,
PutTabletLogLatency,
PutUserDataLatency,
StoragePoolId
StoragePoolId,
LayoutCorrect
FROM `/Root/.sys/ds_groups` WHERE GroupId >= 0x80000000;
)").GetValueSync();

Expand All @@ -1074,7 +1075,7 @@ Y_UNIT_TEST_SUITE(SystemView) {
}
}

TYsonFieldChecker check(ysonString, 12);
TYsonFieldChecker check(ysonString, 13);

check.Uint64(0u); // AllocatedSize
check.Uint64GreaterOrEquals(0u); // AvailableSize
Expand All @@ -1088,6 +1089,7 @@ Y_UNIT_TEST_SUITE(SystemView) {
check.Null(); // PutTabletLogLatency
check.Null(); // PutUserDataLatency
check.Uint64(2u); // StoragePoolId
check.Bool(true); // LayoutCorrect
}

Y_UNIT_TEST(StoragePoolsFields) {
Expand Down
Loading