Skip to content

Commit 509064d

Browse files
authored
Add simplified mirror-3dc support in CMS (#11190)
1 parent b3d3a0f commit 509064d

7 files changed

+126
-42
lines changed

ydb/core/cms/cms.cpp

+4-3
Original file line numberDiff line numberDiff line change
@@ -928,7 +928,7 @@ bool TCms::TryToLockVDisk(const TActionOptions& opts,
928928
return false;
929929
}
930930

931-
auto counters = CreateErasureCounter(ClusterInfo->BSGroup(groupId).Erasure.GetErasure(), vdisk, groupId);
931+
auto counters = CreateErasureCounter(ClusterInfo->BSGroup(groupId).Erasure.GetErasure(), vdisk, groupId, TabletCounters);
932932
counters->CountGroupState(ClusterInfo, State->Config.DefaultRetryTime, duration, error);
933933

934934
switch (opts.AvailabilityMode) {
@@ -943,10 +943,11 @@ bool TCms::TryToLockVDisk(const TActionOptions& opts,
943943
}
944944
break;
945945
case MODE_FORCE_RESTART:
946-
if ( counters->GroupAlreadyHasLockedDisks() && opts.PartialPermissionAllowed) {
946+
if (counters->GroupAlreadyHasLockedDisks() && !counters->GroupHasMoreThanOneDiskPerNode() && opts.PartialPermissionAllowed) {
947+
TabletCounters->Cumulative()[COUNTER_PARTIAL_PERMISSIONS_OPTIMIZED].Increment(1);
947948
error.Code = TStatus::DISALLOW_TEMP;
948949
error.Reason = "You cannot get two or more disks from the same group at the same time"
949-
" without specifying the PartialPermissionAllowed parameter";
950+
" in partial permissions allowed mode";
950951
error.Deadline = defaultDeadline;
951952
return false;
952953
}

ydb/core/cms/cms_maintenance_api_ut.cpp

+21
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,27 @@ Y_UNIT_TEST_SUITE(TMaintenanceApiTest) {
9595
UNIT_ASSERT_VALUES_EQUAL(a2.reason(), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS);
9696
UNIT_ASSERT(a2.reason_details().Contains("too many unavailable vdisks"));
9797
}
98+
99+
Y_UNIT_TEST(SimplifiedMirror3DC) {
100+
TTestEnvOpts options(3);
101+
options.UseMirror3dcErasure = true;
102+
options.DataCenterCount = 3;
103+
TCmsTestEnv env(options);
104+
105+
auto response = env.CheckMaintenanceTaskCreate(
106+
"task-1",
107+
Ydb::StatusIds::SUCCESS,
108+
Ydb::Maintenance::AVAILABILITY_MODE_WEAK,
109+
MakeActionGroup(
110+
MakeLockAction(env.GetNodeId(0), TDuration::Minutes(10))
111+
)
112+
);
113+
114+
UNIT_ASSERT_VALUES_EQUAL(response.action_group_states().size(), 1);
115+
UNIT_ASSERT_VALUES_EQUAL(response.action_group_states(0).action_states().size(), 1);
116+
const auto &a = response.action_group_states(0).action_states(0);
117+
UNIT_ASSERT_VALUES_EQUAL(a.status(), ActionState::ACTION_STATUS_PERFORMED);
118+
}
98119
}
99120

100121
} // namespace NKikimr::NCmsTest

ydb/core/cms/cms_ut_common.cpp

+46-22
Original file line numberDiff line numberDiff line change
@@ -239,20 +239,21 @@ class TFakeTenantPool : public TActorBootstrapped<TFakeTenantPool> {
239239

240240
void GenerateExtendedInfo(TTestActorRuntime &runtime, NKikimrBlobStorage::TBaseConfig *config,
241241
ui32 pdisks, ui32 vdiskPerPdisk = 4, const TNodeTenantsMap &tenants = {}, bool useMirror3dcErasure = false)
242-
{
242+
{
243+
constexpr ui32 MIRROR_3DC_VDISKS_COUNT = 9;
244+
constexpr ui32 BLOCK_4_2_VDISKS_COUNT = 8;
245+
243246
ui32 numNodes = runtime.GetNodeCount();
244-
ui32 numNodeGroups = pdisks * vdiskPerPdisk;
247+
ui32 vdisksPerNode = pdisks * vdiskPerPdisk;
245248
ui32 numGroups;
246-
247-
if (numNodes < 9)
248-
useMirror3dcErasure = false;
249-
250249
if (useMirror3dcErasure)
251-
numGroups = numNodes * numNodeGroups / 9;
252-
else if (numNodes >= 8)
253-
numGroups = numNodes * numNodeGroups / 8;
250+
numGroups = numNodes * vdisksPerNode / MIRROR_3DC_VDISKS_COUNT;
251+
else if (numNodes >= BLOCK_4_2_VDISKS_COUNT)
252+
numGroups = numNodes * vdisksPerNode / BLOCK_4_2_VDISKS_COUNT;
254253
else
255-
numGroups = numNodes * numNodeGroups;
254+
numGroups = numNodes * vdisksPerNode;
255+
256+
ui32 maxOneGroupVdisksPerNode = useMirror3dcErasure && numNodes < MIRROR_3DC_VDISKS_COUNT ? 3 : 1;
256257

257258
auto now = runtime.GetTimeProvider()->Now();
258259
for (ui32 groupId = 0; groupId < numGroups; ++groupId) {
@@ -261,7 +262,7 @@ void GenerateExtendedInfo(TTestActorRuntime &runtime, NKikimrBlobStorage::TBaseC
261262
group.SetGroupGeneration(1);
262263
if (useMirror3dcErasure)
263264
group.SetErasureSpecies("mirror-3-dc");
264-
else if (numNodes >= 8)
265+
else if (numNodes >= BLOCK_4_2_VDISKS_COUNT)
265266
group.SetErasureSpecies("block-4-2");
266267
else
267268
group.SetErasureSpecies("none");
@@ -284,12 +285,18 @@ void GenerateExtendedInfo(TTestActorRuntime &runtime, NKikimrBlobStorage::TBaseC
284285
} else {
285286
node.SystemStateInfo.AddRoles("Storage");
286287
}
287-
288-
ui32 groupShift = (nodeIndex / 8) * pdisks * vdiskPerPdisk;
289-
if (numNodes < 8)
290-
groupShift = nodeIndex * numNodeGroups;
291-
if (useMirror3dcErasure)
292-
groupShift = (nodeIndex / 9) * pdisks * vdiskPerPdisk;
288+
289+
ui32 groupsPerNode = vdisksPerNode / maxOneGroupVdisksPerNode;
290+
ui32 groupShift;
291+
if (useMirror3dcErasure) {
292+
ui32 groupNodesSize = MIRROR_3DC_VDISKS_COUNT / maxOneGroupVdisksPerNode;
293+
groupShift = (nodeIndex / groupNodesSize) * groupsPerNode;
294+
} else if (numNodes >= BLOCK_4_2_VDISKS_COUNT) {
295+
ui32 groupNodesSize = BLOCK_4_2_VDISKS_COUNT / maxOneGroupVdisksPerNode;
296+
groupShift = (nodeIndex / groupNodesSize) * groupsPerNode;
297+
} else {
298+
groupShift = nodeIndex * groupsPerNode;
299+
}
293300

294301
for (ui32 pdiskIndex = 0; pdiskIndex < pdisks; ++pdiskIndex) {
295302
auto pdiskId = nodeId * pdisks + pdiskIndex;
@@ -316,12 +323,28 @@ void GenerateExtendedInfo(TTestActorRuntime &runtime, NKikimrBlobStorage::TBaseC
316323

317324
for (ui8 vdiskIndex = 0; vdiskIndex < vdiskPerPdisk; ++vdiskIndex) {
318325
ui32 vdiskId = pdiskIndex * vdiskPerPdisk + vdiskIndex;
319-
ui32 groupId = groupShift + vdiskId;
326+
ui32 groupId = groupShift + vdiskId / maxOneGroupVdisksPerNode;
327+
328+
if (groupId >= config->GroupSize()) {
329+
break;
330+
}
331+
320332
ui32 failRealm = 0;
321-
if (useMirror3dcErasure)
322-
failRealm = (nodeIndex % 9) / 3;
333+
if (useMirror3dcErasure) {
334+
if (numNodes >= MIRROR_3DC_VDISKS_COUNT) {
335+
failRealm = (nodeIndex % MIRROR_3DC_VDISKS_COUNT) / 3;
336+
} else {
337+
failRealm = nodeIndex % 3;
338+
}
339+
}
323340

324-
TVDiskID id = {(ui8)groupId, 1, (ui8)failRealm, (ui8)(nodeIndex % 8), (ui8)0};
341+
TVDiskID id = {
342+
(ui8)groupId,
343+
1,
344+
(ui8)failRealm,
345+
(ui8)(nodeIndex % BLOCK_4_2_VDISKS_COUNT),
346+
(ui8)(vdiskId % maxOneGroupVdisksPerNode)
347+
};
325348

326349
auto &vdisk = node.VDiskStateInfo[id];
327350
VDiskIDFromVDiskID(id, vdisk.MutableVDiskId());
@@ -339,7 +362,8 @@ void GenerateExtendedInfo(TTestActorRuntime &runtime, NKikimrBlobStorage::TBaseC
339362
vdiskConfig.SetGroupId(groupId);
340363
vdiskConfig.SetGroupGeneration(1);
341364
vdiskConfig.SetFailRealmIdx(failRealm);
342-
vdiskConfig.SetFailDomainIdx(nodeIndex % 8);
365+
vdiskConfig.SetFailDomainIdx(nodeIndex % BLOCK_4_2_VDISKS_COUNT);
366+
vdiskConfig.SetVDiskIdx(vdiskId % maxOneGroupVdisksPerNode);
343367

344368
config->MutableGroup(groupId)->AddVSlotId()
345369
->CopyFrom(vdiskConfig.GetVSlotId());

ydb/core/cms/cms_ut_common.h

+11-1
Original file line numberDiff line numberDiff line change
@@ -411,14 +411,15 @@ class TCmsTestEnv : public TTestBasicRuntime {
411411
Ydb::Maintenance::MaintenanceTaskResult CheckMaintenanceTaskCreate(
412412
const TString &taskUid,
413413
Ydb::StatusIds::StatusCode code,
414+
Ydb::Maintenance::AvailabilityMode availabilityMode,
414415
const Ts&... actionGroups)
415416
{
416417
auto ev = std::make_unique<NCms::TEvCms::TEvCreateMaintenanceTaskRequest>();
417418
ev->Record.SetUserSID("test-user");
418419

419420
auto *req = ev->Record.MutableRequest();
420421
req->mutable_task_options()->set_task_uid(taskUid);
421-
req->mutable_task_options()->set_availability_mode(Ydb::Maintenance::AVAILABILITY_MODE_STRONG);
422+
req->mutable_task_options()->set_availability_mode(availabilityMode);
422423
AddActionGroups(*req, actionGroups...);
423424

424425
SendToPipe(CmsId, Sender, ev.release(), 0, GetPipeConfigWithRetries());
@@ -430,6 +431,15 @@ class TCmsTestEnv : public TTestBasicRuntime {
430431
return rec.GetResult();
431432
}
432433

434+
template <typename... Ts>
435+
Ydb::Maintenance::MaintenanceTaskResult CheckMaintenanceTaskCreate(
436+
const TString &taskUid,
437+
Ydb::StatusIds::StatusCode code,
438+
const Ts&... actionGroups)
439+
{
440+
return CheckMaintenanceTaskCreate(taskUid, code, Ydb::Maintenance::AVAILABILITY_MODE_STRONG, actionGroups...);
441+
}
442+
433443
void EnableBSBaseConfig();
434444
void DisableBSBaseConfig();
435445

ydb/core/cms/erasure_checkers.cpp

+28-10
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
#include "erasure_checkers.h"
22

3+
#include <ydb/core/protos/counters_cms.pb.h>
4+
#include <ydb/core/tablet/tablet_counters.h>
5+
36
namespace NKikimr::NCms {
47

58
bool TErasureCounterBase::IsDown(const TVDiskInfo &vdisk, TClusterInfoPtr info, TDuration &retryTime, TErrorInfo &error) {
@@ -43,6 +46,10 @@ bool TErasureCounterBase::GroupAlreadyHasLockedDisks() const {
4346
return HasAlreadyLockedDisks;
4447
}
4548

49+
bool TErasureCounterBase::GroupHasMoreThanOneDiskPerNode() const {
50+
return HasMoreThanOneDiskPerNode;
51+
}
52+
4653
static TString DumpVDisksInfo(const THashMap<TVDiskID, TString>& vdisks, TClusterInfoPtr info) {
4754
if (vdisks.empty()) {
4855
return "<empty>";
@@ -121,11 +128,18 @@ bool TErasureCounterBase::CountVDisk(const TVDiskInfo &vdisk, TClusterInfoPtr in
121128
}
122129

123130
void TErasureCounterBase::CountGroupState(TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo &error) {
124-
for (const auto &vdId : info->BSGroup(GroupId).VDisks) {
125-
if (vdId != VDisk.VDiskId)
126-
CountVDisk(info->VDisk(vdId), info, retryTime, duration, error);
131+
const auto& group = info->BSGroup(GroupId);
132+
133+
TSet<ui32> groupNodes;
134+
for (const auto &vdId : group.VDisks) {
135+
const auto &vd = info->VDisk(vdId);
136+
if (vd.VDiskId != VDisk.VDiskId)
137+
CountVDisk(vd, info, retryTime, duration, error);
138+
groupNodes.insert(vd.NodeId);
127139
}
128140

141+
HasMoreThanOneDiskPerNode = group.VDisks.size() > groupNodes.size();
142+
129143
if (Locked && error.Code == TStatus::DISALLOW) {
130144
HasAlreadyLockedDisks = true;
131145
}
@@ -136,10 +150,11 @@ void TErasureCounterBase::CountGroupState(TClusterInfoPtr info, TDuration retryT
136150
bool TDefaultErasureCounter::CheckForKeepAvailability(TClusterInfoPtr info, TErrorInfo &error,
137151
TInstant &defaultDeadline, bool allowPartial) const
138152
{
139-
if (HasAlreadyLockedDisks && allowPartial) {
153+
if (HasAlreadyLockedDisks && !HasMoreThanOneDiskPerNode && allowPartial) {
154+
CmsCounters->Cumulative()[COUNTER_PARTIAL_PERMISSIONS_OPTIMIZED].Increment(1);
140155
error.Code = TStatus::DISALLOW_TEMP;
141156
error.Reason = "You cannot get two or more disks from the same group at the same time"
142-
" without specifying the PartialPermissionAllowed parameter";
157+
" in partial permissions allowed mode";
143158
error.Deadline = defaultDeadline;
144159
return false;
145160
}
@@ -170,10 +185,11 @@ bool TDefaultErasureCounter::CheckForKeepAvailability(TClusterInfoPtr info, TErr
170185
bool TMirror3dcCounter::CheckForKeepAvailability(TClusterInfoPtr info, TErrorInfo &error,
171186
TInstant &defaultDeadline, bool allowPartial) const
172187
{
173-
if (HasAlreadyLockedDisks && allowPartial) {
188+
if (HasAlreadyLockedDisks && !HasMoreThanOneDiskPerNode && allowPartial) {
189+
CmsCounters->Cumulative()[COUNTER_PARTIAL_PERMISSIONS_OPTIMIZED].Increment(1);
174190
error.Code = TStatus::DISALLOW_TEMP;
175191
error.Reason = "You cannot get two or more disks from the same group at the same time"
176-
" without specifying the PartialPermissionAllowed parameter";
192+
" in partial permissions allowed mode";
177193
error.Deadline = defaultDeadline;
178194
return false;
179195
}
@@ -237,7 +253,9 @@ void TMirror3dcCounter::CountGroupState(TClusterInfoPtr info, TDuration retryTim
237253
++DataCenterDisabledNodes[VDisk.VDiskId.FailRealm];
238254
}
239255

240-
TSimpleSharedPtr<IErasureCounter> CreateErasureCounter(TErasureType::EErasureSpecies es, const TVDiskInfo &vdisk, ui32 groupId) {
256+
TSimpleSharedPtr<IErasureCounter> CreateErasureCounter(TErasureType::EErasureSpecies es,
257+
const TVDiskInfo &vdisk, ui32 groupId, TTabletCountersBase* cmsCounters)
258+
{
241259
switch (es) {
242260
case TErasureType::ErasureNone:
243261
case TErasureType::ErasureMirror3:
@@ -257,9 +275,9 @@ TSimpleSharedPtr<IErasureCounter> CreateErasureCounter(TErasureType::EErasureSpe
257275
case TErasureType::Erasure2Plus2Block:
258276
case TErasureType::Erasure2Plus2Stripe:
259277
case TErasureType::ErasureMirror3of4:
260-
return TSimpleSharedPtr<IErasureCounter>(new TDefaultErasureCounter(vdisk, groupId));
278+
return TSimpleSharedPtr<IErasureCounter>(new TDefaultErasureCounter(vdisk, groupId, cmsCounters));
261279
case TErasureType::ErasureMirror3dc:
262-
return TSimpleSharedPtr<IErasureCounter>(new TMirror3dcCounter(vdisk, groupId));
280+
return TSimpleSharedPtr<IErasureCounter>(new TMirror3dcCounter(vdisk, groupId, cmsCounters));
263281
default:
264282
Y_ABORT("Unknown erasure type: %d", es);
265283
}

ydb/core/cms/erasure_checkers.h

+14-6
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ class IErasureCounter {
2020
virtual ~IErasureCounter() = default;
2121

2222
virtual bool GroupAlreadyHasLockedDisks() const = 0;
23+
virtual bool GroupHasMoreThanOneDiskPerNode() const = 0;
2324
virtual bool CheckForMaxAvailability(TClusterInfoPtr info, TErrorInfo& error, TInstant& defaultDeadline, bool allowPartial) const = 0;
2425
virtual bool CheckForKeepAvailability(TClusterInfoPtr info, TErrorInfo& error, TInstant& defaultDeadline, bool allowPartial) const = 0;
2526
virtual void CountGroupState(TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo& error) = 0;
@@ -33,29 +34,35 @@ class TErasureCounterBase: public IErasureCounter {
3334
const TVDiskInfo& VDisk;
3435
const ui32 GroupId;
3536
bool HasAlreadyLockedDisks;
37+
bool HasMoreThanOneDiskPerNode;
38+
39+
TTabletCountersBase* CmsCounters;
3640

3741
protected:
3842
bool IsDown(const TVDiskInfo& vdisk, TClusterInfoPtr info, TDuration& retryTime, TErrorInfo& error);
3943
bool IsLocked(const TVDiskInfo& vdisk, TClusterInfoPtr info, TDuration& retryTime, TDuration& duration, TErrorInfo& error);
4044
bool CountVDisk(const TVDiskInfo& vdisk, TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo& error) override;
4145

4246
public:
43-
TErasureCounterBase(const TVDiskInfo& vdisk, ui32 groupId)
47+
TErasureCounterBase(const TVDiskInfo& vdisk, ui32 groupId, TTabletCountersBase* cmsCounters)
4448
: VDisk(vdisk)
4549
, GroupId(groupId)
4650
, HasAlreadyLockedDisks(false)
51+
, HasMoreThanOneDiskPerNode(false)
52+
, CmsCounters(cmsCounters)
4753
{
4854
}
4955

5056
bool GroupAlreadyHasLockedDisks() const final;
57+
bool GroupHasMoreThanOneDiskPerNode() const final;
5158
bool CheckForMaxAvailability(TClusterInfoPtr info, TErrorInfo& error, TInstant& defaultDeadline, bool allowPartial) const final;
5259
void CountGroupState(TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo &error) override;
5360
};
5461

5562
class TDefaultErasureCounter: public TErasureCounterBase {
5663
public:
57-
TDefaultErasureCounter(const TVDiskInfo& vdisk, ui32 groupId)
58-
: TErasureCounterBase(vdisk, groupId)
64+
TDefaultErasureCounter(const TVDiskInfo& vdisk, ui32 groupId, TTabletCountersBase* cmsCounters)
65+
: TErasureCounterBase(vdisk, groupId, cmsCounters)
5966
{
6067
}
6168

@@ -69,15 +76,16 @@ class TMirror3dcCounter: public TErasureCounterBase {
6976
bool CountVDisk(const TVDiskInfo& vdisk, TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo& error) override;
7077

7178
public:
72-
TMirror3dcCounter(const TVDiskInfo& vdisk, ui32 groupId)
73-
: TErasureCounterBase(vdisk, groupId)
79+
TMirror3dcCounter(const TVDiskInfo& vdisk, ui32 groupId, TTabletCountersBase* cmsCounters)
80+
: TErasureCounterBase(vdisk, groupId, cmsCounters)
7481
{
7582
}
7683

7784
bool CheckForKeepAvailability(TClusterInfoPtr info, TErrorInfo& error, TInstant& defaultDeadline, bool allowPartial) const override;
7885
void CountGroupState(TClusterInfoPtr info, TDuration retryTime, TDuration duration, TErrorInfo &error) override;
7986
};
8087

81-
TSimpleSharedPtr<IErasureCounter> CreateErasureCounter(TErasureType::EErasureSpecies es, const TVDiskInfo& vdisk, ui32 groupId);
88+
TSimpleSharedPtr<IErasureCounter> CreateErasureCounter(TErasureType::EErasureSpecies es,
89+
const TVDiskInfo &vdisk, ui32 groupId, TTabletCountersBase* cmsCounters);
8290

8391
} // namespace NKikimr::NCms

ydb/core/protos/counters_cms.proto

+2
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ enum ESimpleCounters {
1616

1717
enum ECumulativeCounters {
1818
COUNTER_CUMULATIVE_IGNORE = 0;
19+
20+
COUNTER_PARTIAL_PERMISSIONS_OPTIMIZED = 1 [(CounterOpts) = {Name: "PartialPermissionsOptimized"}];
1921
}
2022

2123
enum EPercentileCounters {

0 commit comments

Comments
 (0)