Skip to content

Commit 2a7d675

Browse files
serbel324 authored and blinkov committed
Fix segfault in Group Layout Sanitizer, add stress tests with INACTIVE/FAULTY disks (#15516)
1 parent 5013853 commit 2a7d675

File tree

2 files changed

+133
-9
lines changed

2 files changed

+133
-9
lines changed

ydb/core/blobstorage/ut_blobstorage/sanitize_groups.cpp

Lines changed: 122 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,11 @@
11
#include <ydb/core/blobstorage/ut_blobstorage/lib/env.h>
2+
#include <ydb/core/blobstorage/ut_blobstorage/lib/common.h>
23
#include <ydb/core/mind/bscontroller/layout_helpers.h>
34

45
Y_UNIT_TEST_SUITE(GroupLayoutSanitizer) {
6+
using NBsController::TPDiskId;
7+
using NKikimrBlobStorage::EDriveStatus;
8+
59
bool CatchSanitizeRequests(ui32 /*nodeId*/, std::unique_ptr<IEventHandle>& ev) {
610
if (ev->GetTypeRewrite() == TEvBlobStorage::TEvControllerConfigRequest::EventType) {
711
const auto& request = ev->Get<TEvBlobStorage::TEvControllerConfigRequest>()->Record.GetRequest();
@@ -238,4 +242,122 @@ Y_UNIT_TEST_SUITE(GroupLayoutSanitizer) {
238242
Y_UNIT_TEST(ForbidMultipleRealmsOccupation) {
    // Runs the shared multiple-realms-occupation scenario with its flag set to false.
    // NOTE(review): presumably `false` means "occupation is forbidden", as the test name
    // suggests — confirm against TestMultipleRealmsOccupation.
    constexpr bool scenarioFlag = false;
    TestMultipleRealmsOccupation(scenarioFlag);
}
245+
246+
// Randomized stress scenario used by the GroupLayoutSanitizer stress tests.
//
// Creates an environment for `groupType` from the locations produced by
// MakeLocations(locations, dcs, racks, units, LocationGenerator), remembers the PDisk ids
// reported by FetchBaseConfig(), and then performs 100 randomly selected actions:
//   * SHUFFLE_LOCATIONS         - Cleanup(), shuffle the location vector, Initialize(),
//                                 then simulate 100 seconds;
//   * UPDATE_STATUS             - send UpdateDriveStatus commands for 1..5 randomly picked
//                                 PDisks (registered with weight 5, every other action with 1);
//   * ENABLE_/DISABLE_SANITIZER - set the local groupLayoutSanitizer flag and push settings;
//   * ENABLE_/DISABLE_SELF_HEAL - set the local selfHeal flag and push settings.
//
// The function contains no explicit assertions.
// NOTE(review): presumably the test passes as long as the environment survives the whole
// scenario without crashing — confirm.
void StressTest(TBlobStorageGroupType groupType, ui32 dcs, ui32 racks, ui32 units) {
    constexpr ui32 steps = 100;

    std::vector<TNodeLocation> locations;
    MakeLocations(locations, dcs, racks, units, LocationGenerator);

    std::unique_ptr<TEnvironmentSetup> env;
    CreateEnv(env, locations, groupType);
    env->Sim(TDuration::Minutes(3));
    env->UpdateSettings(false, false, false);

    // The PDisk list is captured once, before any random action is executed.
    std::vector<TPDiskId> pdisks;
    {
        auto baseConfig = env->FetchBaseConfig();
        for (const auto& pdisk : baseConfig.GetPDisk()) {
            pdisks.emplace_back(pdisk.GetNodeId(), pdisk.GetPDiskId());
        }
    }

    // Re-initializes the environment around a random permutation of the locations.
    auto shuffleLocations = [&]() {
        env->Cleanup();
        // NOTE(review): std::random_shuffle was deprecated in C++14 and removed in C++17;
        // it is kept here so the shuffle keeps its current source of randomness.
        std::random_shuffle(locations.begin(), locations.end());
        env->Initialize();
        env->Sim(TDuration::Seconds(100));
    };

    // Builds one config request with `drives` UpdateDriveStatus commands (the same PDisk may
    // be picked more than once) and invokes it with all four "ignore ... checks" flags set.
    auto updateDriveStatus = [&](ui32 drives) {
        NKikimrBlobStorage::TConfigRequest request;
        request.SetIgnoreGroupFailModelChecks(true);
        request.SetIgnoreGroupSanityChecks(true);
        request.SetIgnoreDegradedGroupsChecks(true);
        request.SetIgnoreDisintegratedGroupsChecks(true);

        for (ui32 n = 0; n < drives; ++n) {
            auto* drive = request.AddCommand()->MutableUpdateDriveStatus();

            const TPDiskId& target = pdisks[RandomNumber<ui32>(pdisks.size())];
            drive->MutableHostKey()->SetNodeId(target.NodeId);
            drive->SetPDiskId(target.PDiskId);

            // Values 0, 1 and 2 select INACTIVE, BROKEN and FAULTY respectively;
            // the remaining four values (3..6) select ACTIVE.
            switch (RandomNumber<ui32>(7)) {
                case 0:
                    drive->SetStatus(EDriveStatus::INACTIVE);
                    break;
                case 1:
                    drive->SetStatus(EDriveStatus::BROKEN);
                    break;
                case 2:
                    drive->SetStatus(EDriveStatus::FAULTY);
                    break;
                default:
                    drive->SetStatus(EDriveStatus::ACTIVE);
                    break;
            }
        }

        env->Invoke(request);
    };

    enum class EActions {
        SHUFFLE_LOCATIONS = 0,
        UPDATE_STATUS,
        ENABLE_SANITIZER,
        DISABLE_SANITIZER,
        ENABLE_SELF_HEAL,
        DISABLE_SELF_HEAL,
    };

    TWeightedRandom<EActions> act;
    act.AddValue(EActions::SHUFFLE_LOCATIONS, 1);
    act.AddValue(EActions::UPDATE_STATUS, 5);
    act.AddValue(EActions::ENABLE_SANITIZER, 1);
    act.AddValue(EActions::DISABLE_SANITIZER, 1);
    act.AddValue(EActions::ENABLE_SELF_HEAL, 1);
    act.AddValue(EActions::DISABLE_SELF_HEAL, 1);

    bool selfHeal = false;
    bool groupLayoutSanitizer = false;

    // Pushes the current flag values; the second argument is always passed as false.
    auto applySettings = [&]() {
        env->UpdateSettings(selfHeal, false, groupLayoutSanitizer);
    };

    for (ui32 step = 0; step < steps; ++step) {
        switch (act.GetRandom()) {
            case EActions::SHUFFLE_LOCATIONS:
                shuffleLocations();
                break;
            case EActions::UPDATE_STATUS:
                updateDriveStatus(RandomNumber<ui32>(5) + 1);
                break;
            case EActions::ENABLE_SANITIZER:
                groupLayoutSanitizer = true;
                applySettings();
                break;
            case EActions::DISABLE_SANITIZER:
                groupLayoutSanitizer = false;
                applySettings();
                break;
            case EActions::ENABLE_SELF_HEAL:
                selfHeal = true;
                applySettings();
                break;
            case EActions::DISABLE_SELF_HEAL:
                selfHeal = false;
                applySettings();
                break;
        }
    }
}
351+
352+
Y_UNIT_TEST(StressMirror3dc) {
    // Stress scenario for a mirror-3-dc group; the constants are named after the
    // StressTest parameters they are passed to.
    constexpr ui32 dcs = 3;
    constexpr ui32 racks = 5;
    constexpr ui32 units = 1;
    StressTest(TBlobStorageGroupType::ErasureMirror3dc, dcs, racks, units);
}
355+
356+
Y_UNIT_TEST(StressBlock4Plus2) {
    // Stress scenario for a block-4-2 group; the constants are named after the
    // StressTest parameters they are passed to.
    constexpr ui32 dcs = 1;
    constexpr ui32 racks = 10;
    constexpr ui32 units = 2;
    StressTest(TBlobStorageGroupType::Erasure4Plus2Block, dcs, racks, units);
}
359+
360+
Y_UNIT_TEST(StressMirror3of4) {
    // Stress scenario for a mirror-3of4 group; the constants are named after the
    // StressTest parameters they are passed to.
    constexpr ui32 dcs = 1;
    constexpr ui32 racks = 10;
    constexpr ui32 units = 2;
    StressTest(TBlobStorageGroupType::ErasureMirror3of4, dcs, racks, units);
}
241363
}

ydb/core/mind/bscontroller/group_mapper.cpp

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -795,15 +795,17 @@ namespace NKikimr::NBsController {
795795
}
796796

797797
for (ui32 orderNum = 0; orderNum < group.size(); ++orderNum) {
798-
const TVDiskIdShort vdisk = Topology.GetVDiskId(orderNum);
799-
ui32 pRealm = group[orderNum]->Position.Realm.Index();
800-
ui32 desiredPRealm = RealmNavigator[vdisk.FailRealm];
801-
if (pRealm != desiredPRealm) {
802-
if (realmOccupation[pRealm].size() > 1) {
803-
// disks from different fail realms in one Realm present
804-
failDetected(EFailLevel::REALM_FAIL, orderNum);
805-
} else {
806-
failDetected(EFailLevel::MULTIPLE_REALM_OCCUPATION, orderNum);
798+
if (group[orderNum]) {
799+
const TVDiskIdShort vdisk = Topology.GetVDiskId(orderNum);
800+
ui32 pRealm = group[orderNum]->Position.Realm.Index();
801+
ui32 desiredPRealm = RealmNavigator[vdisk.FailRealm];
802+
if (pRealm != desiredPRealm) {
803+
if (realmOccupation[pRealm].size() > 1) {
804+
// disks from different fail realms in one Realm present
805+
failDetected(EFailLevel::REALM_FAIL, orderNum);
806+
} else {
807+
failDetected(EFailLevel::MULTIPLE_REALM_OCCUPATION, orderNum);
808+
}
807809
}
808810
}
809811
}

0 commit comments

Comments
 (0)