Skip to content

Commit 2876ba4

Browse files
authored
Fix TSlay and THarakiri in ReadOnly mode (#13162)
1 parent 63b0998 commit 2876ba4

File tree

8 files changed

+345
-96
lines changed

8 files changed

+345
-96
lines changed

ydb/apps/dstool/lib/dstool_cmd_cluster_workload_run.py

Lines changed: 59 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ def add_options(p):
1919
p.add_argument('--enable-kill-tablets', action='store_true', help='Enable tablet killer')
2020
p.add_argument('--enable-kill-blob-depot', action='store_true', help='Enable BlobDepot killer')
2121
p.add_argument('--enable-restart-pdisks', action='store_true', help='Enable PDisk restarter')
22+
p.add_argument('--enable-readonly-pdisks', action='store_true', help='Enable SetPDiskReadOnly requests')
2223
p.add_argument('--kill-signal', type=str, default='KILL', help='Kill signal to send to restart node')
2324
p.add_argument('--sleep-before-rounds', type=float, default=1, help='Seconds to sleep before rounds')
2425
p.add_argument('--no-fail-model-check', action='store_true', help='Do not check VDisk states before taking action')
@@ -104,6 +105,12 @@ def do(args):
104105
if vslot.ReadOnly
105106
}
106107

108+
pdisk_readonly = {
109+
(pdisk.NodeId, pdisk.PDiskId)
110+
for pdisk in base_config.PDisk
111+
if pdisk.ReadOnly
112+
}
113+
107114
if (len(pdisk_keys) == 0):
108115
# initialize pdisk_keys and pdisk_key_versions
109116
for node_id in {pdisk.NodeId for pdisk in base_config.PDisk}:
@@ -144,6 +151,25 @@ def match(x):
144151
return False
145152
return True
146153

154+
def can_act_on_pdisk(node_id, pdisk_id):
155+
def match(x):
156+
return node_id == x[0] and pdisk_id == x[1]
157+
158+
for group in base_config.Group:
159+
if any(map(match, map(common.get_vslot_id, group.VSlotId))):
160+
if not common.is_dynamic_group(group.GroupId):
161+
return False
162+
163+
content = {
164+
common.get_vdisk_id_short(vslot): not match(vslot_id) and vslot.Ready and vdisk_status[vslot_id + common.get_vdisk_id(vslot)]
165+
for vslot_id in map(common.get_vslot_id, group.VSlotId)
166+
for vslot in [vslot_map[vslot_id]]
167+
}
168+
common.print_if_verbose(args, content, file=sys.stderr)
169+
if not grouptool.check_fail_model(content, group.ErasureSpecies):
170+
return False
171+
return True
172+
147173
def do_restart(node_id):
148174
host = node_fqdn_map[node_id]
149175
if args.enable_pdisk_encryption_keys_changes:
@@ -166,6 +192,20 @@ def do_restart_pdisk(node_id, pdisk_id):
166192
if not response.Success:
167193
raise Exception('Unexpected error from BSC: %s' % response.ErrorDescription)
168194

195+
def do_readonly_pdisk(node_id, pdisk_id, readonly):
196+
assert can_act_on_vslot(node_id, pdisk_id)
197+
request = common.kikimr_bsconfig.TConfigRequest(IgnoreDegradedGroupsChecks=True)
198+
cmd = request.Command.add().SetPDiskReadOnly
199+
cmd.TargetPDiskId.NodeId = node_id
200+
cmd.TargetPDiskId.PDiskId = pdisk_id
201+
cmd.Value = readonly
202+
try:
203+
response = common.invoke_bsc_request(request)
204+
except Exception as e:
205+
raise Exception('failed to perform SetPDiskReadOnly request: %s' % e)
206+
if not response.Success:
207+
raise Exception('Unexpected error from BSC: %s' % response.ErrorDescription)
208+
169209
def do_evict(vslot_id):
170210
assert can_act_on_vslot(*vslot_id)
171211
try:
@@ -253,15 +293,16 @@ def do_kill_blob_depot():
253293
readonlies = []
254294
unreadonlies = []
255295
pdisk_restarts = []
296+
make_pdisks_readonly = []
297+
make_pdisks_not_readonly = []
256298

257299
for vslot in base_config.VSlot:
258300
if common.is_dynamic_group(vslot.GroupId):
259301
vslot_id = common.get_vslot_id(vslot.VSlotId)
302+
node_id, pdisk_id = vslot_id[:2]
260303
vdisk_id = '[%08x:%d:%d:%d]' % (vslot.GroupId, vslot.FailRealmIdx, vslot.FailDomainIdx, vslot.VDiskIdx)
261304
if vslot_id in vslot_readonly and not args.disable_readonly:
262305
unreadonlies.append(('un-readonly vslot id: %s, vdisk id: %s' % (vslot_id, vdisk_id), (do_readonly, vslot, False)))
263-
if can_act_on_vslot(*vslot_id[:2]) and args.enable_restart_pdisks:
264-
pdisk_restarts.append(('restart pdisk node_id: %d, pdisk_id: %d' % vslot_id[:2], (do_restart_pdisk, *vslot_id[:2])))
265306
if can_act_on_vslot(*vslot_id) and (recent_restarts or args.disable_restarts):
266307
if not args.disable_evicts:
267308
evicts.append(('evict vslot id: %s, vdisk id: %s' % (vslot_id, vdisk_id), (do_evict, vslot_id)))
@@ -270,6 +311,18 @@ def do_kill_blob_depot():
270311
if not args.disable_readonly:
271312
readonlies.append(('readonly vslot id: %s, vdisk id: %s' % (vslot_id, vdisk_id), (do_readonly, vslot, True)))
272313

314+
for pdisk in base_config.PDisk:
315+
node_id, pdisk_id = pdisk.NodeId, pdisk.PDiskId
316+
317+
if can_act_on_pdisk(node_id, pdisk_id):
318+
if args.enable_restart_pdisks:
319+
pdisk_restarts.append(('restart pdisk node_id: %d, pdisk_id: %d' % (node_id, pdisk_id), (do_restart_pdisk, node_id, pdisk_id)))
320+
if args.enable_readonly_pdisks:
321+
make_pdisks_readonly.append(('readonly pdisk node_id: %d, pdisk_id: %d' % (node_id, pdisk_id), (do_readonly_pdisk, node_id, pdisk_id, True)))
322+
323+
if (node_id, pdisk_id) in pdisk_readonly and args.enable_readonly_pdisks:
324+
make_pdisks_not_readonly.append(('un-readonly pdisk node_id: %d, pdisk_id: %d' % (node_id, pdisk_id), (do_readonly_pdisk, node_id, pdisk_id, False)))
325+
273326
def pick(v):
274327
action_name, action = random.choice(v)
275328
print(action_name)
@@ -285,6 +338,10 @@ def pick(v):
285338
possible_actions.append(('un-readonly', (pick, unreadonlies)))
286339
if pdisk_restarts:
287340
possible_actions.append(('restart-pdisk', (pick, pdisk_restarts)))
341+
if make_pdisks_readonly:
342+
possible_actions.append(('make-pdisks-readonly', (pick, make_pdisks_readonly)))
343+
if make_pdisks_not_readonly:
344+
possible_actions.append(('make-pdisks-not-readonly', (pick, make_pdisks_not_readonly)))
288345

289346
restarts = []
290347

ydb/core/blobstorage/nodewarden/node_warden_pdisk.cpp

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -410,21 +410,20 @@ namespace NKikimr::NStorage {
410410

411411
const TPDiskKey key(pdisk);
412412

413-
if (pdisk.HasReadOnly()) {
414-
if (auto it = LocalPDisks.find({pdisk.GetNodeID(), pdisk.GetPDiskID()}); it != LocalPDisks.end()) {
415-
auto& record = it->second;
413+
auto localPdiskIt = LocalPDisks.find({pdisk.GetNodeID(), pdisk.GetPDiskID()});
414+
if (localPdiskIt != LocalPDisks.end()) {
415+
auto& record = localPdiskIt->second;
416416

417-
if (!record.Record.HasReadOnly() || record.Record.GetReadOnly() != pdisk.GetReadOnly()) {
418-
// Changing read-only flag requires restart.
419-
entityStatus = NKikimrBlobStorage::RESTART;
420-
}
417+
if (record.Record.GetReadOnly() != pdisk.GetReadOnly()) {
418+
// Changing read-only flag requires restart.
419+
entityStatus = NKikimrBlobStorage::RESTART;
421420
}
422421
}
423422

424423
switch (entityStatus) {
425424
case NKikimrBlobStorage::RESTART:
426-
if (auto it = LocalPDisks.find({pdisk.GetNodeID(), pdisk.GetPDiskID()}); it != LocalPDisks.end()) {
427-
it->second.Record = pdisk;
425+
if (localPdiskIt != LocalPDisks.end()) {
426+
localPdiskIt->second.Record = pdisk;
428427
}
429428
DoRestartLocalPDisk(pdisk);
430429
[[fallthrough]];

ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3788,8 +3788,6 @@ bool TPDisk::HandleReadOnlyIfWrite(TRequestBase *request) {
37883788
case ERequestType::RequestChunkReadPiece:
37893789
case ERequestType::RequestYardInit:
37903790
case ERequestType::RequestCheckSpace:
3791-
case ERequestType::RequestHarakiri:
3792-
case ERequestType::RequestYardSlay:
37933791
case ERequestType::RequestYardControl:
37943792
case ERequestType::RequestWhiteboartReport:
37953793
case ERequestType::RequestHttpInfo:
@@ -3805,16 +3803,16 @@ bool TPDisk::HandleReadOnlyIfWrite(TRequestBase *request) {
38053803

38063804
// Can't be processed in read-only mode.
38073805
case ERequestType::RequestLogWrite: {
3808-
TLogWrite &ev = *static_cast<TLogWrite*>(request);
3806+
TLogWrite &req = *static_cast<TLogWrite*>(request);
38093807
NPDisk::TEvLogResult* result = new NPDisk::TEvLogResult(NKikimrProto::CORRUPTED, 0, errorReason);
3810-
result->Results.push_back(NPDisk::TEvLogResult::TRecord(ev.Lsn, ev.Cookie));
3808+
result->Results.push_back(NPDisk::TEvLogResult::TRecord(req.Lsn, req.Cookie));
38113809
PCtx->ActorSystem->Send(sender, result);
3812-
ev.Replied = true;
3810+
req.Replied = true;
38133811
return true;
38143812
}
38153813
case ERequestType::RequestChunkWrite: {
3816-
TChunkWrite &ev = *static_cast<TChunkWrite*>(request);
3817-
SendChunkWriteError(ev, errorReason, NKikimrProto::CORRUPTED);
3814+
TChunkWrite &req = *static_cast<TChunkWrite*>(request);
3815+
SendChunkWriteError(req, errorReason, NKikimrProto::CORRUPTED);
38183816
return true;
38193817
}
38203818
case ERequestType::RequestChunkReserve:
@@ -3829,6 +3827,18 @@ bool TPDisk::HandleReadOnlyIfWrite(TRequestBase *request) {
38293827
case ERequestType::RequestChunkForget:
38303828
PCtx->ActorSystem->Send(sender, new NPDisk::TEvChunkForgetResult(NKikimrProto::CORRUPTED, 0, errorReason));
38313829
return true;
3830+
case ERequestType::RequestHarakiri:
3831+
PCtx->ActorSystem->Send(sender, new NPDisk::TEvHarakiriResult(NKikimrProto::CORRUPTED, 0, errorReason));
3832+
return true;
3833+
case ERequestType::RequestYardSlay: {
3834+
TSlay &req = *static_cast<TSlay*>(request);
3835+
// We send NOTREADY, since BSController can't handle CORRUPTED or ERROR.
3836+
// If for some reason the disk will become *not* read-only, the request will be retried and VDisk will be slain.
3837+
// If not, we will be retrying the request until the disk is replaced during maintenance.
3838+
PCtx->ActorSystem->Send(sender, new NPDisk::TEvSlayResult(NKikimrProto::NOTREADY, 0,
3839+
req.VDiskId, req.SlayOwnerRound, req.PDiskId, req.VSlotId, errorReason));
3840+
return true;
3841+
}
38323842

38333843
case ERequestType::RequestWriteMetadata:
38343844
case ERequestType::RequestWriteMetadataResult:

ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut.cpp

Lines changed: 65 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1359,34 +1359,74 @@ Y_UNIT_TEST_SUITE(ReadOnlyPDisk) {
13591359

13601360
vdisk.Init(); // Should start ok.
13611361
vdisk.ReadLog(); // Should be able to read log.
1362-
{
1363-
// Should fail on writing log.
1364-
auto evLog = MakeHolder<NPDisk::TEvLog>(vdisk.PDiskParams->Owner, vdisk.PDiskParams->OwnerRound, 0, TRcBuf(PrepareData(1)),
1365-
TLsnSeg(), nullptr);
1366-
auto res = testCtx.TestResponse<NPDisk::TEvLogResult>(evLog.Release(), NKikimrProto::CORRUPTED);
1367-
1368-
UNIT_ASSERT_STRING_CONTAINS(res->ErrorReason, "PDisk is in read-only mode");
1369-
}
1370-
{
1371-
// Should fail on reserving chunk.
1372-
auto res = testCtx.TestResponse<NPDisk::TEvChunkReserveResult>(
1373-
new NPDisk::TEvChunkReserve(vdisk.PDiskParams->Owner, vdisk.PDiskParams->OwnerRound, 1),
1374-
NKikimrProto::CORRUPTED);
1362+
}
13751363

1376-
UNIT_ASSERT_STRING_CONTAINS(res->ErrorReason, "PDisk is in read-only mode");
1377-
}
1378-
{
1379-
// Should fail on writing chunk.
1380-
TString chunkWriteData = PrepareData(1);
1381-
auto counter = MakeIntrusive<::NMonitoring::TCounterForPtr>();
1382-
TMemoryConsumer consumer(counter);
1383-
TTrackableBuffer buffer(std::move(consumer), chunkWriteData.data(), chunkWriteData.size());
1384-
auto res = testCtx.TestResponse<NPDisk::TEvChunkWriteResult>(
1385-
new NPDisk::TEvChunkWrite(vdisk.PDiskParams->Owner, vdisk.PDiskParams->OwnerRound,
1386-
0, 0, new NPDisk::TEvChunkWrite::TBufBackedUpParts(std::move(buffer)), nullptr, false, 0),
1387-
NKikimrProto::CORRUPTED);
1364+
template <class Request, class Response, NKikimrProto::EReplyStatus ExpectedStatus = NKikimrProto::CORRUPTED, class... Args>
1365+
auto CheckReadOnlyRequest(Args&&... args) {
1366+
return [args = std::make_tuple(std::forward<Args>(args)...)](TActorTestContext& testCtx) {
1367+
Request* req = std::apply([](auto&&... unpackedArgs) {
1368+
return new Request(std::forward<decltype(unpackedArgs)>(unpackedArgs)...);
1369+
}, args);
13881370

1371+
THolder<Response> res = testCtx.TestResponse<Response>(req);
1372+
1373+
UNIT_ASSERT_VALUES_EQUAL(res->Status, ExpectedStatus);
13891374
UNIT_ASSERT_STRING_CONTAINS(res->ErrorReason, "PDisk is in read-only mode");
1375+
};
1376+
}
1377+
1378+
Y_UNIT_TEST(ReadOnlyPDiskEvents) {
1379+
TActorTestContext testCtx{{}};
1380+
TVDiskMock vdisk(&testCtx);
1381+
vdisk.InitFull();
1382+
vdisk.SendEvLogSync();
1383+
1384+
auto cfg = testCtx.GetPDiskConfig();
1385+
cfg->ReadOnly = true;
1386+
testCtx.UpdateConfigRecreatePDisk(cfg);
1387+
1388+
vdisk.Init();
1389+
vdisk.ReadLog();
1390+
1391+
std::vector<std::function<void(TActorTestContext&)>> eventSenders = {
1392+
// Should fail on writing log. (ERequestType::RequestLogWrite)
1393+
CheckReadOnlyRequest<NPDisk::TEvLog, NPDisk::TEvLogResult>(
1394+
vdisk.PDiskParams->Owner, vdisk.PDiskParams->OwnerRound, 0,
1395+
TRcBuf(PrepareData(1)), TLsnSeg(), nullptr
1396+
),
1397+
// Should fail on writing chunk. (ERequestType::RequestChunkWrite)
1398+
CheckReadOnlyRequest<NPDisk::TEvChunkWrite, NPDisk::TEvChunkWriteResult>(
1399+
vdisk.PDiskParams->Owner, vdisk.PDiskParams->OwnerRound,
1400+
0, 0, nullptr, nullptr, false, 0
1401+
),
1402+
// Should fail on reserving chunk. (ERequestType::RequestChunkReserve)
1403+
CheckReadOnlyRequest<NPDisk::TEvChunkReserve, NPDisk::TEvChunkReserveResult>(
1404+
vdisk.PDiskParams->Owner, vdisk.PDiskParams->OwnerRound, 1
1405+
),
1406+
// Should fail on chunk lock. (ERequestType::RequestChunkLock)
1407+
CheckReadOnlyRequest<NPDisk::TEvChunkLock, NPDisk::TEvChunkLockResult>(
1408+
NPDisk::TEvChunkLock::ELockFrom::LOG, 0, NKikimrBlobStorage::TPDiskSpaceColor::YELLOW
1409+
),
1410+
// Should fail on chunk unlock. (ERequestType::RequestChunkUnlock)
1411+
CheckReadOnlyRequest<NPDisk::TEvChunkUnlock, NPDisk::TEvChunkUnlockResult>(
1412+
NPDisk::TEvChunkLock::ELockFrom::LOG
1413+
),
1414+
// Should fail on chunk forget. (ERequestType::RequestChunkForget)
1415+
CheckReadOnlyRequest<NPDisk::TEvChunkForget, NPDisk::TEvChunkForgetResult>(
1416+
vdisk.PDiskParams->Owner, vdisk.PDiskParams->OwnerRound + 1
1417+
),
1418+
// Should fail on harakiri. (ERequestType::RequestHarakiri)
1419+
CheckReadOnlyRequest<NPDisk::TEvHarakiri, NPDisk::TEvHarakiriResult>(
1420+
vdisk.PDiskParams->Owner, vdisk.PDiskParams->OwnerRound + 1
1421+
),
1422+
// Should fail on slaying vdisk. (ERequestType::RequestYardSlay)
1423+
CheckReadOnlyRequest<NPDisk::TEvSlay, NPDisk::TEvSlayResult, NKikimrProto::NOTREADY>(
1424+
vdisk.VDiskID, vdisk.PDiskParams->OwnerRound + 1, 0, 0
1425+
)
1426+
};
1427+
1428+
for (auto sender : eventSenders) {
1429+
sender(testCtx);
13901430
}
13911431
}
13921432
}

ydb/core/blobstorage/pdisk/mock/pdisk_mock.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -447,8 +447,17 @@ class TPDiskMockActor : public TActorBootstrapped<TPDiskMockActor> {
447447
void Handle(NPDisk::TEvSlay::TPtr ev) {
448448
auto *msg = ev->Get();
449449
PDISK_MOCK_LOG(INFO, PDM17, "received TEvSlay", (Msg, msg->ToString()));
450+
450451
auto res = std::make_unique<NPDisk::TEvSlayResult>(NKikimrProto::OK, GetStatusFlags(), msg->VDiskId,
451452
msg->SlayOwnerRound, msg->PDiskId, msg->VSlotId, TString());
453+
454+
if (Impl.IsDiskReadOnly) {
455+
res->Status = NKikimrProto::NOTREADY;
456+
res->ErrorReason = "PDisk is in read-only mode";
457+
Send(ev->Sender, res.release());
458+
return;
459+
}
460+
452461
bool found = false;
453462
for (auto& [ownerId, owner] : Impl.Owners) {
454463
if (!owner.VDiskId.SameExceptGeneration(msg->VDiskId)) {

0 commit comments

Comments
 (0)