Skip to content

Commit bce4f3a

Browse files
authored
24-3-9-hotfix: schemeshard: fix crash on concurrent alter-extsubdomains (#9203)
merge 5c36245(#9157), ...(#9198) from `main` KIKIMR-21965 - enforce creation of `TAlterExtSubDomain` operations only through `CreateCompatibleAlterExtSubDomain()` to ensure all proper state checking - fix `TAlterExtSubDomain` compatibility with ESchemeOpAlterSubDomain
1 parent 0e6dc77 commit bce4f3a

File tree

5 files changed

+126
-11
lines changed

5 files changed

+126
-11
lines changed

ydb/core/tx/schemeshard/schemeshard__operation.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1263,7 +1263,7 @@ TVector<ISubOperation::TPtr> TOperation::ConstructParts(const TTxTransaction& tx
12631263
case NKikimrSchemeOp::EOperationType::ESchemeOpCreateSubDomain:
12641264
return {CreateSubDomain(NextPartId(), tx)};
12651265
case NKikimrSchemeOp::EOperationType::ESchemeOpAlterSubDomain:
1266-
return {CreateCompatibleSubdomainAlter(context.SS, NextPartId(), tx)};
1266+
return CreateCompatibleSubdomainAlter(NextPartId(), tx, context);
12671267
case NKikimrSchemeOp::EOperationType::ESchemeOpDropSubDomain:
12681268
return {CreateDropSubdomain(NextPartId(), tx)};
12691269
case NKikimrSchemeOp::EOperationType::ESchemeOpForceDropSubDomain:

ydb/core/tx/schemeshard/schemeshard__operation_alter_extsubdomain.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -814,7 +814,7 @@ class TSyncHive: public TSubOperationState {
814814
}
815815

816816
bool HandleReply(TEvHive::TEvUpdateDomainReply::TPtr& ev, TOperationContext& context) override {
817-
const TTabletId hive = TTabletId(ev->Get()->Record.GetOrigin());
817+
const TTabletId hive = TTabletId(ev->Get()->Record.GetOrigin());
818818

819819
LOG_I(DebugHint() << "HandleReply TEvUpdateDomainReply"
820820
<< ", from hive: " << hive);
@@ -1084,7 +1084,13 @@ ISubOperation::TPtr CreateAlterExtSubDomain(TOperationId id, TTxState::ETxState
10841084
}
10851085

10861086
TVector<ISubOperation::TPtr> CreateCompatibleAlterExtSubDomain(TOperationId id, const TTxTransaction& tx, TOperationContext& context) {
1087-
Y_ABORT_UNLESS(tx.GetOperationType() == NKikimrSchemeOp::ESchemeOpAlterExtSubDomain);
1087+
//NOTE: Accepting ESchemeOpAlterSubDomain operation for an ExtSubDomain is a special compatibility case
1088+
// for those old subdomains that at the time went through migration to a separate tenants.
1089+
// Console tablet holds records about types of the subdomains but they hadn't been updated
1090+
// at the migration time. So Console still thinks that old subdomains are plain subdomains
1091+
// whereas they had been migrated to the extsubdomains.
1092+
// This compatibility case should be upholded until Console records would be updated.
1093+
Y_ABORT_UNLESS(tx.GetOperationType() == NKikimrSchemeOp::ESchemeOpAlterExtSubDomain || tx.GetOperationType() == NKikimrSchemeOp::ESchemeOpAlterSubDomain);
10881094

10891095
LOG_I("CreateCompatibleAlterExtSubDomain, opId " << id
10901096
<< ", feature flag EnableAlterDatabaseCreateHiveFirst " << context.SS->EnableAlterDatabaseCreateHiveFirst

ydb/core/tx/schemeshard/schemeshard__operation_part.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -493,7 +493,7 @@ ISubOperation::TPtr CreateAlterSubDomain(TOperationId id, const TTxTransaction&
493493
ISubOperation::TPtr CreateAlterSubDomain(TOperationId id, TTxState::ETxState state);
494494

495495
ISubOperation::TPtr CreateCompatibleSubdomainDrop(TSchemeShard* ss, TOperationId id, const TTxTransaction& tx);
496-
ISubOperation::TPtr CreateCompatibleSubdomainAlter(TSchemeShard* ss, TOperationId id, const TTxTransaction& tx);
496+
TVector<ISubOperation::TPtr> CreateCompatibleSubdomainAlter(TOperationId id, const TTxTransaction& tx, TOperationContext& context);
497497

498498
ISubOperation::TPtr CreateUpgradeSubDomain(TOperationId id, const TTxTransaction& tx);
499499
ISubOperation::TPtr CreateUpgradeSubDomain(TOperationId id, TTxState::ETxState state);
@@ -514,10 +514,10 @@ ISubOperation::TPtr CreateExtSubDomain(TOperationId id, TTxState::ETxState state
514514

515515
// Alter
516516
TVector<ISubOperation::TPtr> CreateCompatibleAlterExtSubDomain(TOperationId nextId, const TTxTransaction& tx, TOperationContext& context);
517-
ISubOperation::TPtr CreateAlterExtSubDomain(TOperationId id, const TTxTransaction& tx);
518517
ISubOperation::TPtr CreateAlterExtSubDomain(TOperationId id, TTxState::ETxState state);
519-
ISubOperation::TPtr CreateAlterExtSubDomainCreateHive(TOperationId id, const TTxTransaction& tx);
520518
ISubOperation::TPtr CreateAlterExtSubDomainCreateHive(TOperationId id, TTxState::ETxState state);
519+
//NOTE: no variants to construct individual suboperations directly from TTxTransaction --
520+
// -- it should be possible only through CreateCompatibleAlterExtSubDomain
521521

522522
// Drop
523523
ISubOperation::TPtr CreateForceDropExtSubDomain(TOperationId id, const TTxTransaction& tx);

ydb/core/tx/schemeshard/schemeshard__operation_upgrade_subdomain.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1530,13 +1530,13 @@ ISubOperation::TPtr CreateCompatibleSubdomainDrop(TSchemeShard* ss, TOperationId
15301530
return CreateForceDropSubDomain(id, tx);
15311531
}
15321532

1533-
ISubOperation::TPtr CreateCompatibleSubdomainAlter(TSchemeShard* ss, TOperationId id, const TTxTransaction& tx) {
1533+
TVector<ISubOperation::TPtr> CreateCompatibleSubdomainAlter(TOperationId id, const TTxTransaction& tx, TOperationContext& context) {
15341534
const auto& info = tx.GetSubDomain();
15351535

15361536
const TString& parentPathStr = tx.GetWorkingDir();
15371537
const TString& name = info.GetName();
15381538

1539-
TPath path = TPath::Resolve(parentPathStr, ss).Dive(name);
1539+
TPath path = TPath::Resolve(parentPathStr, context.SS).Dive(name);
15401540

15411541
{
15421542
TPath::TChecker checks = path.Check();
@@ -1546,15 +1546,16 @@ ISubOperation::TPtr CreateCompatibleSubdomainAlter(TSchemeShard* ss, TOperationI
15461546
.NotDeleted();
15471547

15481548
if (!checks) {
1549-
return CreateAlterSubDomain(id, tx);
1549+
return {CreateAlterSubDomain(id, tx)};
15501550
}
15511551
}
15521552

15531553
if (path.Base()->IsExternalSubDomainRoot()) {
1554-
return CreateAlterExtSubDomain(id, tx);
1554+
// plain subdomains don't have subdomain/tenant hives so only single operation should be returned here
1555+
return CreateCompatibleAlterExtSubDomain(id, tx, context);
15551556
}
15561557

1557-
return CreateAlterSubDomain(id, tx);
1558+
return {CreateAlterSubDomain(id, tx)};
15581559
}
15591560

15601561
}

ydb/core/tx/schemeshard/ut_extsubdomain/ut_extsubdomain.cpp

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,114 @@ Y_UNIT_TEST_SUITE(TSchemeShardExtSubDomainTest) {
322322
NLs::UserAttrsEqual({{"user__attr_1", "value"}})});
323323
}
324324

325+
Y_UNIT_TEST_FLAG(AlterWithPlainAlterSubdomain, AlterDatabaseCreateHiveFirst) {
326+
TTestBasicRuntime runtime;
327+
TTestEnv env(runtime, TTestEnvOptions().EnableAlterDatabaseCreateHiveFirst(AlterDatabaseCreateHiveFirst));
328+
ui64 txId = 100;
329+
330+
// Create extsubdomain
331+
332+
TestCreateExtSubDomain(runtime, ++txId, "/MyRoot",
333+
R"(Name: "USER_0")"
334+
);
335+
TestAlterExtSubDomain(runtime, ++txId, "/MyRoot",
336+
R"(
337+
Name: "USER_0"
338+
ExternalSchemeShard: true
339+
PlanResolution: 50
340+
Coordinators: 1
341+
Mediators: 1
342+
TimeCastBucketsPerMediator: 2
343+
StoragePools {
344+
Name: "pool-1"
345+
Kind: "hdd"
346+
}
347+
)"
348+
);
349+
env.TestWaitNotification(runtime, {txId, txId - 1});
350+
351+
// Altering extsubdomain but with plain altersubdomain should succeed
352+
// (post tenant migration compatibility)
353+
354+
//NOTE: SubDomain and not ExtSubdomain
355+
TestAlterSubDomain(runtime, ++txId, "/MyRoot",
356+
R"(
357+
Name: "USER_0"
358+
ExternalSchemeShard: true
359+
PlanResolution: 50
360+
Coordinators: 1
361+
Mediators: 1
362+
TimeCastBucketsPerMediator: 2
363+
StoragePools {
364+
Name: "pool-1"
365+
Kind: "hdd"
366+
}
367+
)"
368+
);
369+
env.TestWaitNotification(runtime, txId);
370+
}
371+
372+
Y_UNIT_TEST_FLAG(AlterTwiceAndWithPlainAlterSubdomain, AlterDatabaseCreateHiveFirst) {
373+
TTestBasicRuntime runtime;
374+
TTestEnv env(runtime, TTestEnvOptions().EnableAlterDatabaseCreateHiveFirst(AlterDatabaseCreateHiveFirst));
375+
ui64 txId = 100;
376+
377+
TestCreateExtSubDomain(runtime, ++txId, "/MyRoot",
378+
R"(Name: "USER_0")"
379+
);
380+
TestAlterExtSubDomain(runtime, ++txId, "/MyRoot",
381+
R"(
382+
Name: "USER_0"
383+
ExternalSchemeShard: true
384+
PlanResolution: 50
385+
Coordinators: 1
386+
Mediators: 1
387+
TimeCastBucketsPerMediator: 2
388+
StoragePools {
389+
Name: "pool-1"
390+
Kind: "hdd"
391+
}
392+
)"
393+
);
394+
env.TestWaitNotification(runtime, {txId, txId - 1});
395+
396+
AsyncAlterExtSubDomain(runtime, ++txId, "/MyRoot",
397+
R"(
398+
Name: "USER_0"
399+
ExternalSchemeShard: true
400+
PlanResolution: 50
401+
Coordinators: 1
402+
Mediators: 1
403+
TimeCastBucketsPerMediator: 2
404+
StoragePools {
405+
Name: "pool-1"
406+
Kind: "hdd"
407+
}
408+
)"
409+
);
410+
// TestModificationResults(runtime, txId, {NKikimrScheme::StatusAccepted});
411+
const auto firstAlterTxId = txId;
412+
413+
//NOTE: SubDomain vs ExtSubDomain
414+
TestAlterSubDomain(runtime, ++txId, "/MyRoot",
415+
R"(
416+
Name: "USER_0"
417+
ExternalSchemeShard: true
418+
PlanResolution: 50
419+
Coordinators: 1
420+
Mediators: 1
421+
TimeCastBucketsPerMediator: 2
422+
StoragePools {
423+
Name: "pool-1"
424+
Kind: "hdd"
425+
}
426+
)",
427+
{{NKikimrScheme::StatusMultipleModifications}}
428+
);
429+
430+
env.TestWaitNotification(runtime, firstAlterTxId);
431+
}
432+
325433
Y_UNIT_TEST(CreateWithOnlyDotsNotAllowed) {
326434
TTestBasicRuntime runtime;
327435
TTestEnv env(runtime);

0 commit comments

Comments
 (0)