Skip to content

Commit 047d5b9

Browse files
authored
Merge 893d5ae into 14389e2
2 parents 14389e2 + 893d5ae commit 047d5b9

File tree

2 files changed

+65
-23
lines changed

2 files changed

+65
-23
lines changed

ydb/core/mind/hive/hive_ut.cpp

Lines changed: 40 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3360,7 +3360,7 @@ Y_UNIT_TEST_SUITE(THiveTest) {
33603360
}
33613361
}
33623362

3363-
Y_UNIT_TEST(TestFollowerPromotion) {
3363+
void TestFollowerPromotion(bool killDuringPromotion) {
33643364
constexpr int NODES = 3;
33653365
TTestBasicRuntime runtime(NODES, false);
33663366
Setup(runtime, true);
@@ -3391,30 +3391,42 @@ Y_UNIT_TEST_SUITE(THiveTest) {
33913391
MakeSureTabletIsUp(runtime, tabletId, i, &pipeConfig, &tabletRolesBefore[i]);
33923392
}
33933393
int leaders = std::accumulate(tabletRolesBefore.begin(), tabletRolesBefore.end(), 0, [](int a, bool b) -> int { return b ? a + 1 : a; });
3394-
UNIT_ASSERT_VALUES_EQUAL(leaders, 1);
33953394
int leaderNode = std::find(tabletRolesBefore.begin(), tabletRolesBefore.end(), true) - tabletRolesBefore.begin();
3396-
// killing leader
3397-
SendKillLocal(runtime, leaderNode);
3395+
UNIT_ASSERT_VALUES_EQUAL(leaders, 1);
33983396
{
3399-
TDispatchOptions options;
3400-
options.FinalEvents.emplace_back(TEvLocal::EvTabletStatus);
3401-
runtime.DispatchEvents(options);
3402-
}
3403-
std::array<bool, NODES> tabletRolesIntermediate = {};
3404-
for (int i = 0; i < NODES; ++i) {
3405-
if (i != leaderNode) {
3406-
MakeSureTabletIsUp(runtime, tabletId, i, &pipeConfig, &tabletRolesIntermediate[i]);
3407-
} else {
3408-
tabletRolesIntermediate[i] = false;
3397+
TBlockEvents<TEvTablet::TEvPromoteToLeader> blockPromote(runtime);
3398+
// killing leader
3399+
SendKillLocal(runtime, leaderNode);
3400+
3401+
while (blockPromote.empty()) {
3402+
runtime.DispatchEvents({}, TDuration::MilliSeconds(100));
3403+
}
3404+
3405+
if (killDuringPromotion) {
3406+
for (int i = 0; i < NODES; ++i) {
3407+
if (i == leaderNode) {
3408+
continue;
3409+
}
3410+
TActorId sender = runtime.AllocateEdgeActor(i);
3411+
runtime.SendToPipe(tabletId, sender, new TEvents::TEvPoisonPill, i, pipeConfig);
3412+
}
34093413
}
3414+
3415+
runtime.DispatchEvents({}, TDuration::MilliSeconds(100));
3416+
3417+
blockPromote.Stop().Unblock();
3418+
}
3419+
{
3420+
TDispatchOptions options;
3421+
options.FinalEvents.emplace_back(TEvLocal::EvTabletStatus, killDuringPromotion ? 3 : 1);
3422+
runtime.DispatchEvents(options, TDuration::MilliSeconds(100));
34103423
}
3411-
leaders = std::accumulate(tabletRolesIntermediate.begin(), tabletRolesIntermediate.end(), 0, [](int a, bool b) -> int { return b ? a + 1 : a; });
3412-
int followers = std::accumulate(tabletRolesIntermediate.begin(), tabletRolesIntermediate.end(), 0, [](int a, bool b) -> int { return b ? a : a + 1; });
3413-
UNIT_ASSERT_VALUES_EQUAL(leaders, 1);
3414-
UNIT_ASSERT_VALUES_EQUAL(followers, 2);
34153424
std::unordered_set<std::pair<TTabletId, TFollowerId>> activeTablets;
34163425
TActorId senderA = runtime.AllocateEdgeActor();
34173426
for (int i = 0; i < NODES; ++i) {
3427+
if (i == leaderNode) {
3428+
continue;
3429+
}
34183430
TActorId whiteboard = NNodeWhiteboard::MakeNodeWhiteboardServiceId(runtime.GetNodeId(i));
34193431
runtime.Send(new IEventHandle(whiteboard, senderA, new NNodeWhiteboard::TEvWhiteboard::TEvTabletStateRequest()));
34203432
TAutoPtr<IEventHandle> handle;
@@ -3429,6 +3441,16 @@ Y_UNIT_TEST_SUITE(THiveTest) {
34293441
}
34303442
}
34313443
UNIT_ASSERT_VALUES_EQUAL(activeTablets.size(), 3);
3444+
leaders = std::count_if(activeTablets.begin(), activeTablets.end(), [](auto&& p) { return p.second == 0; });
3445+
UNIT_ASSERT_VALUES_EQUAL(leaders, 1);
3446+
}
3447+
3448+
// Plain promotion scenario: the shared helper kills the leader and lets a
// follower be promoted without poisoning the remaining tablets meanwhile.
Y_UNIT_TEST(TestFollowerPromotion) {
    constexpr bool killDuringPromotion = false;
    TestFollowerPromotion(killDuringPromotion);
}
3451+
3452+
// Same scenario as TestFollowerPromotion, but with killDuringPromotion set:
// while TEvPromoteToLeader is held back, the helper sends TEvPoisonPill to
// the tablet through every non-leader node.
Y_UNIT_TEST(TestFollowerPromotionFollowerDies) {
    constexpr bool killDuringPromotion = true;
    TestFollowerPromotion(killDuringPromotion);
}
34333455

34343456
Y_UNIT_TEST(TestManyFollowersOnOneNode) {

ydb/core/mind/local.cpp

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,19 +52,19 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
5252
ui32 Generation;
5353
TTabletTypes::EType TabletType;
5454
NKikimrLocal::EBootMode BootMode;
55-
ui32 FollowerId;
5655

5756
TTablet()
5857
: Tablet()
5958
, Generation(0)
6059
, TabletType()
6160
, BootMode(NKikimrLocal::EBootMode::BOOT_MODE_LEADER)
62-
, FollowerId(0)
6361
{}
6462
};
6563

6664
struct TTabletEntry : TTablet {
6765
TInstant From;
66+
bool IsPromoting = false;
67+
ui32 PromotingFromFollower = 0;
6868

6969
TTabletEntry()
7070
: From(TInstant::MicroSeconds(0))
@@ -141,6 +141,10 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
141141
::NMonitoring::TDynamicCounters::TCounterPtr CounterCancelDemotedByBS;
142142
::NMonitoring::TDynamicCounters::TCounterPtr CounterCancelUnknownReason;
143143

144+
// Returns the identifier of the leader of the same tablet: the first
// component is kept and the second (follower id) is reset to 0.
static TTabletId LeaderId(TTabletId tabletId) {
    // The argument is already a private copy, so it can be adjusted in place.
    tabletId.second = 0;
    return tabletId;
}
147+
144148
void Die(const TActorContext &ctx) override {
145149
if (HivePipeClient) {
146150
if (Connected) {
@@ -430,15 +434,17 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
430434
// promote to leader
431435
it->second.BootMode = NKikimrLocal::EBootMode::BOOT_MODE_LEADER;
432436
it->second.Generation = suggestedGen;
433-
tabletId.second = 0; // FollowerId = 0
434-
TTabletEntry &entry = InbootTablets[tabletId];
437+
TTabletId leaderId = LeaderId(tabletId);
438+
TTabletEntry &entry = InbootTablets[leaderId];
435439
entry = it->second;
436440
entry.From = ctx.Now();
437441
entry.BootMode = NKikimrLocal::EBootMode::BOOT_MODE_LEADER;
438442
entry.Generation = suggestedGen;
443+
entry.IsPromoting = true;
444+
entry.PromotingFromFollower = tabletId.second;
439445
ctx.Send(entry.Tablet, new TEvTablet::TEvPromoteToLeader(suggestedGen, info));
440446
MarkDeadTablet(it->first, 0, TEvLocal::TEvTabletStatus::StatusSupersededByLeader, TEvTablet::TEvTabletDead::ReasonError, ctx);
441-
OnlineTablets.erase(it);
447+
it->second.IsPromoting = true;
442448
LOG_DEBUG_S(ctx, NKikimrServices::LOCAL,
443449
"TLocalNodeRegistrar::Handle TEvLocal::TEvBootTablet follower tablet " << tabletId << " promoted to leader");
444450
return;
@@ -718,6 +724,12 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
718724
<< " marked as running at generation "
719725
<< generation);
720726
NTabletPipe::SendData(ctx, HivePipeClient, new TEvLocal::TEvTabletStatus(TEvLocal::TEvTabletStatus::StatusOk, tabletId, generation));
727+
if (inbootIt->second.IsPromoting) {
728+
TTabletId promotedTablet{tabletId.first, inbootIt->second.PromotingFromFollower};
729+
OnlineTablets.erase(promotedTablet);
730+
inbootIt->second.IsPromoting = false;
731+
inbootIt->second.PromotingFromFollower = 0;
732+
}
721733
OnlineTablets.emplace(tabletId, inbootIt->second);
722734
InbootTablets.erase(inbootIt);
723735
}
@@ -818,6 +830,14 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
818830
});
819831
if (onlineIt != OnlineTablets.end()) { // from online list
820832
MarkDeadTablet(onlineIt->first, generation, TEvLocal::TEvTabletStatus::StatusFailed, msg->Reason, ctx);
833+
// The online entry that just died was flagged as being promoted, so a leader
// entry for the same tablet may still be waiting in InbootTablets. Report that
// leader as failed too and drop its in-boot record.
if (onlineIt->second.IsPromoting) {
    const TTabletId leader = LeaderId(onlineIt->first);
    auto inbootIt = InbootTablets.find(leader);
    if (inbootIt != InbootTablets.end()) {
        MarkDeadTablet(leader, inbootIt->second.Generation, TEvLocal::TEvTabletStatus::StatusFailed, msg->Reason, ctx);
        // Erase only after a successful lookup. The original erased
        // unconditionally, which hands end() to erase() when no in-boot
        // leader exists -- undefined behaviour for standard containers
        // (NOTE(review): InbootTablets' type is not visible here; confirm).
        InbootTablets.erase(inbootIt);
    }
}
821841
OnlineTablets.erase(onlineIt);
822842
UpdateEstimate();
823843
return;

0 commit comments

Comments
 (0)