Skip to content

Commit 893d5ae

Browse files
committed
handle follower dying while being promoted to leader
1 parent eba58c9 commit 893d5ae

File tree

2 files changed

+65
-23
lines changed

2 files changed

+65
-23
lines changed

ydb/core/mind/hive/hive_ut.cpp

Lines changed: 40 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3363,7 +3363,7 @@ Y_UNIT_TEST_SUITE(THiveTest) {
33633363
}
33643364
}
33653365

3366-
Y_UNIT_TEST(TestFollowerPromotion) {
3366+
void TestFollowerPromotion(bool killDuringPromotion) {
33673367
constexpr int NODES = 3;
33683368
TTestBasicRuntime runtime(NODES, false);
33693369
Setup(runtime, true);
@@ -3394,30 +3394,42 @@ Y_UNIT_TEST_SUITE(THiveTest) {
33943394
MakeSureTabletIsUp(runtime, tabletId, i, &pipeConfig, &tabletRolesBefore[i]);
33953395
}
33963396
int leaders = std::accumulate(tabletRolesBefore.begin(), tabletRolesBefore.end(), 0, [](int a, bool b) -> int { return b ? a + 1 : a; });
3397-
UNIT_ASSERT_VALUES_EQUAL(leaders, 1);
33983397
int leaderNode = std::find(tabletRolesBefore.begin(), tabletRolesBefore.end(), true) - tabletRolesBefore.begin();
3399-
// killing leader
3400-
SendKillLocal(runtime, leaderNode);
3398+
UNIT_ASSERT_VALUES_EQUAL(leaders, 1);
34013399
{
3402-
TDispatchOptions options;
3403-
options.FinalEvents.emplace_back(TEvLocal::EvTabletStatus);
3404-
runtime.DispatchEvents(options);
3405-
}
3406-
std::array<bool, NODES> tabletRolesIntermediate = {};
3407-
for (int i = 0; i < NODES; ++i) {
3408-
if (i != leaderNode) {
3409-
MakeSureTabletIsUp(runtime, tabletId, i, &pipeConfig, &tabletRolesIntermediate[i]);
3410-
} else {
3411-
tabletRolesIntermediate[i] = false;
3400+
TBlockEvents<TEvTablet::TEvPromoteToLeader> blockPromote(runtime);
3401+
// killing leader
3402+
SendKillLocal(runtime, leaderNode);
3403+
3404+
while (blockPromote.empty()) {
3405+
runtime.DispatchEvents({}, TDuration::MilliSeconds(100));
3406+
}
3407+
3408+
if (killDuringPromotion) {
3409+
for (int i = 0; i < NODES; ++i) {
3410+
if (i == leaderNode) {
3411+
continue;
3412+
}
3413+
TActorId sender = runtime.AllocateEdgeActor(i);
3414+
runtime.SendToPipe(tabletId, sender, new TEvents::TEvPoisonPill, i, pipeConfig);
3415+
}
34123416
}
3417+
3418+
runtime.DispatchEvents({}, TDuration::MilliSeconds(100));
3419+
3420+
blockPromote.Stop().Unblock();
3421+
}
3422+
{
3423+
TDispatchOptions options;
3424+
options.FinalEvents.emplace_back(TEvLocal::EvTabletStatus, killDuringPromotion ? 3 : 1);
3425+
runtime.DispatchEvents(options, TDuration::MilliSeconds(100));
34133426
}
3414-
leaders = std::accumulate(tabletRolesIntermediate.begin(), tabletRolesIntermediate.end(), 0, [](int a, bool b) -> int { return b ? a + 1 : a; });
3415-
int followers = std::accumulate(tabletRolesIntermediate.begin(), tabletRolesIntermediate.end(), 0, [](int a, bool b) -> int { return b ? a : a + 1; });
3416-
UNIT_ASSERT_VALUES_EQUAL(leaders, 1);
3417-
UNIT_ASSERT_VALUES_EQUAL(followers, 2);
34183427
std::unordered_set<std::pair<TTabletId, TFollowerId>> activeTablets;
34193428
TActorId senderA = runtime.AllocateEdgeActor();
34203429
for (int i = 0; i < NODES; ++i) {
3430+
if (i == leaderNode) {
3431+
continue;
3432+
}
34213433
TActorId whiteboard = NNodeWhiteboard::MakeNodeWhiteboardServiceId(runtime.GetNodeId(i));
34223434
runtime.Send(new IEventHandle(whiteboard, senderA, new NNodeWhiteboard::TEvWhiteboard::TEvTabletStateRequest()));
34233435
TAutoPtr<IEventHandle> handle;
@@ -3432,6 +3444,16 @@ Y_UNIT_TEST_SUITE(THiveTest) {
34323444
}
34333445
}
34343446
UNIT_ASSERT_VALUES_EQUAL(activeTablets.size(), 3);
3447+
leaders = std::count_if(activeTablets.begin(), activeTablets.end(), [](auto&& p) { return p.second == 0; });
3448+
UNIT_ASSERT_VALUES_EQUAL(leaders, 1);
3449+
}
3450+
3451+
// Baseline follower-promotion scenario: runs the shared TestFollowerPromotion(bool)
// helper with killDuringPromotion = false, i.e. no poison pills are sent to the
// remaining nodes while the promote-to-leader event is blocked.
Y_UNIT_TEST(TestFollowerPromotion) {
3452+
TestFollowerPromotion(false);
3453+
}
3454+
3455+
// Follower-dies-during-promotion scenario: runs the shared TestFollowerPromotion(bool)
// helper with killDuringPromotion = true, so a TEvPoisonPill is sent through a pipe
// from every non-leader node while the promote-to-leader event is still blocked.
Y_UNIT_TEST(TestFollowerPromotionFollowerDies) {
3455+
TestFollowerPromotion(true);
34353457
}
34363458

34373459
Y_UNIT_TEST(TestManyFollowersOnOneNode) {

ydb/core/mind/local.cpp

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,19 +52,19 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
5252
ui32 Generation;
5353
TTabletTypes::EType TabletType;
5454
NKikimrLocal::EBootMode BootMode;
55-
ui32 FollowerId;
5655

5756
TTablet()
5857
: Tablet()
5958
, Generation(0)
6059
, TabletType()
6160
, BootMode(NKikimrLocal::EBootMode::BOOT_MODE_LEADER)
62-
, FollowerId(0)
6361
{}
6462
};
6563

6664
struct TTabletEntry : TTablet {
6765
TInstant From;
66+
bool IsPromoting = false;
67+
ui32 PromotingFromFollower = 0;
6868

6969
TTabletEntry()
7070
: From(TInstant::MicroSeconds(0))
@@ -141,6 +141,10 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
141141
::NMonitoring::TDynamicCounters::TCounterPtr CounterCancelDemotedByBS;
142142
::NMonitoring::TDynamicCounters::TCounterPtr CounterCancelUnknownReason;
143143

144+
// Returns the key that shares tabletId's first component but has the second
// component set to 0. The second component is the follower id; 0 is the value the
// promotion path uses for the leader instance.
static TTabletId LeaderId(TTabletId tabletId) {
145+
return {tabletId.first, 0};
146+
}
147+
144148
void Die(const TActorContext &ctx) override {
145149
if (HivePipeClient) {
146150
if (Connected) {
@@ -430,15 +434,17 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
430434
// promote to leader
431435
it->second.BootMode = NKikimrLocal::EBootMode::BOOT_MODE_LEADER;
432436
it->second.Generation = suggestedGen;
433-
tabletId.second = 0; // FollowerId = 0
434-
TTabletEntry &entry = InbootTablets[tabletId];
437+
TTabletId leaderId = LeaderId(tabletId);
438+
TTabletEntry &entry = InbootTablets[leaderId];
435439
entry = it->second;
436440
entry.From = ctx.Now();
437441
entry.BootMode = NKikimrLocal::EBootMode::BOOT_MODE_LEADER;
438442
entry.Generation = suggestedGen;
443+
entry.IsPromoting = true;
444+
entry.PromotingFromFollower = tabletId.second;
439445
ctx.Send(entry.Tablet, new TEvTablet::TEvPromoteToLeader(suggestedGen, info));
440446
MarkDeadTablet(it->first, 0, TEvLocal::TEvTabletStatus::StatusSupersededByLeader, TEvTablet::TEvTabletDead::ReasonError, ctx);
441-
OnlineTablets.erase(it);
447+
it->second.IsPromoting = true;
442448
LOG_DEBUG_S(ctx, NKikimrServices::LOCAL,
443449
"TLocalNodeRegistrar::Handle TEvLocal::TEvBootTablet follower tablet " << tabletId << " promoted to leader");
444450
return;
@@ -718,6 +724,12 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
718724
<< " marked as running at generation "
719725
<< generation);
720726
NTabletPipe::SendData(ctx, HivePipeClient, new TEvLocal::TEvTabletStatus(TEvLocal::TEvTabletStatus::StatusOk, tabletId, generation));
727+
if (inbootIt->second.IsPromoting) {
728+
TTabletId promotedTablet{tabletId.first, inbootIt->second.PromotingFromFollower};
729+
OnlineTablets.erase(promotedTablet);
730+
inbootIt->second.IsPromoting = false;
731+
inbootIt->second.PromotingFromFollower = 0;
732+
}
721733
OnlineTablets.emplace(tabletId, inbootIt->second);
722734
InbootTablets.erase(inbootIt);
723735
}
@@ -818,6 +830,14 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
818830
});
819831
if (onlineIt != OnlineTablets.end()) { // from online list
820832
MarkDeadTablet(onlineIt->first, generation, TEvLocal::TEvTabletStatus::StatusFailed, msg->Reason, ctx);
833+
// The dying online entry was in the middle of being promoted: its leader twin
// (same tablet, follower id 0) may still be registered in InbootTablets and has to
// be reported as failed and dropped together with it.
if (onlineIt->second.IsPromoting) {
834+
TTabletId leader = LeaderId(onlineIt->first);
835+
auto inbootIt = InbootTablets.find(leader);
836+
if (inbootIt != InbootTablets.end()) {
837+
MarkDeadTablet(leader, inbootIt->second.Generation, TEvLocal::TEvTabletStatus::StatusFailed, msg->Reason, ctx);
838+
// Erase only when the lookup succeeded: erasing the end() iterator is undefined behaviour.
InbootTablets.erase(inbootIt);
839+
}
840+
}
821841
OnlineTablets.erase(onlineIt);
822842
UpdateEstimate();
823843
return;

0 commit comments

Comments
 (0)