Skip to content

Commit 1f60fc8

Browse files
authored
handle follower dying while being promoted to leader (#15134)
1 parent 6afe3c8 commit 1f60fc8

File tree

2 files changed

+77
-31
lines changed

2 files changed

+77
-31
lines changed

ydb/core/mind/hive/hive_ut.cpp

Lines changed: 40 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3360,7 +3360,7 @@ Y_UNIT_TEST_SUITE(THiveTest) {
33603360
}
33613361
}
33623362

3363-
Y_UNIT_TEST(TestFollowerPromotion) {
3363+
void TestFollowerPromotion(bool killDuringPromotion) {
33643364
constexpr int NODES = 3;
33653365
TTestBasicRuntime runtime(NODES, false);
33663366
Setup(runtime, true);
@@ -3391,30 +3391,42 @@ Y_UNIT_TEST_SUITE(THiveTest) {
33913391
MakeSureTabletIsUp(runtime, tabletId, i, &pipeConfig, &tabletRolesBefore[i]);
33923392
}
33933393
int leaders = std::accumulate(tabletRolesBefore.begin(), tabletRolesBefore.end(), 0, [](int a, bool b) -> int { return b ? a + 1 : a; });
3394-
UNIT_ASSERT_VALUES_EQUAL(leaders, 1);
33953394
int leaderNode = std::find(tabletRolesBefore.begin(), tabletRolesBefore.end(), true) - tabletRolesBefore.begin();
3396-
// killing leader
3397-
SendKillLocal(runtime, leaderNode);
3395+
UNIT_ASSERT_VALUES_EQUAL(leaders, 1);
33983396
{
3399-
TDispatchOptions options;
3400-
options.FinalEvents.emplace_back(TEvLocal::EvTabletStatus);
3401-
runtime.DispatchEvents(options);
3402-
}
3403-
std::array<bool, NODES> tabletRolesIntermediate = {};
3404-
for (int i = 0; i < NODES; ++i) {
3405-
if (i != leaderNode) {
3406-
MakeSureTabletIsUp(runtime, tabletId, i, &pipeConfig, &tabletRolesIntermediate[i]);
3407-
} else {
3408-
tabletRolesIntermediate[i] = false;
3397+
TBlockEvents<TEvTablet::TEvPromoteToLeader> blockPromote(runtime);
3398+
// killing leader
3399+
SendKillLocal(runtime, leaderNode);
3400+
3401+
while (blockPromote.empty()) {
3402+
runtime.DispatchEvents({}, TDuration::MilliSeconds(100));
3403+
}
3404+
3405+
if (killDuringPromotion) {
3406+
for (int i = 0; i < NODES; ++i) {
3407+
if (i == leaderNode) {
3408+
continue;
3409+
}
3410+
TActorId sender = runtime.AllocateEdgeActor(i);
3411+
runtime.SendToPipe(tabletId, sender, new TEvents::TEvPoisonPill, i, pipeConfig);
3412+
}
34093413
}
3414+
3415+
runtime.DispatchEvents({}, TDuration::MilliSeconds(100));
3416+
3417+
blockPromote.Stop().Unblock();
3418+
}
3419+
{
3420+
TDispatchOptions options;
3421+
options.FinalEvents.emplace_back(TEvLocal::EvTabletStatus, killDuringPromotion ? 3 : 1);
3422+
runtime.DispatchEvents(options, TDuration::MilliSeconds(100));
34103423
}
3411-
leaders = std::accumulate(tabletRolesIntermediate.begin(), tabletRolesIntermediate.end(), 0, [](int a, bool b) -> int { return b ? a + 1 : a; });
3412-
int followers = std::accumulate(tabletRolesIntermediate.begin(), tabletRolesIntermediate.end(), 0, [](int a, bool b) -> int { return b ? a : a + 1; });
3413-
UNIT_ASSERT_VALUES_EQUAL(leaders, 1);
3414-
UNIT_ASSERT_VALUES_EQUAL(followers, 2);
34153424
std::unordered_set<std::pair<TTabletId, TFollowerId>> activeTablets;
34163425
TActorId senderA = runtime.AllocateEdgeActor();
34173426
for (int i = 0; i < NODES; ++i) {
3427+
if (i == leaderNode) {
3428+
continue;
3429+
}
34183430
TActorId whiteboard = NNodeWhiteboard::MakeNodeWhiteboardServiceId(runtime.GetNodeId(i));
34193431
runtime.Send(new IEventHandle(whiteboard, senderA, new NNodeWhiteboard::TEvWhiteboard::TEvTabletStateRequest()));
34203432
TAutoPtr<IEventHandle> handle;
@@ -3429,6 +3441,16 @@ Y_UNIT_TEST_SUITE(THiveTest) {
34293441
}
34303442
}
34313443
UNIT_ASSERT_VALUES_EQUAL(activeTablets.size(), 3);
3444+
leaders = std::count_if(activeTablets.begin(), activeTablets.end(), [](auto&& p) { return p.second == 0; });
3445+
UNIT_ASSERT_VALUES_EQUAL(leaders, 1);
3446+
}
3447+
3448+
// Follower promotion scenario with killDuringPromotion = false: the leader's
// node is killed and no follower is poisoned while the promote event is held back.
Y_UNIT_TEST(TestFollowerPromotion) {
3449+
TestFollowerPromotion(false);
3450+
}
3451+
3452+
// Follower promotion scenario with killDuringPromotion = true: while
// TEvPromoteToLeader is blocked, a TEvPoisonPill is sent to the tablet through a
// pipe from every node other than the old leader's, so the follower being promoted
// can die mid-promotion.
Y_UNIT_TEST(TestFollowerPromotionFollowerDies) {
3453+
TestFollowerPromotion(true);
34323454
}
34333455

34343456
Y_UNIT_TEST(TestManyFollowersOnOneNode) {

ydb/core/mind/local.cpp

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -52,19 +52,19 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
5252
ui32 Generation;
5353
TTabletTypes::EType TabletType;
5454
NKikimrLocal::EBootMode BootMode;
55-
ui32 FollowerId;
5655

5756
TTablet()
5857
: Tablet()
5958
, Generation(0)
6059
, TabletType()
6160
, BootMode(NKikimrLocal::EBootMode::BOOT_MODE_LEADER)
62-
, FollowerId(0)
6361
{}
6462
};
6563

6664
struct TTabletEntry : TTablet {
6765
TInstant From;
66+
bool IsPromoting = false;
67+
ui32 PromotingFromFollower = 0;
6868

6969
TTabletEntry()
7070
: From(TInstant::MicroSeconds(0))
@@ -141,6 +141,10 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
141141
::NMonitoring::TDynamicCounters::TCounterPtr CounterCancelDemotedByBS;
142142
::NMonitoring::TDynamicCounters::TCounterPtr CounterCancelUnknownReason;
143143

144+
// Maps any instance of a tablet (leader or follower) to the id of its leader:
// the first component is kept and the second component (the follower id) is set to 0.
static TTabletId LeaderId(TTabletId tabletId) {
145+
return {tabletId.first, 0};
146+
}
147+
144148
void Die(const TActorContext &ctx) override {
145149
if (HivePipeClient) {
146150
if (Connected) {
@@ -385,6 +389,24 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
385389
ScheduleSendTabletMetrics(ctx);
386390
}
387391

392+
// Begins promoting an online follower to leader.
//
// tabletId      - id of the follower instance being promoted (second = follower id).
// followerEntry - the follower's entry; it is flagged IsPromoting and is NOT removed
//                 from any container by this function.
// suggestedGen  - generation recorded on the new leader entry.
// now           - timestamp stored in the leader entry's From field.
//
// The leader's slot in InbootTablets (keyed by LeaderId(tabletId)) is overwritten
// with a copy of the follower entry, switched to BOOT_MODE_LEADER, and remembers
// which follower it is being promoted from.
void StartPromotion(TTabletId tabletId, TOnlineTabletEntry& followerEntry, ui32 suggestedGen, TInstant now) {
393+
TTabletId leaderId = LeaderId(tabletId);
394+
TTabletEntry& leaderEntry = InbootTablets[leaderId];
395+
// Flag is set before the copy below, so the in-boot leader entry carries it as well.
followerEntry.IsPromoting = true;
396+
leaderEntry = followerEntry;
397+
leaderEntry.From = now;
398+
leaderEntry.BootMode = NKikimrLocal::EBootMode::BOOT_MODE_LEADER;
399+
leaderEntry.Generation = suggestedGen;
400+
// Remembered so FinishPromotion can locate the follower's OnlineTablets entry.
leaderEntry.PromotingFromFollower = tabletId.second;
401+
}
402+
403+
// Completes a promotion for the leader entry `entry` of tablet `tabletId`:
// removes the follower instance it was promoted from out of OnlineTablets and
// clears the promotion bookkeeping (IsPromoting, PromotingFromFollower) on `entry`.
void FinishPromotion(TTabletId tabletId, TTabletEntry& entry) {
404+
// Rebuild the follower's key from the tablet's first component and the stored follower id.
TTabletId promotedTablet{tabletId.first, entry.PromotingFromFollower};
405+
OnlineTablets.erase(promotedTablet);
406+
entry.IsPromoting = false;
407+
entry.PromotingFromFollower = 0;
408+
}
409+
388410
void Handle(TEvLocal::TEvBootTablet::TPtr &ev, const TActorContext &ctx) {
389411
NKikimrLocal::TEvBootTablet &record = ev->Get()->Record;
390412
TIntrusivePtr<TTabletStorageInfo> info(TabletStorageInfoFromProto(record.GetInfo()));
@@ -427,18 +449,9 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
427449
if (it != OnlineTablets.end()) {
428450
if (it->second.BootMode == NKikimrLocal::EBootMode::BOOT_MODE_FOLLOWER
429451
&& record.GetBootMode() == NKikimrLocal::EBootMode::BOOT_MODE_LEADER) {
430-
// promote to leader
431-
it->second.BootMode = NKikimrLocal::EBootMode::BOOT_MODE_LEADER;
432-
it->second.Generation = suggestedGen;
433-
tabletId.second = 0; // FollowerId = 0
434-
TTabletEntry &entry = InbootTablets[tabletId];
435-
entry = it->second;
436-
entry.From = ctx.Now();
437-
entry.BootMode = NKikimrLocal::EBootMode::BOOT_MODE_LEADER;
438-
entry.Generation = suggestedGen;
439-
ctx.Send(entry.Tablet, new TEvTablet::TEvPromoteToLeader(suggestedGen, info));
452+
StartPromotion(tabletId, it->second, suggestedGen, ctx.Now());
453+
ctx.Send(it->second.Tablet, new TEvTablet::TEvPromoteToLeader(suggestedGen, info));
440454
MarkDeadTablet(it->first, 0, TEvLocal::TEvTabletStatus::StatusSupersededByLeader, TEvTablet::TEvTabletDead::ReasonError, ctx);
441-
OnlineTablets.erase(it);
442455
LOG_DEBUG_S(ctx, NKikimrServices::LOCAL,
443456
"TLocalNodeRegistrar::Handle TEvLocal::TEvBootTablet follower tablet " << tabletId << " promoted to leader");
444457
return;
@@ -718,6 +731,9 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
718731
<< " marked as running at generation "
719732
<< generation);
720733
NTabletPipe::SendData(ctx, HivePipeClient, new TEvLocal::TEvTabletStatus(TEvLocal::TEvTabletStatus::StatusOk, tabletId, generation));
734+
if (inbootIt->second.IsPromoting) {
735+
FinishPromotion(tabletId, inbootIt->second);
736+
}
721737
OnlineTablets.emplace(tabletId, inbootIt->second);
722738
InbootTablets.erase(inbootIt);
723739
}
@@ -818,6 +834,14 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
818834
});
819835
if (onlineIt != OnlineTablets.end()) { // from online list
820836
MarkDeadTablet(onlineIt->first, generation, TEvLocal::TEvTabletStatus::StatusFailed, msg->Reason, ctx);
837+
// The dead tablet was a follower in the middle of promotion: its pending leader
// entry in InbootTablets (keyed by LeaderId) must be reported as failed and dropped.
if (onlineIt->second.IsPromoting) {
838+
TTabletId leader = LeaderId(onlineIt->first);
839+
auto inbootIt = InbootTablets.find(leader);
840+
if (inbootIt != InbootTablets.end()) {
841+
MarkDeadTablet(leader, inbootIt->second.Generation, TEvLocal::TEvTabletStatus::StatusFailed, msg->Reason, ctx);
842+
// Erase only inside the guard: erasing the end() iterator is undefined behavior.
InbootTablets.erase(inbootIt);
843+
}
844+
}
821845
OnlineTablets.erase(onlineIt);
822846
UpdateEstimate();
823847
return;

0 commit comments

Comments
 (0)