Skip to content

Commit f2066d1

Browse files
committed
handle follower dying while being promoted to leader (ydb-platform#15134)
1 parent 5315324 commit f2066d1

File tree

2 files changed

+77
-31
lines changed

2 files changed

+77
-31
lines changed

ydb/core/mind/hive/hive_ut.cpp

Lines changed: 40 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3254,7 +3254,7 @@ Y_UNIT_TEST_SUITE(THiveTest) {
32543254
}
32553255
}
32563256

3257-
Y_UNIT_TEST(TestFollowerPromotion) {
3257+
void TestFollowerPromotion(bool killDuringPromotion) {
32583258
constexpr int NODES = 3;
32593259
TTestBasicRuntime runtime(NODES, false);
32603260
Setup(runtime, true);
@@ -3285,30 +3285,42 @@ Y_UNIT_TEST_SUITE(THiveTest) {
32853285
MakeSureTabletIsUp(runtime, tabletId, i, &pipeConfig, &tabletRolesBefore[i]);
32863286
}
32873287
int leaders = std::accumulate(tabletRolesBefore.begin(), tabletRolesBefore.end(), 0, [](int a, bool b) -> int { return b ? a + 1 : a; });
3288-
UNIT_ASSERT_VALUES_EQUAL(leaders, 1);
32893288
int leaderNode = std::find(tabletRolesBefore.begin(), tabletRolesBefore.end(), true) - tabletRolesBefore.begin();
3290-
// killing leader
3291-
SendKillLocal(runtime, leaderNode);
3289+
UNIT_ASSERT_VALUES_EQUAL(leaders, 1);
32923290
{
3293-
TDispatchOptions options;
3294-
options.FinalEvents.emplace_back(TEvLocal::EvTabletStatus);
3295-
runtime.DispatchEvents(options);
3296-
}
3297-
std::array<bool, NODES> tabletRolesIntermediate = {};
3298-
for (int i = 0; i < NODES; ++i) {
3299-
if (i != leaderNode) {
3300-
MakeSureTabletIsUp(runtime, tabletId, i, &pipeConfig, &tabletRolesIntermediate[i]);
3301-
} else {
3302-
tabletRolesIntermediate[i] = false;
3291+
TBlockEvents<TEvTablet::TEvPromoteToLeader> blockPromote(runtime);
3292+
// killing leader
3293+
SendKillLocal(runtime, leaderNode);
3294+
3295+
while (blockPromote.empty()) {
3296+
runtime.DispatchEvents({}, TDuration::MilliSeconds(100));
3297+
}
3298+
3299+
if (killDuringPromotion) {
3300+
for (int i = 0; i < NODES; ++i) {
3301+
if (i == leaderNode) {
3302+
continue;
3303+
}
3304+
TActorId sender = runtime.AllocateEdgeActor(i);
3305+
runtime.SendToPipe(tabletId, sender, new TEvents::TEvPoisonPill, i, pipeConfig);
3306+
}
33033307
}
3308+
3309+
runtime.DispatchEvents({}, TDuration::MilliSeconds(100));
3310+
3311+
blockPromote.Stop().Unblock();
3312+
}
3313+
{
3314+
TDispatchOptions options;
3315+
options.FinalEvents.emplace_back(TEvLocal::EvTabletStatus, killDuringPromotion ? 3 : 1);
3316+
runtime.DispatchEvents(options, TDuration::MilliSeconds(100));
33043317
}
3305-
leaders = std::accumulate(tabletRolesIntermediate.begin(), tabletRolesIntermediate.end(), 0, [](int a, bool b) -> int { return b ? a + 1 : a; });
3306-
int followers = std::accumulate(tabletRolesIntermediate.begin(), tabletRolesIntermediate.end(), 0, [](int a, bool b) -> int { return b ? a : a + 1; });
3307-
UNIT_ASSERT_VALUES_EQUAL(leaders, 1);
3308-
UNIT_ASSERT_VALUES_EQUAL(followers, 2);
33093318
std::unordered_set<std::pair<TTabletId, TFollowerId>> activeTablets;
33103319
TActorId senderA = runtime.AllocateEdgeActor();
33113320
for (int i = 0; i < NODES; ++i) {
3321+
if (i == leaderNode) {
3322+
continue;
3323+
}
33123324
TActorId whiteboard = NNodeWhiteboard::MakeNodeWhiteboardServiceId(runtime.GetNodeId(i));
33133325
runtime.Send(new IEventHandle(whiteboard, senderA, new NNodeWhiteboard::TEvWhiteboard::TEvTabletStateRequest()));
33143326
TAutoPtr<IEventHandle> handle;
@@ -3323,6 +3335,16 @@ Y_UNIT_TEST_SUITE(THiveTest) {
33233335
}
33243336
}
33253337
UNIT_ASSERT_VALUES_EQUAL(activeTablets.size(), 3);
3338+
leaders = std::count_if(activeTablets.begin(), activeTablets.end(), [](auto&& p) { return p.second == 0; });
3339+
UNIT_ASSERT_VALUES_EQUAL(leaders, 1);
3340+
}
3341+
3342+
// Baseline follower-promotion scenario: runs the shared TestFollowerPromotion(bool)
// helper with killDuringPromotion = false, i.e. no poison pills are sent to the
// tablet while the promotion event is being held back.
Y_UNIT_TEST(TestFollowerPromotion) {
3343+
TestFollowerPromotion(false);
3344+
}
3345+
3346+
// Same scenario with killDuringPromotion = true: while TEvPromoteToLeader is
// blocked, the helper sends TEvPoisonPill to the tablet through pipes opened from
// every node except the old leader's, then unblocks the promotion.
Y_UNIT_TEST(TestFollowerPromotionFollowerDies) {
3347+
TestFollowerPromotion(true);
33263348
}
33273349

33283350
Y_UNIT_TEST(TestManyFollowersOnOneNode) {

ydb/core/mind/local.cpp

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -53,19 +53,19 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
5353
ui32 Generation;
5454
TTabletTypes::EType TabletType;
5555
NKikimrLocal::EBootMode BootMode;
56-
ui32 FollowerId;
5756

5857
TTablet()
5958
: Tablet()
6059
, Generation(0)
6160
, TabletType()
6261
, BootMode(NKikimrLocal::EBootMode::BOOT_MODE_LEADER)
63-
, FollowerId(0)
6462
{}
6563
};
6664

6765
struct TTabletEntry : TTablet {
6866
TInstant From;
67+
bool IsPromoting = false;
68+
ui32 PromotingFromFollower = 0;
6969

7070
TTabletEntry()
7171
: From(TInstant::MicroSeconds(0))
@@ -142,6 +142,10 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
142142
::NMonitoring::TDynamicCounters::TCounterPtr CounterCancelDemotedByBS;
143143
::NMonitoring::TDynamicCounters::TCounterPtr CounterCancelUnknownReason;
144144

145+
// Maps a (tablet, follower) id to the id of that tablet's leader: the first
// component is kept and the follower component is reset to 0.
static TTabletId LeaderId(TTabletId tabletId) {
    tabletId.second = 0;
    return tabletId;
}
148+
145149
void Die(const TActorContext &ctx) override {
146150
if (HivePipeClient) {
147151
if (Connected) {
@@ -386,6 +390,24 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
386390
ScheduleSendTabletMetrics(ctx);
387391
}
388392

393+
// Begins promoting an online follower to leader.
//   tabletId      - id of the follower being promoted (second component is its follower id)
//   followerEntry - the follower's entry; it gets flagged as promoting
//   suggestedGen  - generation recorded on the new leader entry
//   now           - timestamp recorded as the leader entry's From
// A leader entry keyed by LeaderId(tabletId) is created (or overwritten) in
// InbootTablets as a copy of the follower entry, then adjusted to leader mode.
void StartPromotion(TTabletId tabletId, TOnlineTabletEntry& followerEntry, ui32 suggestedGen, TInstant now) {
    TTabletEntry& inboot = InbootTablets[LeaderId(tabletId)];

    // Flag the follower before copying so that the in-boot copy carries the flag as well.
    followerEntry.IsPromoting = true;
    inboot = followerEntry;

    // Remember which follower this leader entry originates from.
    inboot.PromotingFromFollower = tabletId.second;
    inboot.Generation = suggestedGen;
    inboot.BootMode = NKikimrLocal::EBootMode::BOOT_MODE_LEADER;
    inboot.From = now;
}
403+
404+
// Completes a promotion: removes the source follower (identified by
// entry.PromotingFromFollower) from OnlineTablets and clears the promotion
// bookkeeping on the given entry.
void FinishPromotion(TTabletId tabletId, TTabletEntry& entry) {
    OnlineTablets.erase(TTabletId{tabletId.first, entry.PromotingFromFollower});
    entry.PromotingFromFollower = 0;
    entry.IsPromoting = false;
}
410+
389411
void Handle(TEvLocal::TEvBootTablet::TPtr &ev, const TActorContext &ctx) {
390412
NKikimrLocal::TEvBootTablet &record = ev->Get()->Record;
391413
TIntrusivePtr<TTabletStorageInfo> info(TabletStorageInfoFromProto(record.GetInfo()));
@@ -428,18 +450,9 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
428450
if (it != OnlineTablets.end()) {
429451
if (it->second.BootMode == NKikimrLocal::EBootMode::BOOT_MODE_FOLLOWER
430452
&& record.GetBootMode() == NKikimrLocal::EBootMode::BOOT_MODE_LEADER) {
431-
// promote to leader
432-
it->second.BootMode = NKikimrLocal::EBootMode::BOOT_MODE_LEADER;
433-
it->second.Generation = suggestedGen;
434-
tabletId.second = 0; // FollowerId = 0
435-
TTabletEntry &entry = InbootTablets[tabletId];
436-
entry = it->second;
437-
entry.From = ctx.Now();
438-
entry.BootMode = NKikimrLocal::EBootMode::BOOT_MODE_LEADER;
439-
entry.Generation = suggestedGen;
440-
ctx.Send(entry.Tablet, new TEvTablet::TEvPromoteToLeader(suggestedGen, info));
453+
StartPromotion(tabletId, it->second, suggestedGen, ctx.Now());
454+
ctx.Send(it->second.Tablet, new TEvTablet::TEvPromoteToLeader(suggestedGen, info));
441455
MarkDeadTablet(it->first, 0, TEvLocal::TEvTabletStatus::StatusSupersededByLeader, TEvTablet::TEvTabletDead::ReasonError, ctx);
442-
OnlineTablets.erase(it);
443456
LOG_DEBUG_S(ctx, NKikimrServices::LOCAL,
444457
"TLocalNodeRegistrar::Handle TEvLocal::TEvBootTablet follower tablet " << tabletId << " promoted to leader");
445458
return;
@@ -719,6 +732,9 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
719732
<< " marked as running at generation "
720733
<< generation);
721734
NTabletPipe::SendData(ctx, HivePipeClient, new TEvLocal::TEvTabletStatus(TEvLocal::TEvTabletStatus::StatusOk, tabletId, generation));
735+
if (inbootIt->second.IsPromoting) {
736+
FinishPromotion(tabletId, inbootIt->second);
737+
}
722738
OnlineTablets.emplace(tabletId, inbootIt->second);
723739
InbootTablets.erase(inbootIt);
724740
}
@@ -819,6 +835,14 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
819835
});
820836
if (onlineIt != OnlineTablets.end()) { // from online list
821837
MarkDeadTablet(onlineIt->first, generation, TEvLocal::TEvTabletStatus::StatusFailed, msg->Reason, ctx);
838+
if (onlineIt->second.IsPromoting) {
    // The dead tablet was a follower in the middle of promotion: also report and
    // drop the leader entry that was registered in InbootTablets for it.
    TTabletId leader = LeaderId(onlineIt->first);
    auto inbootIt = InbootTablets.find(leader);
    if (inbootIt != InbootTablets.end()) {
        MarkDeadTablet(leader, inbootIt->second.Generation, TEvLocal::TEvTabletStatus::StatusFailed, msg->Reason, ctx);
        // Erase only when the entry was found; passing end() to erase() is undefined behavior.
        InbootTablets.erase(inbootIt);
    }
}
822846
OnlineTablets.erase(onlineIt);
823847
UpdateEstimate();
824848
return;

0 commit comments

Comments
 (0)