diff --git a/ydb/core/mind/hive/hive_ut.cpp b/ydb/core/mind/hive/hive_ut.cpp
index ae6823df3bcf..ea8b27b32d39 100644
--- a/ydb/core/mind/hive/hive_ut.cpp
+++ b/ydb/core/mind/hive/hive_ut.cpp
@@ -3363,7 +3363,9 @@ Y_UNIT_TEST_SUITE(THiveTest) {
         }
     }
 
-    Y_UNIT_TEST(TestFollowerPromotion) {
+    // Kills the leader and verifies that exactly one leader is found afterwards.
+    // With killDuringPromotion set, the tablet on every other node is sent a poison pill while blockPromote holds events.
+    void TestFollowerPromotion(bool killDuringPromotion) {
         constexpr int NODES = 3;
         TTestBasicRuntime runtime(NODES, false);
         Setup(runtime, true);
@@ -3394,30 +3396,42 @@ Y_UNIT_TEST_SUITE(THiveTest) {
             MakeSureTabletIsUp(runtime, tabletId, i, &pipeConfig, &tabletRolesBefore[i]);
         }
         int leaders = std::accumulate(tabletRolesBefore.begin(), tabletRolesBefore.end(), 0, [](int a, bool b) -> int { return b ? a + 1 : a; });
-        UNIT_ASSERT_VALUES_EQUAL(leaders, 1);
         int leaderNode = std::find(tabletRolesBefore.begin(), tabletRolesBefore.end(), true) - tabletRolesBefore.begin();
-        // killing leader
-        SendKillLocal(runtime, leaderNode);
+        UNIT_ASSERT_VALUES_EQUAL(leaders, 1);
         {
-            TDispatchOptions options;
-            options.FinalEvents.emplace_back(TEvLocal::EvTabletStatus);
-            runtime.DispatchEvents(options);
-        }
-        std::array<bool, NODES> tabletRolesIntermediate = {};
-        for (int i = 0; i < NODES; ++i) {
-            if (i != leaderNode) {
-                MakeSureTabletIsUp(runtime, tabletId, i, &pipeConfig, &tabletRolesIntermediate[i]);
-            } else {
-                tabletRolesIntermediate[i] = false;
+            TBlockEvents<TEvTablet::TEvPromoteToLeader> blockPromote(runtime);
+            // killing leader
+            SendKillLocal(runtime, leaderNode);
+
+            while (blockPromote.empty()) {
+                runtime.DispatchEvents({}, TDuration::MilliSeconds(100));
+            }
+
+            if (killDuringPromotion) {
+                for (int i = 0; i < NODES; ++i) {
+                    if (i == leaderNode) {
+                        continue;
+                    }
+                    TActorId sender = runtime.AllocateEdgeActor(i);
+                    runtime.SendToPipe(tabletId, sender, new TEvents::TEvPoisonPill, i, pipeConfig);
+                }
             }
+
+            runtime.DispatchEvents({}, TDuration::MilliSeconds(100));
+
+            blockPromote.Stop().Unblock();
+        }
+        {
+            TDispatchOptions options;
+            options.FinalEvents.emplace_back(TEvLocal::EvTabletStatus, killDuringPromotion ? 3 : 1);
+            runtime.DispatchEvents(options, TDuration::MilliSeconds(100));
         }
-        leaders = std::accumulate(tabletRolesIntermediate.begin(), tabletRolesIntermediate.end(), 0, [](int a, bool b) -> int { return b ? a + 1 : a; });
-        int followers = std::accumulate(tabletRolesIntermediate.begin(), tabletRolesIntermediate.end(), 0, [](int a, bool b) -> int { return b ? a : a + 1; });
-        UNIT_ASSERT_VALUES_EQUAL(leaders, 1);
-        UNIT_ASSERT_VALUES_EQUAL(followers, 2);
         std::unordered_set<std::pair<TTabletId, TFollowerId>> activeTablets;
         TActorId senderA = runtime.AllocateEdgeActor();
         for (int i = 0; i < NODES; ++i) {
+            if (i == leaderNode) {
+                continue;
+            }
             TActorId whiteboard = NNodeWhiteboard::MakeNodeWhiteboardServiceId(runtime.GetNodeId(i));
             runtime.Send(new IEventHandle(whiteboard, senderA, new NNodeWhiteboard::TEvWhiteboard::TEvTabletStateRequest()));
             TAutoPtr<IEventHandle> handle;
@@ -3432,6 +3446,16 @@ Y_UNIT_TEST_SUITE(THiveTest) {
             }
         }
         UNIT_ASSERT_VALUES_EQUAL(activeTablets.size(), 3);
+        leaders = std::count_if(activeTablets.begin(), activeTablets.end(), [](auto&& p) { return p.second == 0; });
+        UNIT_ASSERT_VALUES_EQUAL(leaders, 1);
+    }
+
+    Y_UNIT_TEST(TestFollowerPromotion) {
+        TestFollowerPromotion(false);
+    }
+
+    Y_UNIT_TEST(TestFollowerPromotionFollowerDies) {
+        TestFollowerPromotion(true);
     }
 
     Y_UNIT_TEST(TestManyFollowersOnOneNode) {
diff --git a/ydb/core/mind/local.cpp b/ydb/core/mind/local.cpp
index 3bffef054101..a8242818bf8c 100644
--- a/ydb/core/mind/local.cpp
+++ b/ydb/core/mind/local.cpp
@@ -52,19 +52,19 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
         ui32 Generation;
         TTabletTypes::EType TabletType;
         NKikimrLocal::EBootMode BootMode;
-        ui32 FollowerId;
 
         TTablet()
             : Tablet()
             , Generation(0)
             , TabletType()
             , BootMode(NKikimrLocal::EBootMode::BOOT_MODE_LEADER)
-            , FollowerId(0)
         {}
     };
 
     struct TTabletEntry : TTablet {
         TInstant From;
+        bool IsPromoting = false; // set by StartPromotion, cleared by FinishPromotion
+        ui32 PromotingFromFollower = 0; // follower id recorded by StartPromotion on the in-boot leader entry
 
         TTabletEntry()
             : From(TInstant::MicroSeconds(0))
@@ -141,6 +141,11 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
     ::NMonitoring::TDynamicCounters::TCounterPtr CounterCancelDemotedByBS;
     ::NMonitoring::TDynamicCounters::TCounterPtr CounterCancelUnknownReason;
 
+    // Id of the leader of the same tablet: same first component, follower id (second component) 0.
+    static TTabletId LeaderId(TTabletId tabletId) {
+        return {tabletId.first, 0};
+    }
+
     void Die(const TActorContext &ctx) override {
         if (HivePipeClient) {
             if (Connected) {
@@ -385,6 +390,29 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
         ScheduleSendTabletMetrics(ctx);
     }
 
+    // Registers an in-boot leader entry copied from the follower's online entry
+    // (leader boot mode, generation suggestedGen, From = now) and marks both entries as promoting.
+    // The follower's online entry is not removed here.
+    void StartPromotion(TTabletId tabletId, TOnlineTabletEntry& followerEntry, ui32 suggestedGen, TInstant now) {
+        TTabletId leaderId = LeaderId(tabletId);
+        TTabletEntry& leaderEntry = InbootTablets[leaderId];
+        followerEntry.IsPromoting = true;
+        leaderEntry = followerEntry;
+        leaderEntry.From = now;
+        leaderEntry.BootMode = NKikimrLocal::EBootMode::BOOT_MODE_LEADER;
+        leaderEntry.Generation = suggestedGen;
+        leaderEntry.PromotingFromFollower = tabletId.second;
+    }
+
+    // Removes the online entry of the follower recorded in entry.PromotingFromFollower
+    // and clears the promotion marks on entry.
+    void FinishPromotion(TTabletId tabletId, TTabletEntry& entry) {
+        TTabletId promotedTablet{tabletId.first, entry.PromotingFromFollower};
+        OnlineTablets.erase(promotedTablet);
+        entry.IsPromoting = false;
+        entry.PromotingFromFollower = 0;
+    }
+
     void Handle(TEvLocal::TEvBootTablet::TPtr &ev, const TActorContext &ctx) {
         NKikimrLocal::TEvBootTablet &record = ev->Get()->Record;
         TIntrusivePtr<TTabletStorageInfo> info(TabletStorageInfoFromProto(record.GetInfo()));
@@ -427,18 +455,9 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
             if (it != OnlineTablets.end()) {
                 if (it->second.BootMode == NKikimrLocal::EBootMode::BOOT_MODE_FOLLOWER
                     && record.GetBootMode() == NKikimrLocal::EBootMode::BOOT_MODE_LEADER) {
-                    // promote to leader
-                    it->second.BootMode = NKikimrLocal::EBootMode::BOOT_MODE_LEADER;
-                    it->second.Generation = suggestedGen;
-                    tabletId.second = 0; // FollowerId = 0
-                    TTabletEntry &entry = InbootTablets[tabletId];
-                    entry = it->second;
-                    entry.From = ctx.Now();
-                    entry.BootMode = NKikimrLocal::EBootMode::BOOT_MODE_LEADER;
-                    entry.Generation = suggestedGen;
-                    ctx.Send(entry.Tablet, new TEvTablet::TEvPromoteToLeader(suggestedGen, info));
+                    StartPromotion(tabletId, it->second, suggestedGen, ctx.Now());
+                    ctx.Send(it->second.Tablet, new TEvTablet::TEvPromoteToLeader(suggestedGen, info));
                     MarkDeadTablet(it->first, 0, TEvLocal::TEvTabletStatus::StatusSupersededByLeader, TEvTablet::TEvTabletDead::ReasonError, ctx);
-                    OnlineTablets.erase(it);
                     LOG_DEBUG_S(ctx, NKikimrServices::LOCAL,
                         "TLocalNodeRegistrar::Handle TEvLocal::TEvBootTablet follower tablet " << tabletId << " promoted to leader");
                     return;
@@ -718,6 +737,9 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
                 << " marked as running at generation "
                 << generation);
             NTabletPipe::SendData(ctx, HivePipeClient, new TEvLocal::TEvTabletStatus(TEvLocal::TEvTabletStatus::StatusOk, tabletId, generation));
+            if (inbootIt->second.IsPromoting) {
+                FinishPromotion(tabletId, inbootIt->second);
+            }
             OnlineTablets.emplace(tabletId, inbootIt->second);
             InbootTablets.erase(inbootIt);
         }
@@ -818,6 +840,16 @@ class TLocalNodeRegistrar : public TActorBootstrapped<TLocalNodeRegistrar> {
             });
         if (onlineIt != OnlineTablets.end()) { // from online list
             MarkDeadTablet(onlineIt->first, generation, TEvLocal::TEvTabletStatus::StatusFailed, msg->Reason, ctx);
+            if (onlineIt->second.IsPromoting) {
+                // The follower died while marked as promoting: report the pending leader as failed as well.
+                // Erase only an entry that was actually found - erasing the end iterator is undefined behavior.
+                TTabletId leader = LeaderId(onlineIt->first);
+                auto inbootIt = InbootTablets.find(leader);
+                if (inbootIt != InbootTablets.end()) {
+                    MarkDeadTablet(leader, inbootIt->second.Generation, TEvLocal::TEvTabletStatus::StatusFailed, msg->Reason, ctx);
+                    InbootTablets.erase(inbootIt);
+                }
+            }
             OnlineTablets.erase(onlineIt);
             UpdateEstimate();
             return;