Skip to content

Commit c65f27b

Browse files
authored
improve node deletion in hive (#7218)
1 parent d551911 commit c65f27b

File tree

4 files changed

+17
-6
lines changed

4 files changed

+17
-6
lines changed

ydb/core/mind/hive/hive_impl.cpp

+8-2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <ydb/core/cms/console/console.h>
44
#include <ydb/core/cms/console/configs_dispatcher.h>
55
#include <ydb/core/protos/counters_hive.pb.h>
6+
#include <ydb/core/protos/node_broker.pb.h>
67
#include <ydb/core/util/tuples.h>
78
#include <ydb/library/yverify_stream/yverify_stream.h>
89
#include <ydb/library/actors/interconnect/interconnect.h>
@@ -96,10 +97,12 @@ void THive::RestartPipeTx(ui64 tabletId) {
9697

9798
bool THive::TryToDeleteNode(TNodeInfo* node) {
9899
if (node->CanBeDeleted()) {
100+
BLOG_I("TryToDeleteNode(" << node->Id << "): deleting");
99101
DeleteNode(node->Id);
100102
return true;
101103
}
102104
if (!node->DeletionScheduled) {
105+
BLOG_D("TryToDeleteNode(" << node->Id << "): waiting " << GetNodeDeletePeriod());
103106
Schedule(GetNodeDeletePeriod(), new TEvPrivate::TEvDeleteNode(node->Id));
104107
node->DeletionScheduled = true;
105108
}
@@ -989,8 +992,9 @@ void THive::OnActivateExecutor(const TActorContext&) {
989992
BuildLocalConfig();
990993
ClusterConfig = AppData()->HiveConfig;
991994
SpreadNeighbours = ClusterConfig.GetSpreadNeighbours();
995+
NodeBrokerEpoch = TDuration::MicroSeconds(NKikimrNodeBroker::TConfig().GetEpochDuration());
992996
Send(NConsole::MakeConfigsDispatcherID(SelfId().NodeId()),
993-
new NConsole::TEvConfigsDispatcher::TEvSetConfigSubscriptionRequest(NKikimrConsole::TConfigItem::HiveConfigItem));
997+
new NConsole::TEvConfigsDispatcher::TEvSetConfigSubscriptionRequest({NKikimrConsole::TConfigItem::HiveConfigItem, NKikimrConsole::TConfigItem::NodeBrokerConfigItem}));
994998
Execute(CreateInitScheme());
995999
if (!ResponsivenessPinger) {
9961000
ResponsivenessPinger = new TTabletResponsivenessPinger(TabletCounters->Simple()[NHive::COUNTER_RESPONSE_TIME_USEC], TDuration::Seconds(1));
@@ -2218,7 +2222,9 @@ void THive::Handle(TEvHive::TEvInitiateTabletExternalBoot::TPtr& ev) {
22182222
void THive::Handle(NConsole::TEvConsole::TEvConfigNotificationRequest::TPtr& ev) {
22192223
const NKikimrConsole::TConfigNotificationRequest& record = ev->Get()->Record;
22202224
ClusterConfig = record.GetConfig().GetHiveConfig();
2221-
BLOG_D("Received TEvConsole::TEvConfigNotificationRequest with update of cluster config: " << ClusterConfig.ShortDebugString());
2225+
NodeBrokerEpoch = TDuration::MicroSeconds(record.GetConfig().GetNodeBrokerConfig().GetEpochDuration());
2226+
BLOG_D("Received TEvConsole::TEvConfigNotificationRequest with update of cluster config: " << ClusterConfig.ShortDebugString()
2227+
<< "; " << record.GetConfig().GetNodeBrokerConfig().ShortDebugString());
22222228
BuildCurrentConfig();
22232229
Send(ev->Sender, new NConsole::TEvConsole::TEvConfigNotificationResponse(record), 0, ev->Cookie);
22242230
}

ydb/core/mind/hive/hive_impl.h

+6-1
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,7 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
445445

446446
NKikimrConfig::THiveConfig ClusterConfig;
447447
NKikimrConfig::THiveConfig DatabaseConfig;
448+
TDuration NodeBrokerEpoch;
448449
std::unordered_map<TTabletTypes::EType, NKikimrConfig::THiveTabletLimit> TabletLimit; // built from CurrentConfig
449450
std::unordered_map<TTabletTypes::EType, NKikimrHive::TDataCentersPreference> DefaultDataCentersPreference;
450451
std::unordered_map<TDataCenterId, std::unordered_set<TNodeId>> RegisteredDataCenterNodes;
@@ -740,7 +741,11 @@ TTabletInfo* FindTabletEvenInDeleting(TTabletId tabletId, TFollowerId followerId
740741
}
741742

742743
TDuration GetNodeDeletePeriod() const {
743-
return TDuration::Seconds(CurrentConfig.GetNodeDeletePeriod());
744+
if (CurrentConfig.HasNodeDeletePeriod()) {
745+
return TDuration::Seconds(CurrentConfig.GetNodeDeletePeriod());
746+
} else {
747+
return NodeBrokerEpoch;
748+
}
744749
}
745750

746751
ui64 GetDrainInflight() const {

ydb/core/mind/hive/tx__load_everything.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -338,9 +338,9 @@ class TTxLoadEverything : public TTransactionBase<THive> {
338338
// it's safe to call here, because there are no tablets on the node yet
339339
node.BecomeDisconnected();
340340
}
341-
if (node.CanBeDeleted()) {
341+
if (Self->TryToDeleteNode(&node)) {
342+
// node is deleted from hashmap
342343
db.Table<Schema::Node>().Key(nodeId).Delete();
343-
Self->Nodes.erase(nodeId);
344344
} else if (node.IsUnknown() && node.LocationAcquired) {
345345
Self->AddRegisteredDataCentersNode(node.Location.GetDataCenterId(), node.Id);
346346
}

ydb/core/protos/config.proto

+1-1
Original file line numberDiff line numberDiff line change
@@ -1439,7 +1439,7 @@ message THiveConfig {
14391439
optional double MinPeriodBetweenReassign = 33 [default = 300.0]; // seconds
14401440
optional double TabletRestartWatchPeriod = 34 [default = 3600.0]; // seconds
14411441
optional double NodeRestartWatchPeriod = 35 [default = 3600.0]; // seconds
1442-
optional uint64 NodeDeletePeriod = 36 [default = 14400]; // seconds
1442+
optional uint64 NodeDeletePeriod = 36 [default = 3600]; // seconds
14431443
repeated THiveTabletLimit DefaultTabletLimit = 37;
14441444
repeated THiveTabletPreference DefaultTabletPreference = 38;
14451445
optional uint64 SystemTabletCategoryId = 39 [default = 1];

0 commit comments

Comments
 (0)