Skip to content

Commit 9903295

Browse files
authored
improve node deletion in hive (ydb-platform#7218) (ydb-platform#10393)
1 parent 84b4690 commit 9903295

File tree

4 files changed

+17
-6
lines changed

4 files changed

+17
-6
lines changed

ydb/core/mind/hive/hive_impl.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <ydb/core/cms/console/console.h>
44
#include <ydb/core/cms/console/configs_dispatcher.h>
55
#include <ydb/core/protos/counters_hive.pb.h>
6+
#include <ydb/core/protos/node_broker.pb.h>
67
#include <ydb/core/util/tuples.h>
78
#include <ydb/library/yverify_stream/yverify_stream.h>
89
#include <ydb/library/actors/interconnect/interconnect.h>
@@ -96,10 +97,12 @@ void THive::RestartPipeTx(ui64 tabletId) {
9697

9798
bool THive::TryToDeleteNode(TNodeInfo* node) {
9899
if (node->CanBeDeleted()) {
100+
BLOG_I("TryToDeleteNode(" << node->Id << "): deleting");
99101
DeleteNode(node->Id);
100102
return true;
101103
}
102104
if (!node->DeletionScheduled) {
105+
BLOG_D("TryToDeleteNode(" << node->Id << "): waiting " << GetNodeDeletePeriod());
103106
Schedule(GetNodeDeletePeriod(), new TEvPrivate::TEvDeleteNode(node->Id));
104107
node->DeletionScheduled = true;
105108
}
@@ -987,8 +990,9 @@ void THive::OnActivateExecutor(const TActorContext&) {
987990
BuildLocalConfig();
988991
ClusterConfig = AppData()->HiveConfig;
989992
SpreadNeighbours = ClusterConfig.GetSpreadNeighbours();
993+
NodeBrokerEpoch = TDuration::MicroSeconds(NKikimrNodeBroker::TConfig().GetEpochDuration());
990994
Send(NConsole::MakeConfigsDispatcherID(SelfId().NodeId()),
991-
new NConsole::TEvConfigsDispatcher::TEvSetConfigSubscriptionRequest(NKikimrConsole::TConfigItem::HiveConfigItem));
995+
new NConsole::TEvConfigsDispatcher::TEvSetConfigSubscriptionRequest({NKikimrConsole::TConfigItem::HiveConfigItem, NKikimrConsole::TConfigItem::NodeBrokerConfigItem}));
992996
Execute(CreateInitScheme());
993997
if (!ResponsivenessPinger) {
994998
ResponsivenessPinger = new TTabletResponsivenessPinger(TabletCounters->Simple()[NHive::COUNTER_RESPONSE_TIME_USEC], TDuration::Seconds(1));
@@ -2208,7 +2212,9 @@ void THive::Handle(TEvHive::TEvInitiateTabletExternalBoot::TPtr& ev) {
22082212
void THive::Handle(NConsole::TEvConsole::TEvConfigNotificationRequest::TPtr& ev) {
22092213
const NKikimrConsole::TConfigNotificationRequest& record = ev->Get()->Record;
22102214
ClusterConfig = record.GetConfig().GetHiveConfig();
2211-
BLOG_D("Received TEvConsole::TEvConfigNotificationRequest with update of cluster config: " << ClusterConfig.ShortDebugString());
2215+
NodeBrokerEpoch = TDuration::MicroSeconds(record.GetConfig().GetNodeBrokerConfig().GetEpochDuration());
2216+
BLOG_D("Received TEvConsole::TEvConfigNotificationRequest with update of cluster config: " << ClusterConfig.ShortDebugString()
2217+
<< "; " << record.GetConfig().GetNodeBrokerConfig().ShortDebugString());
22122218
BuildCurrentConfig();
22132219
Send(ev->Sender, new NConsole::TEvConsole::TEvConfigNotificationResponse(record), 0, ev->Cookie);
22142220
}

ydb/core/mind/hive/hive_impl.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,7 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
445445

446446
NKikimrConfig::THiveConfig ClusterConfig;
447447
NKikimrConfig::THiveConfig DatabaseConfig;
448+
TDuration NodeBrokerEpoch;
448449
std::unordered_map<TTabletTypes::EType, NKikimrConfig::THiveTabletLimit> TabletLimit; // built from CurrentConfig
449450
std::unordered_map<TTabletTypes::EType, NKikimrHive::TDataCentersPreference> DefaultDataCentersPreference;
450451
std::unordered_map<TDataCenterId, std::unordered_set<TNodeId>> RegisteredDataCenterNodes;
@@ -739,7 +740,11 @@ TTabletInfo* FindTabletEvenInDeleting(TTabletId tabletId, TFollowerId followerId
739740
}
740741

741742
TDuration GetNodeDeletePeriod() const {
742-
return TDuration::Seconds(CurrentConfig.GetNodeDeletePeriod());
743+
if (CurrentConfig.HasNodeDeletePeriod()) {
744+
return TDuration::Seconds(CurrentConfig.GetNodeDeletePeriod());
745+
} else {
746+
return NodeBrokerEpoch;
747+
}
743748
}
744749

745750
ui64 GetDrainInflight() const {

ydb/core/mind/hive/tx__load_everything.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -338,9 +338,9 @@ class TTxLoadEverything : public TTransactionBase<THive> {
338338
// it's safe to call here, because there are no tablets in the node yet
339339
node.BecomeDisconnected();
340340
}
341-
if (node.CanBeDeleted()) {
341+
if (Self->TryToDeleteNode(&node)) {
342+
// node is deleted from hashmap
342343
db.Table<Schema::Node>().Key(nodeId).Delete();
343-
Self->Nodes.erase(nodeId);
344344
} else if (node.IsUnknown() && node.LocationAcquired) {
345345
Self->AddRegisteredDataCentersNode(node.Location.GetDataCenterId(), node.Id);
346346
}

ydb/core/protos/config.proto

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1458,7 +1458,7 @@ message THiveConfig {
14581458
optional double MinPeriodBetweenReassign = 33 [default = 300.0]; // seconds
14591459
optional double TabletRestartWatchPeriod = 34 [default = 3600.0]; // seconds
14601460
optional double NodeRestartWatchPeriod = 35 [default = 3600.0]; // seconds
1461-
optional uint64 NodeDeletePeriod = 36 [default = 14400]; // seconds
1461+
optional uint64 NodeDeletePeriod = 36 [default = 3600]; // seconds
14621462
repeated THiveTabletLimit DefaultTabletLimit = 37;
14631463
repeated THiveTabletPreference DefaultTabletPreference = 38;
14641464
optional uint64 SystemTabletCategoryId = 39 [default = 1];

0 commit comments

Comments
 (0)