Skip to content

Commit fdedcee

Browse files
authored
use adaptive timeouts and persistent node count in hive warmup KIKIMR-20551 (#624)
1 parent 7a8d731 commit fdedcee

File tree

7 files changed

+41
-12
lines changed

7 files changed

+41
-12
lines changed

ydb/core/mind/hive/hive_impl.cpp

+25-7
Original file line numberDiff line numberDiff line change
@@ -177,14 +177,30 @@ void THive::DeleteTabletWithoutStorage(TLeaderTabletInfo* tablet, TSideEffects&
177177
sideEffects.Send(SelfId(), new TEvTabletBase::TEvDeleteTabletResult(NKikimrProto::OK, tablet->Id));
178178
}
179179

180+
// Computes the instant until which boot-queue processing is held back during warm-up.
//
// Returns a default-constructed TInstant when the connected-nodes counter is zero.
// Otherwise the result starts at LastConnect plus MaxTimeBetweenConnects multiplied by
// the number of expected nodes that have not connected yet (at least 1). While fewer
// than ExpectedNodes nodes are connected, the result is raised to at least
// StartTime() + GetWarmUpBootWaitingPeriod(). In all cases it is capped at
// StartTime() + GetMaxWarmUpPeriod().
TInstant THive::GetAllowedBootingTime() {
181+
auto connectedNodes = TabletCounters->Simple()[NHive::COUNTER_NODES_CONNECTED].Get();
182+
BLOG_D(connectedNodes << " nodes connected out of " << ExpectedNodes);
183+
// NOTE(review): presumably the default TInstant compares earlier than any "now", so the
// caller's `now < allowed` check does not delay booting in this case - confirm.
if (connectedNodes == 0) {
184+
return {};
185+
}
186+
// Both counts are cast to i64 before subtracting, so a zero or negative difference
// (connectedNodes >= ExpectedNodes) is clamped to a multiplier of 1 by std::max.
TInstant result = LastConnect + MaxTimeBetweenConnects * std::max<i64>(static_cast<i64>(ExpectedNodes) - static_cast<i64>(connectedNodes), 1);
187+
// Lower bound applies only while some expected nodes are still missing.
if (connectedNodes < ExpectedNodes) {
188+
result = std::max(result, StartTime() + GetWarmUpBootWaitingPeriod());
189+
}
190+
// Upper bound: never wait past the maximum warm-up period counted from StartTime().
result = std::min(result, StartTime() + GetMaxWarmUpPeriod());
191+
return result;
192+
}
193+
180194
void THive::ExecuteProcessBootQueue(NIceDb::TNiceDb& db, TSideEffects& sideEffects) {
181195
TInstant now = TActivationContext::Now();
182-
TInstant allowed = std::min(LastConnect + GetWarmUpBootWaitingPeriod(), StartTime() + GetMaxWarmUpPeriod());
183-
if (WarmUp && now < allowed) {
184-
BLOG_D("ProcessBootQueue - last connect was at " << LastConnect << "- not long enough ago");
185-
ProcessBootQueueScheduled = false;
186-
PostponeProcessBootQueue(allowed - now);
187-
return;
196+
if (WarmUp) {
197+
TInstant allowed = GetAllowedBootingTime();
198+
if (now < allowed) {
199+
BLOG_D("ProcessBootQueue - waiting until " << allowed << " because of warmup, now: " << now);
200+
ProcessBootQueueScheduled = false;
201+
PostponeProcessBootQueue(allowed - now);
202+
return;
203+
}
188204
}
189205
BLOG_D("Handle ProcessBootQueue (size: " << BootQueue.BootQueue.size() << ")");
190206
THPTimer bootQueueProcessingTimer;
@@ -304,9 +320,11 @@ void THive::ProcessBootQueue() {
304320
}
305321

306322
// Schedules a TEvPrivate::TEvPostponeProcessBootQueue event to fire `after` from now.
//
// This listing is a rendered diff: the line after marker `307-` is the removed condition,
// and the lines after `323+`, `324+` and `327+` are the added replacement code.
// In the added version an event is scheduled when no postponement is pending
// (ProcessBootQueuePostponed is false) or when the requested wake-up time is earlier than
// ProcessBootQueuePostponedUntil; the flag and the stored deadline are updated accordingly.
// NOTE(review): an already scheduled, later event is not cancelled here; presumably the
// event handler tolerates the extra wake-up - confirm against the handler.
void THive::PostponeProcessBootQueue(TDuration after) {
307-
if (!ProcessBootQueuePostponed) {
323+
TInstant postponeUntil = TActivationContext::Now() + after;
324+
if (!ProcessBootQueuePostponed || postponeUntil < ProcessBootQueuePostponedUntil) {
308325
BLOG_D("PostponeProcessBootQueue (" << after << ")");
309326
ProcessBootQueuePostponed = true;
327+
ProcessBootQueuePostponedUntil = postponeUntil;
310328
Schedule(after, new TEvPrivate::TEvPostponeProcessBootQueue());
311329
}
312330
}

ydb/core/mind/hive/hive_impl.h

+4
Original file line numberDiff line numberDiff line change
@@ -362,7 +362,10 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
362362
bool ProcessBootQueueScheduled = false;
363363
bool ProcessBootQueuePostponed = false;
364364
TInstant LastConnect;
365+
TInstant ProcessBootQueuePostponedUntil;
366+
TDuration MaxTimeBetweenConnects;
365367
bool WarmUp;
368+
ui64 ExpectedNodes;
366369

367370
THashMap<ui32, TEvInterconnect::TNodeInfo> NodesInfo;
368371
TTabletCountersBase* TabletCounters;
@@ -903,6 +906,7 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
903906
void ScheduleDisconnectNode(THolder<TEvPrivate::TEvProcessDisconnectNode> event);
904907
void DeleteTabletWithoutStorage(TLeaderTabletInfo* tablet);
905908
void DeleteTabletWithoutStorage(TLeaderTabletInfo* tablet, TSideEffects& sideEffects);
909+
TInstant GetAllowedBootingTime();
906910
void ScheduleUnlockTabletExecution(TNodeInfo& node);
907911
TString DebugDomainsActiveNodes() const;
908912
TResourceNormalizedValues GetStDevResourceValues() const;

ydb/core/mind/hive/hive_ut.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -672,6 +672,7 @@ Y_UNIT_TEST_SUITE(THiveTest) {
672672
TMailboxType::Simple, 0,
673673
TMailboxType::Simple, 0);
674674
TTenantPoolConfig::TPtr tenantPoolConfig = new TTenantPoolConfig(localConfig);
675+
// tenantPoolConfig->AddStaticSlot(DOMAIN_NAME);
675676
tenantPoolConfig->AddStaticSlot(tenant);
676677

677678
TActorId actorId = runtime.Register(
@@ -1877,6 +1878,7 @@ Y_UNIT_TEST_SUITE(THiveTest) {
18771878

18781879
Ctest << "killing tablet " << tabletId << Endl;
18791880
runtime.Register(CreateTabletKiller(tabletId, runtime.GetNodeId(0)));
1881+
// runtime.Register(CreateTabletKiller(tabletId, runtime.GetNodeId(1)));
18801882

18811883
waitFor([&]{ return blockedCommits.size() >= 2; }, "at least 2 blocked commits");
18821884

ydb/core/mind/hive/monitoring.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1869,7 +1869,7 @@ function fillDataShort(result) {
18691869
if ("TotalTablets" in result) {
18701870
var percent = Math.floor(result.RunningTablets * 100 / result.TotalTablets) + '%';
18711871
var values = result.RunningTablets + ' of ' + result.TotalTablets;
1872-
var warmup = result.Warmup ? "<span class='glyphicon glyphicon-fire' style='color:red; margin-right:4px'></span>" : "";
1872+
var warmup = result.WarmUp ? "<span class='glyphicon glyphicon-fire' style='color:red; margin-right:4px'></span>" : "";
18731873
$('#runningTablets').html(warmup + percent + ' (' + values + ')');
18741874
$('#aliveNodes').html(result.AliveNodes);
18751875
$('#bootQueue').html(result.BootQueueSize);

ydb/core/mind/hive/tx__load_everything.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -727,7 +727,8 @@ class TTxLoadEverything : public TTransactionBase<THive> {
727727
Self->SetCounterTabletsTotal(tabletsTotal);
728728
Self->TabletCounters->Simple()[NHive::COUNTER_SEQUENCE_FREE].Set(Self->Sequencer.FreeSize());
729729
Self->TabletCounters->Simple()[NHive::COUNTER_SEQUENCE_ALLOCATED].Set(Self->Sequencer.AllocatedSequencesSize());
730-
Self->TabletCounters->Simple()[NHive::COUNTER_NODES_TOTAL].Set(Self->Nodes.size());
730+
Self->ExpectedNodes = Self->Nodes.size();
731+
Self->TabletCounters->Simple()[NHive::COUNTER_NODES_TOTAL].Set(Self->ExpectedNodes);
731732
Self->MigrationState = NKikimrHive::EMigrationState::MIGRATION_READY;
732733
ctx.Send(Self->SelfId(), new TEvPrivate::TEvBootTablets());
733734

ydb/core/mind/hive/tx__status.cpp

+5-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,11 @@ class TTxStatus : public TTransactionBase<THive> {
3333
}
3434
if (Self->WarmUp &&
3535
node.Statistics.RestartTimestampSize() < Self->GetNodeRestartsToIgnoreInWarmup()) {
36-
Self->LastConnect = TActivationContext::Now();
36+
TInstant now = TActivationContext::Now();
37+
if (Self->LastConnect != TInstant{}) {
38+
Self->MaxTimeBetweenConnects = std::max(Self->MaxTimeBetweenConnects, now - Self->LastConnect);
39+
}
40+
Self->LastConnect = now;
3741
}
3842
if (node.LocationAcquired) {
3943
NIceDb::TNiceDb db(txc.DB);

ydb/core/protos/config.proto

+2-2
Original file line numberDiff line numberDiff line change
@@ -1384,9 +1384,9 @@ message THiveConfig {
13841384
repeated NKikimrTabletBase.TTabletTypes.EType BalancerIgnoreTabletTypes = 49;
13851385
optional double SpaceUsagePenaltyThreshold = 53 [default = 1.1]; // number > 1
13861386
optional double SpaceUsagePenalty = 54 [default = 0.2]; // number <= 1
1387-
optional uint64 WarmUpBootWaitingPeriod = 50 [default = 5000]; // milliseconds
1387+
optional uint64 WarmUpBootWaitingPeriod = 50 [default = 30000]; // milliseconds, time to wait for known nodes on cluster restart
13881388
optional uint64 NodeRestartsToIgnoreInWarmup = 51 [default = 10];
1389-
optional double MaxWarmUpPeriod = 52 [default = 30.0]; // seconds
1389+
optional double MaxWarmUpPeriod = 52 [default = 600.0]; // seconds
13901390
optional bool WarmUpEnabled = 55 [default = true];
13911391
optional uint64 EmergencyBalancerInflight = 56 [default = 1]; // tablets
13921392
optional uint64 MaxMovementsOnEmergencyBalancer = 57 [default = 2];

0 commit comments

Comments
 (0)