Skip to content

Commit 77bb715

Browse files
committed
warmup 2.0
1 parent d2bd7d2 commit 77bb715

File tree

4 files changed

+33
-8
lines changed

4 files changed

+33
-8
lines changed

ydb/core/mind/hive/hive_impl.cpp

+26-7
Original file line numberDiff line numberDiff line change
@@ -175,14 +175,31 @@ void THive::DeleteTabletWithoutStorage(TLeaderTabletInfo* tablet, TSideEffects&
175175
sideEffects.Send(SelfId(), new TEvTabletBase::TEvDeleteTabletResult(NKikimrProto::OK, tablet->Id));
176176
}
177177

178+
// Estimates the earliest moment at which the boot queue may be processed
// while warm-up is in effect.
//
// The estimate extrapolates from the connections seen so far: the average
// time per connected node ((LastConnect - StartTime()) / connectedNodes) is
// multiplied by the number of expected nodes that are still missing (at
// least 1) and added to LastConnect.
//
// Returns:
//   - a default-constructed TInstant when the connected-nodes counter is 0;
//   - otherwise the extrapolated instant, capped at
//     StartTime() + GetMaxWarmUpPeriod().
//
// NOTE(review): the cap is applied with std::min so that the extrapolation can
// never postpone booting beyond the maximum warm-up period (with few connected
// nodes and many expected ones the raw estimate grows without bound). If a
// minimum wait for missing nodes is also wanted, it needs its own, smaller
// setting - confirm the intended policy.
TInstant THive::GetAllowedBootingTime() {
    const i64 connectedNodes = TabletCounters->Simple()[NHive::COUNTER_NODES_CONNECTED].Get();
    BLOG_D(connectedNodes << " nodes connected out of " << ExpectedNodes);
    if (connectedNodes == 0) {
        // Nothing to extrapolate from (also guards the division below).
        return {};
    }
    const TDuration passed = LastConnect - StartTime();
    const TDuration avgConnectTime = passed / connectedNodes;
    // Always leave room for at least one more connection interval.
    const i64 nodesToWaitFor = std::max<i64>(ExpectedNodes - connectedNodes, 1);
    const TInstant estimate = LastConnect + avgConnectTime * nodesToWaitFor;
    // Hard upper bound: warm-up must not outlast the configured maximum.
    return std::min(estimate, StartTime() + GetMaxWarmUpPeriod());
}
192+
178193
void THive::ExecuteProcessBootQueue(NIceDb::TNiceDb& db, TSideEffects& sideEffects) {
179194
TInstant now = TActivationContext::Now();
180-
TInstant allowed = std::min(LastConnect + GetWarmUpBootWaitingPeriod(), StartTime() + GetMaxWarmUpPeriod());
181-
if (WarmUp && now < allowed) {
182-
BLOG_D("ProcessBootQueue - last connect was at " << LastConnect << "- not long enough ago");
183-
ProcessBootQueueScheduled = false;
184-
PostponeProcessBootQueue(allowed - now);
185-
return;
195+
if (WarmUp) {
196+
TInstant allowed = GetAllowedBootingTime();
197+
if (now < allowed) {
198+
BLOG_D("ProcessBootQueue - waiting unitl " << allowed << " because of warmup, now: " << now);
199+
ProcessBootQueueScheduled = false;
200+
PostponeProcessBootQueue(allowed - now);
201+
return;
202+
}
186203
}
187204
BLOG_D("Handle ProcessBootQueue (size: " << BootQueue.BootQueue.size() << ")");
188205
THPTimer bootQueueProcessingTimer;
@@ -302,9 +319,11 @@ void THive::ProcessBootQueue() {
302319
}
303320

304321
void THive::PostponeProcessBootQueue(TDuration after) {
305-
if (!ProcessBootQueuePostponed) {
322+
TInstant postponeUntil = TActivationContext::Now() + after;
323+
if (!ProcessBootQueuePostponed || postponeUntil < ProcessBootQueuePostponedUntil) {
306324
BLOG_D("PostponeProcessBootQueue (" << after << ")");
307325
ProcessBootQueuePostponed = true;
326+
ProcessBootQueuePostponedUntil = postponeUntil;
308327
Schedule(after, new TEvPrivate::TEvPostponeProcessBootQueue());
309328
}
310329
}

ydb/core/mind/hive/hive_impl.h

+3
Original file line numberDiff line numberDiff line change
@@ -362,7 +362,9 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
362362
bool ProcessBootQueueScheduled = false;
363363
bool ProcessBootQueuePostponed = false;
364364
TInstant LastConnect;
365+
TInstant ProcessBootQueuePostponedUntil;
365366
bool WarmUp;
367+
i64 ExpectedNodes;
366368

367369
THashMap<ui32, TEvInterconnect::TNodeInfo> NodesInfo;
368370
TTabletCountersBase* TabletCounters;
@@ -901,6 +903,7 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
901903
void ScheduleDisconnectNode(THolder<TEvPrivate::TEvProcessDisconnectNode> event);
902904
void DeleteTabletWithoutStorage(TLeaderTabletInfo* tablet);
903905
void DeleteTabletWithoutStorage(TLeaderTabletInfo* tablet, TSideEffects& sideEffects);
906+
TInstant GetAllowedBootingTime();
904907
void ScheduleUnlockTabletExecution(TNodeInfo& node);
905908
TString DebugDomainsActiveNodes() const;
906909
TResourceNormalizedValues GetStDevResourceValues() const;

ydb/core/mind/hive/hive_ut.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -672,6 +672,7 @@ Y_UNIT_TEST_SUITE(THiveTest) {
672672
TMailboxType::Simple, 0,
673673
TMailboxType::Simple, 0);
674674
TTenantPoolConfig::TPtr tenantPoolConfig = new TTenantPoolConfig(localConfig);
675+
// tenantPoolConfig->AddStaticSlot(DOMAIN_NAME);
675676
tenantPoolConfig->AddStaticSlot(tenant);
676677

677678
TActorId actorId = runtime.Register(
@@ -1877,6 +1878,7 @@ Y_UNIT_TEST_SUITE(THiveTest) {
18771878

18781879
Ctest << "killing tablet " << tabletId << Endl;
18791880
runtime.Register(CreateTabletKiller(tabletId, runtime.GetNodeId(0)));
1881+
// runtime.Register(CreateTabletKiller(tabletId, runtime.GetNodeId(1)));
18801882

18811883
waitFor([&]{ return blockedCommits.size() >= 2; }, "at least 2 blocked commits");
18821884

ydb/core/mind/hive/tx__load_everything.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -723,7 +723,8 @@ class TTxLoadEverything : public TTransactionBase<THive> {
723723
Self->SetCounterTabletsTotal(tabletsTotal);
724724
Self->TabletCounters->Simple()[NHive::COUNTER_SEQUENCE_FREE].Set(Self->Sequencer.FreeSize());
725725
Self->TabletCounters->Simple()[NHive::COUNTER_SEQUENCE_ALLOCATED].Set(Self->Sequencer.AllocatedSequencesSize());
726-
Self->TabletCounters->Simple()[NHive::COUNTER_NODES_TOTAL].Set(Self->Nodes.size());
726+
Self->ExpectedNodes = Self->Nodes.size();
727+
Self->TabletCounters->Simple()[NHive::COUNTER_NODES_TOTAL].Set(Self->ExpectedNodes);
727728
Self->MigrationState = NKikimrHive::EMigrationState::MIGRATION_READY;
728729
ctx.Send(Self->SelfId(), new TEvPrivate::TEvBootTablets());
729730

0 commit comments

Comments
 (0)