Skip to content

Commit 2a79013

Browse files
committed
Fix init race at the end of dst split
1 parent 2036e13 commit 2a79013

File tree

1 file changed

+33
-6
lines changed

1 file changed

+33
-6
lines changed

ydb/core/tx/datashard/datashard_split_dst.cpp

+33-6
Original file line numberDiff line numberDiff line change
@@ -292,9 +292,9 @@ class TDataShard::TTxSplitTransferSnapshot : public NTabletFlatExecutor::TTransa
292292
Self->PromoteFollowerReadEdge(txc);
293293
}
294294

295-
Self->State = TShardState::Ready;
296-
Self->PersistSys(db, Schema::Sys_State, Self->State);
297-
Self->SendRegistrationRequestTimeCast(ctx);
295+
// Note: we persist Ready, but keep current state in memory until Complete
296+
Self->SetPersistState(TShardState::Ready, txc);
297+
Self->State = TShardState::SplitDstReceivingSnapshot;
298298
}
299299

300300
return true;
@@ -308,9 +308,36 @@ class TDataShard::TTxSplitTransferSnapshot : public NTabletFlatExecutor::TTransa
308308

309309
ctx.Send(ackTo, new TEvDataShard::TEvSplitTransferSnapshotAck(opId, Self->TabletID()));
310310

311-
if (LastSnapshotReceived) {
312-
// We have received all the data, reload everything from the received system tables
313-
Self->Execute(Self->CreateTxInit(), ctx);
311+
// Note: we skip init in an unlikely event of state resetting between Execute and Complete
312+
if (LastSnapshotReceived && Self->State == TShardState::SplitDstReceivingSnapshot) {
313+
// We have received all the data, finish shard initialization
314+
// Note: previously we used TxInit, however received system tables
315+
// have been empty for years now, and since pipes are still open we
316+
// may receive requests between TxInit loading the Ready state and
317+
// its Complete method initializing everything properly. Instead
318+
// necessary steps are repeated here.
319+
Self->State = TShardState::Ready;
320+
321+
// We are already in StateWork, but we need to repeat many steps now that we are Ready
322+
Self->SwitchToWork(ctx);
323+
324+
// We can send the registration request now that we are ready
325+
Self->SendRegistrationRequestTimeCast(ctx);
326+
327+
// Initialize snapshot expiration queue with current context time
328+
Self->GetSnapshotManager().InitExpireQueue(ctx.Now());
329+
if (Self->GetSnapshotManager().HasExpiringSnapshots()) {
330+
Self->PlanCleanup(ctx);
331+
}
332+
333+
// Initialize change senders
334+
Self->KillChangeSender(ctx);
335+
Self->CreateChangeSender(ctx);
336+
Self->MaybeActivateChangeSender(ctx);
337+
Self->EmitHeartbeats();
338+
339+
// Switch mvcc state if needed
340+
Self->CheckMvccStateChangeCanStart(ctx);
314341
}
315342
}
316343
};

0 commit comments

Comments
 (0)