@@ -398,8 +398,23 @@ void TDataShard::SendRegistrationRequestTimeCast(const TActorContext &ctx) {
398
398
if (RegistrationSended)
399
399
return ;
400
400
401
- if (!ProcessingParams)
401
+ if (!ProcessingParams) {
402
+ LOG_DEBUG_S (ctx, NKikimrServices::TX_DATASHARD, TabletID ()
403
+ << " not sending time cast registration request in state "
404
+ << DatashardStateName (State)
405
+ << " : missing processing params" );
402
406
return ;
407
+ }
408
+
409
+ if (State == TShardState::WaitScheme ||
410
+ State == TShardState::SplitDstReceivingSnapshot)
411
+ {
412
+ // We don't have all the necessary info yet
413
+ LOG_DEBUG_S (ctx, NKikimrServices::TX_DATASHARD, TabletID ()
414
+ << " not sending time cast registration request in state "
415
+ << DatashardStateName (State));
416
+ return ;
417
+ }
403
418
404
419
LOG_INFO_S (ctx, NKikimrServices::TX_DATASHARD, " Send registration request to time cast "
405
420
<< DatashardStateName (State) << " tabletId " << TabletID ()
@@ -1961,6 +1976,13 @@ TRowVersion TDataShard::GetMvccTxVersion(EMvccTxMode mode, TOperation* op) const
1961
1976
}
1962
1977
}
1963
1978
1979
+ LOG_TRACE_S (*TlsActivationContext, NKikimrServices::TX_DATASHARD, " GetMvccTxVersion at " << TabletID ()
1980
+ << " CompleteEdge# " << SnapshotManager.GetCompleteEdge ()
1981
+ << " IncompleteEdge# " << SnapshotManager.GetIncompleteEdge ()
1982
+ << " UnprotectedReadEdge# " << SnapshotManager.GetUnprotectedReadEdge ()
1983
+ << " ImmediateWriteEdge# " << SnapshotManager.GetImmediateWriteEdge ()
1984
+ << " ImmediateWriteEdgeReplied# " << SnapshotManager.GetImmediateWriteEdgeReplied ());
1985
+
1964
1986
TRowVersion edge;
1965
1987
TRowVersion readEdge = Max (
1966
1988
SnapshotManager.GetCompleteEdge (),
@@ -2075,6 +2097,8 @@ TDataShard::TPromotePostExecuteEdges TDataShard::PromoteImmediatePostExecuteEdge
2075
2097
// We need to wait for completion until the flag is committed
2076
2098
res.WaitCompletion = true ;
2077
2099
}
2100
+ LOG_TRACE_S (*TlsActivationContext, NKikimrServices::TX_DATASHARD, " PromoteImmediatePostExecuteEdges at " << TabletID ()
2101
+ << " promoting UnprotectedReadEdge to " << version);
2078
2102
SnapshotManager.PromoteUnprotectedReadEdge (version);
2079
2103
2080
2104
// We want to promote the complete edge when protected reads are
@@ -2237,6 +2261,19 @@ void TDataShard::SendAfterMediatorStepActivate(ui64 mediatorStep, const TActorCo
2237
2261
for (auto it = MediatorDelayedReplies.begin (); it != MediatorDelayedReplies.end ();) {
2238
2262
const ui64 step = it->first .Step ;
2239
2263
2264
+ if (SrcSplitDescription) {
2265
+ if (State == TShardState::SplitSrcSendingSnapshot ||
2266
+ State == TShardState::SplitSrcWaitForPartitioningChanged ||
2267
+ State == TShardState::PreOffline ||
2268
+ State == TShardState::Offline)
2269
+ {
2270
+ // We cannot send replies, since dst shard is now in charge
2271
+ // of keeping track of acknowledged writes. So we expect
2272
+ // split src logic to reboot this shard later.
2273
+ break ;
2274
+ }
2275
+ }
2276
+
2240
2277
if (step <= mediatorStep) {
2241
2278
SnapshotManager.PromoteImmediateWriteEdgeReplied (it->first );
2242
2279
Send (it->second .Target , it->second .Event .Release (), 0 , it->second .Cookie );
@@ -2304,13 +2341,16 @@ void TDataShard::CheckMediatorStateRestored() {
2304
2341
// HEAD reads must include that in their results.
2305
2342
const ui64 waitStep = CoordinatorPrevReadStepMax;
2306
2343
const ui64 readStep = CoordinatorPrevReadStepMax;
2307
-
2308
- LOG_DEBUG_S (*TlsActivationContext, NKikimrServices::TX_DATASHARD, " CheckMediatorStateRestored: waitStep# " << waitStep << " readStep# " << readStep);
2344
+ const ui64 observedStep = GetMaxObservedStep ();
2345
+ LOG_DEBUG_S (*TlsActivationContext, NKikimrServices::TX_DATASHARD, " CheckMediatorStateRestored at " << TabletID () << " :"
2346
+ << " waitStep# " << waitStep
2347
+ << " readStep# " << readStep
2348
+ << " observedStep# " << observedStep);
2309
2349
2310
2350
// WARNING: we must perform this check BEFORE we update unprotected read edge
2311
2351
// We may enter this code path multiple times, and we expect that the above
2312
2352
// read step may be refined while we wait based on pessimistic backup step.
2313
- if (GetMaxObservedStep () < waitStep) {
2353
+ if (observedStep < waitStep) {
2314
2354
// We need to wait until we observe mediator step that is at least
2315
2355
// as large as the step we found.
2316
2356
if (MediatorTimeCastWaitingSteps.insert (waitStep).second ) {
@@ -2331,7 +2371,10 @@ void TDataShard::CheckMediatorStateRestored() {
2331
2371
SnapshotManager.GetImmediateWriteEdge ().Step > SnapshotManager.GetCompleteEdge ().Step
2332
2372
? SnapshotManager.GetImmediateWriteEdge ().Prev ()
2333
2373
: TRowVersion::Min ();
2334
- SnapshotManager.PromoteUnprotectedReadEdge (Max (lastReadEdge, preImmediateWriteEdge));
2374
+ const TRowVersion edge = Max (lastReadEdge, preImmediateWriteEdge);
2375
+ LOG_TRACE_S (*TlsActivationContext, NKikimrServices::TX_DATASHARD, " CheckMediatorStateRestored at " << TabletID ()
2376
+ << " promoting UnprotectedReadEdge to " << edge);
2377
+ SnapshotManager.PromoteUnprotectedReadEdge (edge);
2335
2378
}
2336
2379
2337
2380
// Promote the replied immediate write edge up to the currently observed step
@@ -2340,7 +2383,7 @@ void TDataShard::CheckMediatorStateRestored() {
2340
2383
// data that is definitely not replied yet.
2341
2384
if (SnapshotManager.GetImmediateWriteEdgeReplied () < SnapshotManager.GetImmediateWriteEdge ()) {
2342
2385
const ui64 writeStep = SnapshotManager.GetImmediateWriteEdge ().Step ;
2343
- const TRowVersion edge (GetMaxObservedStep () , Max<ui64>());
2386
+ const TRowVersion edge (observedStep , Max<ui64>());
2344
2387
SnapshotManager.PromoteImmediateWriteEdgeReplied (
2345
2388
Min (edge, SnapshotManager.GetImmediateWriteEdge ()));
2346
2389
// Try to ensure writes become visible sooner rather than later
@@ -2477,6 +2520,10 @@ bool TDataShard::CheckDataTxReject(const TString& opDescr,
2477
2520
rejectDescriptions.push_back (TStringBuilder ()
2478
2521
<< " is in process of split opId " << DstSplitOpId
2479
2522
<< " state " << DatashardStateName (State));
2523
+ } else if (State == TShardState::WaitScheme) {
2524
+ reject = true ;
2525
+ rejectReasons |= ERejectReasons::WrongState;
2526
+ rejectDescriptions.push_back (" is not created yet" );
2480
2527
} else if (State == TShardState::PreOffline || State == TShardState::Offline) {
2481
2528
reject = true ;
2482
2529
rejectStatus = NKikimrTxDataShard::TEvProposeTransactionResult::ERROR;
@@ -2639,6 +2686,11 @@ void TDataShard::Handle(TEvDataShard::TEvProposeTransaction::TPtr &ev, const TAc
2639
2686
auto * msg = ev->Get ();
2640
2687
LWTRACK (ProposeTransactionRequest, msg->Orbit );
2641
2688
2689
+ if (CheckDataTxRejectAndReply (ev, ctx)) {
2690
+ IncCounter (COUNTER_PREPARE_REQUEST);
2691
+ return ;
2692
+ }
2693
+
2642
2694
// Check if we need to delay an immediate transaction
2643
2695
if (MediatorStateWaiting &&
2644
2696
(ev->Get ()->GetFlags () & TTxFlags::Immediate) &&
@@ -2671,10 +2723,6 @@ void TDataShard::Handle(TEvDataShard::TEvProposeTransaction::TPtr &ev, const TAc
2671
2723
2672
2724
IncCounter (COUNTER_PREPARE_REQUEST);
2673
2725
2674
- if (CheckDataTxRejectAndReply (ev, ctx)) {
2675
- return ;
2676
- }
2677
-
2678
2726
switch (ev->Get ()->GetTxKind ()) {
2679
2727
case NKikimrTxDataShard::TX_KIND_DATA:
2680
2728
case NKikimrTxDataShard::TX_KIND_SCAN:
0 commit comments