@@ -398,8 +398,23 @@ void TDataShard::SendRegistrationRequestTimeCast(const TActorContext &ctx) {
398
398
if (RegistrationSended)
399
399
return ;
400
400
401
- if (!ProcessingParams)
401
+ if (!ProcessingParams) {
402
+ LOG_DEBUG_S (ctx, NKikimrServices::TX_DATASHARD, TabletID ()
403
+ << " not sending time cast registration request in state "
404
+ << DatashardStateName (State)
405
+ << " : missing processing params" );
402
406
return ;
407
+ }
408
+
409
+ if (State == TShardState::WaitScheme ||
410
+ State == TShardState::SplitDstReceivingSnapshot)
411
+ {
412
+ // We don't have all the necessary info yet
413
+ LOG_DEBUG_S (ctx, NKikimrServices::TX_DATASHARD, TabletID ()
414
+ << " not sending time cast registration request in state "
415
+ << DatashardStateName (State));
416
+ return ;
417
+ }
403
418
404
419
LOG_INFO_S (ctx, NKikimrServices::TX_DATASHARD, " Send registration request to time cast "
405
420
<< DatashardStateName (State) << " tabletId " << TabletID ()
@@ -2027,6 +2042,13 @@ TRowVersion TDataShard::GetMvccTxVersion(EMvccTxMode mode, TOperation* op) const
2027
2042
}
2028
2043
}
2029
2044
2045
+ LOG_TRACE_S (*TlsActivationContext, NKikimrServices::TX_DATASHARD, " GetMvccTxVersion at " << TabletID ()
2046
+ << " CompleteEdge# " << SnapshotManager.GetCompleteEdge ()
2047
+ << " IncompleteEdge# " << SnapshotManager.GetIncompleteEdge ()
2048
+ << " UnprotectedReadEdge# " << SnapshotManager.GetUnprotectedReadEdge ()
2049
+ << " ImmediateWriteEdge# " << SnapshotManager.GetImmediateWriteEdge ()
2050
+ << " ImmediateWriteEdgeReplied# " << SnapshotManager.GetImmediateWriteEdgeReplied ());
2051
+
2030
2052
TRowVersion edge;
2031
2053
TRowVersion readEdge = Max (
2032
2054
SnapshotManager.GetCompleteEdge (),
@@ -2141,6 +2163,8 @@ TDataShard::TPromotePostExecuteEdges TDataShard::PromoteImmediatePostExecuteEdge
2141
2163
// We need to wait for completion until the flag is committed
2142
2164
res.WaitCompletion = true ;
2143
2165
}
2166
+ LOG_TRACE_S (*TlsActivationContext, NKikimrServices::TX_DATASHARD, " PromoteImmediatePostExecuteEdges at " << TabletID ()
2167
+ << " promoting UnprotectedReadEdge to " << version);
2144
2168
SnapshotManager.PromoteUnprotectedReadEdge (version);
2145
2169
2146
2170
// We want to promote the complete edge when protected reads are
@@ -2303,6 +2327,19 @@ void TDataShard::SendAfterMediatorStepActivate(ui64 mediatorStep, const TActorCo
2303
2327
for (auto it = MediatorDelayedReplies.begin (); it != MediatorDelayedReplies.end ();) {
2304
2328
const ui64 step = it->first .Step ;
2305
2329
2330
+ if (SrcSplitDescription) {
2331
+ if (State == TShardState::SplitSrcSendingSnapshot ||
2332
+ State == TShardState::SplitSrcWaitForPartitioningChanged ||
2333
+ State == TShardState::PreOffline ||
2334
+ State == TShardState::Offline)
2335
+ {
2336
+ // We cannot send replies, since dst shard is now in charge
2337
+ // of keeping track of acknowledged writes. So we expect
2338
+ // split src logic to reboot this shard later.
2339
+ break ;
2340
+ }
2341
+ }
2342
+
2306
2343
if (step <= mediatorStep) {
2307
2344
SnapshotManager.PromoteImmediateWriteEdgeReplied (it->first );
2308
2345
Send (it->second .Target , it->second .Event .Release (), 0 , it->second .Cookie );
@@ -2370,13 +2407,16 @@ void TDataShard::CheckMediatorStateRestored() {
2370
2407
// HEAD reads must include that in their results.
2371
2408
const ui64 waitStep = CoordinatorPrevReadStepMax;
2372
2409
const ui64 readStep = CoordinatorPrevReadStepMax;
2373
-
2374
- LOG_DEBUG_S (*TlsActivationContext, NKikimrServices::TX_DATASHARD, " CheckMediatorStateRestored: waitStep# " << waitStep << " readStep# " << readStep);
2410
+ const ui64 observedStep = GetMaxObservedStep ();
2411
+ LOG_DEBUG_S (*TlsActivationContext, NKikimrServices::TX_DATASHARD, " CheckMediatorStateRestored at " << TabletID () << " :"
2412
+ << " waitStep# " << waitStep
2413
+ << " readStep# " << readStep
2414
+ << " observedStep# " << observedStep);
2375
2415
2376
2416
// WARNING: we must perform this check BEFORE we update unprotected read edge
2377
2417
// We may enter this code path multiple times, and we expect that the above
2378
2418
// read step may be refined while we wait based on pessimistic backup step.
2379
- if (GetMaxObservedStep () < waitStep) {
2419
+ if (observedStep < waitStep) {
2380
2420
// We need to wait until we observe mediator step that is at least
2381
2421
// as large as the step we found.
2382
2422
if (MediatorTimeCastWaitingSteps.insert (waitStep).second ) {
@@ -2397,7 +2437,10 @@ void TDataShard::CheckMediatorStateRestored() {
2397
2437
SnapshotManager.GetImmediateWriteEdge ().Step > SnapshotManager.GetCompleteEdge ().Step
2398
2438
? SnapshotManager.GetImmediateWriteEdge ().Prev ()
2399
2439
: TRowVersion::Min ();
2400
- SnapshotManager.PromoteUnprotectedReadEdge (Max (lastReadEdge, preImmediateWriteEdge));
2440
+ const TRowVersion edge = Max (lastReadEdge, preImmediateWriteEdge);
2441
+ LOG_TRACE_S (*TlsActivationContext, NKikimrServices::TX_DATASHARD, " CheckMediatorStateRestored at " << TabletID ()
2442
+ << " promoting UnprotectedReadEdge to " << edge);
2443
+ SnapshotManager.PromoteUnprotectedReadEdge (edge);
2401
2444
}
2402
2445
2403
2446
// Promote the replied immediate write edge up to the currently observed step
@@ -2406,7 +2449,7 @@ void TDataShard::CheckMediatorStateRestored() {
2406
2449
// data that is definitely not replied yet.
2407
2450
if (SnapshotManager.GetImmediateWriteEdgeReplied () < SnapshotManager.GetImmediateWriteEdge ()) {
2408
2451
const ui64 writeStep = SnapshotManager.GetImmediateWriteEdge ().Step ;
2409
- const TRowVersion edge (GetMaxObservedStep () , Max<ui64>());
2452
+ const TRowVersion edge (observedStep , Max<ui64>());
2410
2453
SnapshotManager.PromoteImmediateWriteEdgeReplied (
2411
2454
Min (edge, SnapshotManager.GetImmediateWriteEdge ()));
2412
2455
// Try to ensure writes become visible sooner rather than later
@@ -2543,6 +2586,10 @@ bool TDataShard::CheckDataTxReject(const TString& opDescr,
2543
2586
rejectDescriptions.push_back (TStringBuilder ()
2544
2587
<< " is in process of split opId " << DstSplitOpId
2545
2588
<< " state " << DatashardStateName (State));
2589
+ } else if (State == TShardState::WaitScheme) {
2590
+ reject = true ;
2591
+ rejectReasons |= ERejectReasons::WrongState;
2592
+ rejectDescriptions.push_back (" is not created yet" );
2546
2593
} else if (State == TShardState::PreOffline || State == TShardState::Offline) {
2547
2594
reject = true ;
2548
2595
rejectStatus = NKikimrTxDataShard::TEvProposeTransactionResult::ERROR;
@@ -2705,6 +2752,11 @@ void TDataShard::Handle(TEvDataShard::TEvProposeTransaction::TPtr &ev, const TAc
2705
2752
auto * msg = ev->Get ();
2706
2753
LWTRACK (ProposeTransactionRequest, msg->Orbit );
2707
2754
2755
+ if (CheckDataTxRejectAndReply (ev, ctx)) {
2756
+ IncCounter (COUNTER_PREPARE_REQUEST);
2757
+ return ;
2758
+ }
2759
+
2708
2760
// Check if we need to delay an immediate transaction
2709
2761
if (MediatorStateWaiting &&
2710
2762
(ev->Get ()->GetFlags () & TTxFlags::Immediate) &&
@@ -2737,10 +2789,6 @@ void TDataShard::Handle(TEvDataShard::TEvProposeTransaction::TPtr &ev, const TAc
2737
2789
2738
2790
IncCounter (COUNTER_PREPARE_REQUEST);
2739
2791
2740
- if (CheckDataTxRejectAndReply (ev, ctx)) {
2741
- return ;
2742
- }
2743
-
2744
2792
switch (ev->Get ()->GetTxKind ()) {
2745
2793
case NKikimrTxDataShard::TX_KIND_DATA:
2746
2794
case NKikimrTxDataShard::TX_KIND_SCAN:
0 commit comments