@@ -399,8 +399,23 @@ void TDataShard::SendRegistrationRequestTimeCast(const TActorContext &ctx) {
399
399
if (RegistrationSended)
400
400
return ;
401
401
402
- if (!ProcessingParams)
402
+ if (!ProcessingParams) {
403
+ LOG_DEBUG_S (ctx, NKikimrServices::TX_DATASHARD, TabletID ()
404
+ << " not sending time cast registration request in state "
405
+ << DatashardStateName (State)
406
+ << " : missing processing params" );
403
407
return ;
408
+ }
409
+
410
+ if (State == TShardState::WaitScheme ||
411
+ State == TShardState::SplitDstReceivingSnapshot)
412
+ {
413
+ // We don't have all the necessary info yet
414
+ LOG_DEBUG_S (ctx, NKikimrServices::TX_DATASHARD, TabletID ()
415
+ << " not sending time cast registration request in state "
416
+ << DatashardStateName (State));
417
+ return ;
418
+ }
404
419
405
420
LOG_INFO_S (ctx, NKikimrServices::TX_DATASHARD, " Send registration request to time cast "
406
421
<< DatashardStateName (State) << " tabletId " << TabletID ()
@@ -2028,6 +2043,13 @@ TRowVersion TDataShard::GetMvccTxVersion(EMvccTxMode mode, TOperation* op) const
2028
2043
}
2029
2044
}
2030
2045
2046
+ LOG_TRACE_S (*TlsActivationContext, NKikimrServices::TX_DATASHARD, " GetMvccTxVersion at " << TabletID ()
2047
+ << " CompleteEdge# " << SnapshotManager.GetCompleteEdge ()
2048
+ << " IncompleteEdge# " << SnapshotManager.GetIncompleteEdge ()
2049
+ << " UnprotectedReadEdge# " << SnapshotManager.GetUnprotectedReadEdge ()
2050
+ << " ImmediateWriteEdge# " << SnapshotManager.GetImmediateWriteEdge ()
2051
+ << " ImmediateWriteEdgeReplied# " << SnapshotManager.GetImmediateWriteEdgeReplied ());
2052
+
2031
2053
TRowVersion edge;
2032
2054
TRowVersion readEdge = Max (
2033
2055
SnapshotManager.GetCompleteEdge (),
@@ -2142,6 +2164,8 @@ TDataShard::TPromotePostExecuteEdges TDataShard::PromoteImmediatePostExecuteEdge
2142
2164
// We need to wait for completion until the flag is committed
2143
2165
res.WaitCompletion = true ;
2144
2166
}
2167
+ LOG_TRACE_S (*TlsActivationContext, NKikimrServices::TX_DATASHARD, " PromoteImmediatePostExecuteEdges at " << TabletID ()
2168
+ << " promoting UnprotectedReadEdge to " << version);
2145
2169
SnapshotManager.PromoteUnprotectedReadEdge (version);
2146
2170
2147
2171
// We want to promote the complete edge when protected reads are
@@ -2304,6 +2328,19 @@ void TDataShard::SendAfterMediatorStepActivate(ui64 mediatorStep, const TActorCo
2304
2328
for (auto it = MediatorDelayedReplies.begin (); it != MediatorDelayedReplies.end ();) {
2305
2329
const ui64 step = it->first .Step ;
2306
2330
2331
+ if (SrcSplitDescription) {
2332
+ if (State == TShardState::SplitSrcSendingSnapshot ||
2333
+ State == TShardState::SplitSrcWaitForPartitioningChanged ||
2334
+ State == TShardState::PreOffline ||
2335
+ State == TShardState::Offline)
2336
+ {
2337
+ // We cannot send replies, since dst shard is now in charge
2338
+ // of keeping track of acknowledged writes. So we expect
2339
+ // split src logic to reboot this shard later.
2340
+ break ;
2341
+ }
2342
+ }
2343
+
2307
2344
if (step <= mediatorStep) {
2308
2345
SnapshotManager.PromoteImmediateWriteEdgeReplied (it->first );
2309
2346
Send (it->second .Target , it->second .Event .Release (), 0 , it->second .Cookie );
@@ -2371,13 +2408,16 @@ void TDataShard::CheckMediatorStateRestored() {
2371
2408
// HEAD reads must include that in their results.
2372
2409
const ui64 waitStep = CoordinatorPrevReadStepMax;
2373
2410
const ui64 readStep = CoordinatorPrevReadStepMax;
2374
-
2375
- LOG_DEBUG_S (*TlsActivationContext, NKikimrServices::TX_DATASHARD, " CheckMediatorStateRestored: waitStep# " << waitStep << " readStep# " << readStep);
2411
+ const ui64 observedStep = GetMaxObservedStep ();
2412
+ LOG_DEBUG_S (*TlsActivationContext, NKikimrServices::TX_DATASHARD, " CheckMediatorStateRestored at " << TabletID () << " :"
2413
+ << " waitStep# " << waitStep
2414
+ << " readStep# " << readStep
2415
+ << " observedStep# " << observedStep);
2376
2416
2377
2417
// WARNING: we must perform this check BEFORE we update unprotected read edge
2378
2418
// We may enter this code path multiple times, and we expect that the above
2379
2419
// read step may be refined while we wait based on pessimistic backup step.
2380
- if (GetMaxObservedStep () < waitStep) {
2420
+ if (observedStep < waitStep) {
2381
2421
// We need to wait until we observe mediator step that is at least
2382
2422
// as large as the step we found.
2383
2423
if (MediatorTimeCastWaitingSteps.insert (waitStep).second ) {
@@ -2398,7 +2438,10 @@ void TDataShard::CheckMediatorStateRestored() {
2398
2438
SnapshotManager.GetImmediateWriteEdge ().Step > SnapshotManager.GetCompleteEdge ().Step
2399
2439
? SnapshotManager.GetImmediateWriteEdge ().Prev ()
2400
2440
: TRowVersion::Min ();
2401
- SnapshotManager.PromoteUnprotectedReadEdge (Max (lastReadEdge, preImmediateWriteEdge));
2441
+ const TRowVersion edge = Max (lastReadEdge, preImmediateWriteEdge);
2442
+ LOG_TRACE_S (*TlsActivationContext, NKikimrServices::TX_DATASHARD, " CheckMediatorStateRestored at " << TabletID ()
2443
+ << " promoting UnprotectedReadEdge to " << edge);
2444
+ SnapshotManager.PromoteUnprotectedReadEdge (edge);
2402
2445
}
2403
2446
2404
2447
// Promote the replied immediate write edge up to the currently observed step
@@ -2407,7 +2450,7 @@ void TDataShard::CheckMediatorStateRestored() {
2407
2450
// data that is definitely not replied yet.
2408
2451
if (SnapshotManager.GetImmediateWriteEdgeReplied () < SnapshotManager.GetImmediateWriteEdge ()) {
2409
2452
const ui64 writeStep = SnapshotManager.GetImmediateWriteEdge ().Step ;
2410
- const TRowVersion edge (GetMaxObservedStep () , Max<ui64>());
2453
+ const TRowVersion edge (observedStep , Max<ui64>());
2411
2454
SnapshotManager.PromoteImmediateWriteEdgeReplied (
2412
2455
Min (edge, SnapshotManager.GetImmediateWriteEdge ()));
2413
2456
// Try to ensure writes become visible sooner rather than later
@@ -2544,6 +2587,10 @@ bool TDataShard::CheckDataTxReject(const TString& opDescr,
2544
2587
rejectDescriptions.push_back (TStringBuilder ()
2545
2588
<< " is in process of split opId " << DstSplitOpId
2546
2589
<< " state " << DatashardStateName (State));
2590
+ } else if (State == TShardState::WaitScheme) {
2591
+ reject = true ;
2592
+ rejectReasons |= ERejectReasons::WrongState;
2593
+ rejectDescriptions.push_back (" is not created yet" );
2547
2594
} else if (State == TShardState::PreOffline || State == TShardState::Offline) {
2548
2595
reject = true ;
2549
2596
rejectStatus = NKikimrTxDataShard::TEvProposeTransactionResult::ERROR;
@@ -2706,6 +2753,11 @@ void TDataShard::Handle(TEvDataShard::TEvProposeTransaction::TPtr &ev, const TAc
2706
2753
auto * msg = ev->Get ();
2707
2754
LWTRACK (ProposeTransactionRequest, msg->Orbit );
2708
2755
2756
+ if (CheckDataTxRejectAndReply (ev, ctx)) {
2757
+ IncCounter (COUNTER_PREPARE_REQUEST);
2758
+ return ;
2759
+ }
2760
+
2709
2761
// Check if we need to delay an immediate transaction
2710
2762
if (MediatorStateWaiting &&
2711
2763
(ev->Get ()->GetFlags () & TTxFlags::Immediate) &&
@@ -2738,10 +2790,6 @@ void TDataShard::Handle(TEvDataShard::TEvProposeTransaction::TPtr &ev, const TAc
2738
2790
2739
2791
IncCounter (COUNTER_PREPARE_REQUEST);
2740
2792
2741
- if (CheckDataTxRejectAndReply (ev, ctx)) {
2742
- return ;
2743
- }
2744
-
2745
2793
switch (ev->Get ()->GetTxKind ()) {
2746
2794
case NKikimrTxDataShard::TX_KIND_DATA:
2747
2795
case NKikimrTxDataShard::TX_KIND_SCAN:
0 commit comments