@@ -59,8 +59,8 @@ namespace {
59
59
public:
60
60
// A batch stored in the Batches queue (see the push_back hunk below).
// This change only moves SendAttempts after Cookie; the designated initializer
// at the push_back site is reordered the same way, since C++ designated
// initializers must follow member declaration order.
struct TInFlightBatch {
61
61
// Payload of the batch; Data.size() is what is added to Memory on push.
TString Data;
62
- ui32 SendAttempts = 0 ;
63
62
// Identifier of the batch; set from ++NextCookie when the batch is pushed.
ui64 Cookie = 0 ;
63
// Attempt counter; checked against BackoffSettings()->MaxWriteAttempts before sending.
+ ui32 SendAttempts = 0 ;
64
64
};
65
65
66
66
size_t Size () const {
@@ -112,8 +112,8 @@ namespace {
112
112
YQL_ENSURE (!IsClosed ());
113
113
Batches.push_back (TInFlightBatch{
114
114
.Data = std::move (data),
115
- .SendAttempts = 0 ,
116
115
.Cookie = ++NextCookie,
116
+ .SendAttempts = 0 ,
117
117
});
118
118
Memory += Batches.back ().Data .size ();
119
119
}
@@ -199,7 +199,7 @@ namespace {
199
199
i64 Memory = 0 ;
200
200
};
201
201
202
// Per-actor in-flight memory limit (per its name); this change lowers it from 100_MB to 64_MB.
// NOTE(review): the use site of this constant is not visible in this hunk — confirm where it is applied.
- constexpr i64 kInFlightMemoryLimitPerActor = 100_MB ;
202
+ constexpr i64 kInFlightMemoryLimitPerActor = 64_MB ;
203
203
}
204
204
205
205
@@ -293,10 +293,6 @@ class TKqpWriteActor : public TActorBootstrapped<TKqpWriteActor>, public NYql::N
293
293
const i64 result = Serializer
294
294
? MemoryLimit - Serializer->GetMemory () - ShardsInfo.GetMemory ()
295
295
: std::numeric_limits<i64>::min (); // Can't use zero here because compute can use overcommit!
296
-
297
- if (result <= 0 ) {
298
- CA_LOG_D (" No free space left. FreeSpace=" << result << " bytes." );
299
- }
300
296
return result;
301
297
}
302
298
@@ -329,19 +325,22 @@ class TKqpWriteActor : public TActorBootstrapped<TKqpWriteActor>, public NYql::N
329
325
NYql::NDqProto::StatusIds::INTERNAL_ERROR);
330
326
}
331
327
332
- TResumeNotificationManager resumeNotificator (*this );
333
- for (auto & [shardId, batches] : Serializer->FlushBatches ()) {
334
- for (auto & batch : batches) {
335
- ShardsInfo.GetShard (shardId).PushBatch (std::move (batch));
328
+ if (Finished || GetFreeSpace () <= 0 || SchemeEntry->Kind == NSchemeCache::TSchemeCacheNavigate::KindColumnTable) {
329
+ TResumeNotificationManager resumeNotificator (*this );
330
+ for (auto & [shardId, batches] : Serializer->FlushBatchesForce ()) {
331
+ for (auto & batch : batches) {
332
+ ShardsInfo.GetShard (shardId).PushBatch (std::move (batch));
333
+ }
336
334
}
335
+ resumeNotificator.CheckMemory ();
337
336
}
338
- resumeNotificator.CheckMemory ();
339
- YQL_ENSURE (!Finished || Serializer->IsFinished ());
340
337
341
338
if (Finished) {
342
339
for (auto & [shardId, shardInfo] : ShardsInfo.GetShards ()) {
343
340
shardInfo.Close ();
344
341
}
342
+
343
+ YQL_ENSURE (Serializer->IsFinished ());
345
344
}
346
345
347
346
ProcessBatches ();
@@ -458,6 +457,10 @@ class TKqpWriteActor : public TActorBootstrapped<TKqpWriteActor>, public NYql::N
458
457
459
458
switch (ev->Get ()->GetStatus ()) {
460
459
case NKikimrDataEvents::TEvWriteResult::STATUS_UNSPECIFIED: {
460
+ CA_LOG_E (" Got UNSPECIFIED for table `"
461
+ << SchemeEntry->TableId .PathId .ToString () << " `."
462
+ << " ShardID=" << ev->Get ()->Record .GetOrigin () << " ,"
463
+ << " Sink=" << this ->SelfId () << " ." );
461
464
RuntimeError (
462
465
TStringBuilder () << " Got UNSPECIFIED for table `"
463
466
<< SchemeEntry->TableId .PathId .ToString () << " `." ,
@@ -473,6 +476,10 @@ class TKqpWriteActor : public TActorBootstrapped<TKqpWriteActor>, public NYql::N
473
476
return ;
474
477
}
475
478
case NKikimrDataEvents::TEvWriteResult::STATUS_ABORTED: {
479
+ CA_LOG_E (" Got ABORTED for table `"
480
+ << SchemeEntry->TableId .PathId .ToString () << " `."
481
+ << " ShardID=" << ev->Get ()->Record .GetOrigin () << " ,"
482
+ << " Sink=" << this ->SelfId () << " ." );
476
483
RuntimeError (
477
484
TStringBuilder () << " Got ABORTED for table `"
478
485
<< SchemeEntry->TableId .PathId .ToString () << " `." ,
@@ -481,6 +488,10 @@ class TKqpWriteActor : public TActorBootstrapped<TKqpWriteActor>, public NYql::N
481
488
return ;
482
489
}
483
490
case NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR: {
491
+ CA_LOG_E (" Got INTERNAL ERROR for table `"
492
+ << SchemeEntry->TableId .PathId .ToString () << " `."
493
+ << " ShardID=" << ev->Get ()->Record .GetOrigin () << " ,"
494
+ << " Sink=" << this ->SelfId () << " ." );
484
495
RuntimeError (
485
496
TStringBuilder () << " Got INTERNAL ERROR for table `"
486
497
<< SchemeEntry->TableId .PathId .ToString () << " `." ,
@@ -489,12 +500,18 @@ class TKqpWriteActor : public TActorBootstrapped<TKqpWriteActor>, public NYql::N
489
500
return ;
490
501
}
491
502
case NKikimrDataEvents::TEvWriteResult::STATUS_OVERLOADED: {
492
- CA_LOG_D (" Got OVERLOADED for table `"
493
- << SchemeEntry->TableId .PathId .ToString () << " `. "
494
- << " Ignored this error." );
503
+ CA_LOG_W (" Got OVERLOADED for table `"
504
+ << SchemeEntry->TableId .PathId .ToString () << " `."
505
+ << " ShardID=" << ev->Get ()->Record .GetOrigin () << " ,"
506
+ << " Sink=" << this ->SelfId () << " ."
507
+ << " Ignored this error." );
495
508
return ;
496
509
}
497
510
case NKikimrDataEvents::TEvWriteResult::STATUS_CANCELLED: {
511
+ CA_LOG_E (" Got CANCELLED for table `"
512
+ << SchemeEntry->TableId .PathId .ToString () << " `."
513
+ << " ShardID=" << ev->Get ()->Record .GetOrigin () << " ,"
514
+ << " Sink=" << this ->SelfId () << " ." );
498
515
RuntimeError (
499
516
TStringBuilder () << " Got CANCELLED for table `"
500
517
<< SchemeEntry->TableId .PathId .ToString () << " `." ,
@@ -503,6 +520,10 @@ class TKqpWriteActor : public TActorBootstrapped<TKqpWriteActor>, public NYql::N
503
520
return ;
504
521
}
505
522
case NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST: {
523
+ CA_LOG_E (" Got BAD REQUEST for table `"
524
+ << SchemeEntry->TableId .PathId .ToString () << " `."
525
+ << " ShardID=" << ev->Get ()->Record .GetOrigin () << " ,"
526
+ << " Sink=" << this ->SelfId () << " ." );
506
527
RuntimeError (
507
528
TStringBuilder () << " Got BAD REQUEST for table `"
508
529
<< SchemeEntry->TableId .PathId .ToString () << " `." ,
@@ -511,6 +532,10 @@ class TKqpWriteActor : public TActorBootstrapped<TKqpWriteActor>, public NYql::N
511
532
return ;
512
533
}
513
534
case NKikimrDataEvents::TEvWriteResult::STATUS_SCHEME_CHANGED: {
535
+ CA_LOG_E (" Got SCHEME CHANGED for table `"
536
+ << SchemeEntry->TableId .PathId .ToString () << " `."
537
+ << " ShardID=" << ev->Get ()->Record .GetOrigin () << " ,"
538
+ << " Sink=" << this ->SelfId () << " ." );
514
539
RuntimeError (
515
540
TStringBuilder () << " Got SCHEME CHANGED for table `"
516
541
<< SchemeEntry->TableId .PathId .ToString () << " `." ,
@@ -519,6 +544,10 @@ class TKqpWriteActor : public TActorBootstrapped<TKqpWriteActor>, public NYql::N
519
544
return ;
520
545
}
521
546
case NKikimrDataEvents::TEvWriteResult::STATUS_LOCKS_BROKEN: {
547
+ CA_LOG_E (" Got LOCKS BROKEN for table `"
548
+ << SchemeEntry->TableId .PathId .ToString () << " `."
549
+ << " ShardID=" << ev->Get ()->Record .GetOrigin () << " ,"
550
+ << " Sink=" << this ->SelfId () << " ." );
522
551
RuntimeError (
523
552
TStringBuilder () << " Got LOCKS BROKEN for table `"
524
553
<< SchemeEntry->TableId .PathId .ToString () << " `." ,
@@ -557,15 +586,28 @@ class TKqpWriteActor : public TActorBootstrapped<TKqpWriteActor>, public NYql::N
557
586
}
558
587
559
588
// Drives one processing step: refill empty shard queues from the serializer,
// send pending batches, then report completion once everything is done.
void ProcessBatches () {
589
// New in this change: pull fresh batches from the Serializer before sending.
+ MakeNewBatches ();
560
590
SendBatchesToShards ();
561
- if (ShardsInfo.IsFinished ()) {
591
// Completion now also requires the Finished flag and a finished Serializer,
// not only ShardsInfo.IsFinished() as before.
+ if (Finished && Serializer-> IsFinished () && ShardsInfo.IsFinished ()) {
562
592
CA_LOG_D (" Write actor finished" );
563
593
// Notify the callbacks object that this async output has finished.
Callbacks->OnAsyncOutputFinished (GetOutputIndex ());
564
594
}
565
595
}
566
596
597
// For each shard id reported by the Serializer: if that shard currently holds
// no batches, flush one batch for it from the Serializer and push it to the
// shard, skipping empty results.
+ void MakeNewBatches () {
598
+ for (const size_t shardId : Serializer->GetShardIds ()) {
599
+ auto & shard = ShardsInfo.GetShard (shardId);
600
// Only refill shards whose queue is empty.
+ if (shard.IsEmpty ()) {
601
+ auto batch = Serializer->FlushBatch (shardId);
602
// FlushBatch may return an empty batch; do not push those.
+ if (!batch.empty ()) {
603
+ shard.PushBatch (std::move (batch));
604
+ }
605
+ }
606
+ }
607
+ }
608
+
567
609
void SendBatchesToShards () {
568
- for (size_t shardId : ShardsInfo.GetPendingShards ()) {
610
+ for (const size_t shardId : ShardsInfo.GetPendingShards ()) {
569
611
auto & shard = ShardsInfo.GetShard (shardId);
570
612
YQL_ENSURE (!shard.IsEmpty ());
571
613
SendDataToShard (shardId);
@@ -577,14 +619,20 @@ class TKqpWriteActor : public TActorBootstrapped<TKqpWriteActor>, public NYql::N
577
619
YQL_ENSURE (!shard.IsEmpty ());
578
620
auto & inFlightBatch = shard.CurrentBatch ();
579
621
if (inFlightBatch.SendAttempts >= BackoffSettings ()->MaxWriteAttempts ) {
622
+ CA_LOG_E (" ShardId=" << shardId
623
+ << " for table '" << Settings.GetTable ().GetPath ()
624
+ << " ': retry limit exceeded."
625
+ << " Sink=" << this ->SelfId () << " ." );
580
626
RuntimeError (
581
- TStringBuilder () << " ShardId=" << shardId << " for table '" << Settings.GetTable ().GetPath () << " ': retry limit exceeded" ,
627
+ TStringBuilder ()
628
+ << " ShardId=" << shardId
629
+ << " for table '" << Settings.GetTable ().GetPath ()
630
+ << " ': retry limit exceeded." ,
582
631
NYql::NDqProto::StatusIds::UNAVAILABLE);
583
632
return ;
584
633
}
585
634
586
635
auto evWrite = std::make_unique<NKikimr::NEvents::TDataEvents::TEvWrite>(
587
- std::get<ui64>(TxId),
588
636
NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE);
589
637
YQL_ENSURE (!inFlightBatch.Data .empty ());
590
638
@@ -608,7 +656,8 @@ class TKqpWriteActor : public TActorBootstrapped<TKqpWriteActor>, public NYql::N
608
656
CA_LOG_D (" Send EvWrite to ShardID=" << shardId << " , TxId=" << std::get<ui64>(TxId)
609
657
<< " , LockTxId=" << Settings.GetLockTxId () << " , LockNodeId=" << Settings.GetLockNodeId ()
610
658
<< " , Size=" << inFlightBatch.Data .size () << " , Cookie=" << inFlightBatch.Cookie
611
- << " ; ShardBatchesLeft=" << shard.Size () << " , ShardClosed=" << shard.IsClosed ());
659
+ << " ; ShardBatchesLeft=" << shard.Size () << " , ShardClosed=" << shard.IsClosed ()
660
+ << " ; Attempts=" << inFlightBatch.SendAttempts );
612
661
Send (
613
662
PipeCacheId,
614
663
new TEvPipeCache::TEvForward (evWrite.release (), shardId, true ),
@@ -630,16 +679,17 @@ class TKqpWriteActor : public TActorBootstrapped<TKqpWriteActor>, public NYql::N
630
679
return ;
631
680
}
632
681
633
- CA_LOG_D (" Retry ShardID=" << shardId);
682
+ CA_LOG_T (" Retry ShardID=" << shardId << " with Cookie= " << ifCookieEqual. value_or ( 0 ) );
634
683
SendDataToShard (shardId);
635
684
}
636
685
637
686
// Shard request timeout: log a warning (added in this change) and retry the
// shard, passing the event's cookie to RetryShard.
void Handle (TEvPrivate::TEvShardRequestTimeout::TPtr& ev) {
687
+ CA_LOG_W (" Timeout shardID=" << ev->Get ()->ShardId );
638
688
RetryShard (ev->Get ()->ShardId , ev->Cookie );
639
689
}
640
690
641
691
// Pipe delivery problem: log it (level raised from debug to warning in this
// change) and retry the affected tablet.
void Handle (TEvPipeCache::TEvDeliveryProblem::TPtr& ev) {
642
- CA_LOG_D (" TEvDeliveryProblem was received from tablet: " << ev->Get ()->TabletId );
692
+ CA_LOG_W (" TEvDeliveryProblem was received from tablet: " << ev->Get ()->TabletId );
643
693
// No cookie is supplied here (std::nullopt).
// NOTE(review): presumably RetryShard then retries regardless of cookie — confirm in RetryShard.
RetryShard (ev->Get ()->TabletId , std::nullopt);
644
694
}
645
695
0 commit comments