@@ -160,20 +160,20 @@ struct TEvS3FileQueue {
160
160
161
161
EvEnd
162
162
};
163
- static_assert(EvEnd < EventSpaceEnd(NKikimr::TKikimrEvents::ES_S3_FILE_QUEUE),
163
+ static_assert(EvEnd < EventSpaceEnd(NKikimr::TKikimrEvents::ES_S3_FILE_QUEUE),
164
164
" expect EvEnd < EventSpaceEnd(TEvents::ES_S3_FILE_QUEUE)");
165
-
165
+
166
166
struct TEvUpdateConsumersCount :
167
167
public TEventPB<TEvUpdateConsumersCount, NS3::FileQueue::TEvUpdateConsumersCount, EvUpdateConsumersCount> {
168
-
168
+
169
169
explicit TEvUpdateConsumersCount (ui64 consumersCountDelta = 0 ) {
170
170
Record.SetConsumersCountDelta (consumersCountDelta);
171
171
}
172
172
};
173
173
174
174
struct TEvAck :
175
175
public TEventPB<TEvAck, NS3::FileQueue::TEvAck, EvAck> {
176
-
176
+
177
177
TEvAck () = default ;
178
178
179
179
explicit TEvAck (const TMessageTransportMeta& transportMeta) {
@@ -388,6 +388,7 @@ class TS3FileQueueActor : public TActorBootstrapped<TS3FileQueueActor> {
388
388
TPathList paths,
389
389
size_t prefetchSize,
390
390
ui64 fileSizeLimit,
391
+ ui64 readLimit,
391
392
bool useRuntimeListing,
392
393
ui64 consumersCount,
393
394
ui64 batchSizeLimit,
@@ -401,6 +402,7 @@ class TS3FileQueueActor : public TActorBootstrapped<TS3FileQueueActor> {
401
402
: TxId(std::move(txId))
402
403
, PrefetchSize(prefetchSize)
403
404
, FileSizeLimit(fileSizeLimit)
405
+ , ReadLimit(readLimit)
404
406
, MaybeIssues(Nothing())
405
407
, UseRuntimeListing(useRuntimeListing)
406
408
, ConsumersCount(consumersCount)
@@ -513,7 +515,9 @@ class TS3FileQueueActor : public TActorBootstrapped<TS3FileQueueActor> {
513
515
// skip 'directories'
514
516
continue ;
515
517
}
516
- if (object.Size > FileSizeLimit) {
518
+
519
+ const ui64 bytesUsed = std::min (object.Size , ReadLimit);
520
+ if (bytesUsed > FileSizeLimit) {
517
521
auto errorMessage = TStringBuilder ()
518
522
<< " Size of object " << object.Path << " = "
519
523
<< object.Size
@@ -525,10 +529,10 @@ class TS3FileQueueActor : public TActorBootstrapped<TS3FileQueueActor> {
525
529
LOG_T (" TS3FileQueueActor" , " SaveRetrievedResults adding path: " << object.Path << " of size " << object.Size );
526
530
TObjectPath objectPath;
527
531
objectPath.SetPath (object.Path );
528
- objectPath.SetSize (object. Size );
532
+ objectPath.SetSize (bytesUsed );
529
533
objectPath.SetPathIndex (CurrentDirectoryPathIndex);
530
534
Objects.emplace_back (std::move (objectPath));
531
- ObjectsTotalSize += object. Size ;
535
+ ObjectsTotalSize += bytesUsed ;
532
536
}
533
537
return true ;
534
538
}
@@ -598,7 +602,7 @@ class TS3FileQueueActor : public TActorBootstrapped<TS3FileQueueActor> {
598
602
Send (ev->Sender , new TEvS3FileQueue::TEvObjectPathReadError (*MaybeIssues, ev->Get ()->Record .GetTransportMeta ()));
599
603
TryFinish (ev->Sender , ev->Get ()->Record .GetTransportMeta ().GetSeqNo ());
600
604
}
601
-
605
+
602
606
void HandleUpdateConsumersCount (TEvS3FileQueue::TEvUpdateConsumersCount::TPtr& ev) {
603
607
if (!UpdatedConsumers.contains (ev->Sender )) {
604
608
UpdatedConsumers.insert (ev->Sender );
@@ -653,7 +657,7 @@ class TS3FileQueueActor : public TActorBootstrapped<TS3FileQueueActor> {
653
657
654
658
LOG_D (" TS3FileQueueActor" , " SendObjects Sending " << result.size () << " objects to consumer with id " << consumer << " , " << ObjectsTotalSize << " bytes left" );
655
659
Send (consumer, new TEvS3FileQueue::TEvObjectPathBatch (std::move (result), HasNoMoreItems (), transportMeta));
656
-
660
+
657
661
if (HasNoMoreItems ()) {
658
662
TryFinish (consumer, transportMeta.GetSeqNo ());
659
663
}
@@ -675,7 +679,7 @@ class TS3FileQueueActor : public TActorBootstrapped<TS3FileQueueActor> {
675
679
}
676
680
677
681
bool CanSendToConsumer (const TActorId& consumer) {
678
- return !UseRuntimeListing || RoundRobinStageFinished ||
682
+ return !UseRuntimeListing || RoundRobinStageFinished ||
679
683
(StartedConsumers.size () < ConsumersCount && !StartedConsumers.contains (consumer));
680
684
}
681
685
@@ -753,7 +757,7 @@ class TS3FileQueueActor : public TActorBootstrapped<TS3FileQueueActor> {
753
757
}
754
758
});
755
759
}
756
-
760
+
757
761
void ScheduleRequest (const TActorId& consumer, const TMessageTransportMeta& transportMeta) {
758
762
PendingRequests[consumer].push_back (transportMeta);
759
763
HasPendingRequests = true ;
@@ -790,7 +794,7 @@ class TS3FileQueueActor : public TActorBootstrapped<TS3FileQueueActor> {
790
794
}
791
795
}
792
796
}
793
-
797
+
794
798
void TryFinish (const TActorId& consumer, ui64 seqNo) {
795
799
LOG_T (" TS3FileQueueActor" , " TryFinish from consumer " << consumer << " , " << FinishedConsumers.size () << " consumers already finished, seqNo=" << seqNo);
796
800
if (FinishingConsumerToLastSeqNo.contains (consumer)) {
@@ -814,6 +818,7 @@ class TS3FileQueueActor : public TActorBootstrapped<TS3FileQueueActor> {
814
818
815
819
size_t PrefetchSize;
816
820
ui64 FileSizeLimit;
821
+ ui64 ReadLimit;
817
822
TMaybe<NS3Lister::IS3Lister::TPtr> MaybeLister = Nothing();
818
823
TMaybe<NThreading::TFuture<NS3Lister::TListResult>> ListingFuture;
819
824
size_t CurrentDirectoryPathIndex = 0 ;
@@ -838,7 +843,7 @@ class TS3FileQueueActor : public TActorBootstrapped<TS3FileQueueActor> {
838
843
const TString Pattern;
839
844
const ES3PatternVariant PatternVariant;
840
845
const ES3PatternType PatternType;
841
-
846
+
842
847
static constexpr TDuration PoisonTimeout = TDuration::Hours(3 );
843
848
static constexpr TDuration RoundRobinStageTimeout = TDuration::Seconds(3 );
844
849
};
@@ -918,6 +923,7 @@ class TS3ReadActor : public TActorBootstrapped<TS3ReadActor>, public IDqComputeA
918
923
std::move (Paths),
919
924
ReadActorFactoryCfg.MaxInflight * 2 ,
920
925
FileSizeLimit,
926
+ SizeLimit,
921
927
false ,
922
928
1 ,
923
929
FileQueueBatchSizeLimit,
@@ -1097,7 +1103,7 @@ class TS3ReadActor : public TActorBootstrapped<TS3ReadActor>, public IDqComputeA
1097
1103
void HandleAck (TEvS3FileQueue::TEvAck::TPtr& ev) {
1098
1104
FileQueueEvents.OnEventReceived (ev);
1099
1105
}
1100
-
1106
+
1101
1107
static void OnDownloadFinished (TActorSystem* actorSystem, TActorId selfId, const TString& requestId, IHTTPGateway::TResult&& result, size_t pathInd, const TString path) {
1102
1108
if (!result.Issues ) {
1103
1109
actorSystem->Send (new IEventHandle (selfId, TActorId (), new TEvPrivate::TEvReadResult (std::move (result.Content ), requestId, pathInd, path)));
@@ -1209,7 +1215,7 @@ class TS3ReadActor : public TActorBootstrapped<TS3ReadActor>, public IDqComputeA
1209
1215
auto issues = NS3Util::AddParentIssue (TStringBuilder{} << " Error while reading file " << path << " with request id [" << requestId << " ]" , TIssues{result->Get ()->Error });
1210
1216
Send (ComputeActorId, new TEvAsyncInputError (InputIndex, std::move (issues), NYql::NDqProto::StatusIds::EXTERNAL_ERROR));
1211
1217
}
1212
-
1218
+
1213
1219
void Handle (const NYql::NDq::TEvRetryQueuePrivate::TEvRetry::TPtr&) {
1214
1220
FileQueueEvents.Retry ();
1215
1221
}
@@ -2088,7 +2094,7 @@ class TS3ReadCoroImpl : public TActorCoroImpl {
2088
2094
if (isCancelled) {
2089
2095
LOG_CORO_D (" RunCoroBlockArrowParserOverHttp - STOPPED ON SATURATION, downloaded " <<
2090
2096
QueueBufferCounter->DownloadedBytes << " bytes" );
2091
- break ;
2097
+ break ;
2092
2098
}
2093
2099
}
2094
2100
}
@@ -2538,6 +2544,7 @@ class TS3StreamReadActor : public TActorBootstrapped<TS3StreamReadActor>, public
2538
2544
::NMonitoring::TDynamicCounterPtr counters,
2539
2545
::NMonitoring::TDynamicCounterPtr taskCounters,
2540
2546
ui64 fileSizeLimit,
2547
+ ui64 readLimit,
2541
2548
std::optional<ui64> rowsLimitHint,
2542
2549
IMemoryQuotaManager::TPtr memoryQuotaManager,
2543
2550
bool useRuntimeListing,
@@ -2564,6 +2571,7 @@ class TS3StreamReadActor : public TActorBootstrapped<TS3StreamReadActor>, public
2564
2571
, TaskCounters(std::move(taskCounters))
2565
2572
, FileQueueActor(fileQueueActor)
2566
2573
, FileSizeLimit(fileSizeLimit)
2574
+ , ReadLimit(readLimit)
2567
2575
, MemoryQuotaManager(memoryQuotaManager)
2568
2576
, UseRuntimeListing(useRuntimeListing)
2569
2577
, FileQueueBatchSizeLimit(fileQueueBatchSizeLimit)
@@ -2622,6 +2630,7 @@ class TS3StreamReadActor : public TActorBootstrapped<TS3StreamReadActor>, public
2622
2630
std::move (Paths),
2623
2631
ReadActorFactoryCfg.MaxInflight * 2 ,
2624
2632
FileSizeLimit,
2633
+ ReadLimit,
2625
2634
false ,
2626
2635
1 ,
2627
2636
FileQueueBatchSizeLimit,
@@ -2784,7 +2793,7 @@ class TS3StreamReadActor : public TActorBootstrapped<TS3StreamReadActor>, public
2784
2793
void CommitState (const NDqProto::TCheckpoint&) final {}
2785
2794
2786
2795
ui64 GetInputIndex () const final {
2787
- return InputIndex;
2796
+ return InputIndex;
2788
2797
}
2789
2798
2790
2799
const TDqAsyncStats& GetIngressStats () const final {
@@ -3038,7 +3047,7 @@ class TS3StreamReadActor : public TActorBootstrapped<TS3StreamReadActor>, public
3038
3047
}
3039
3048
}
3040
3049
}
3041
-
3050
+
3042
3051
void Handle (TEvS3FileQueue::TEvAck::TPtr& ev) {
3043
3052
FileQueueEvents.OnEventReceived (ev);
3044
3053
}
@@ -3136,6 +3145,7 @@ class TS3StreamReadActor : public TActorBootstrapped<TS3StreamReadActor>, public
3136
3145
std::set<NActors::TActorId> CoroActors;
3137
3146
NActors::TActorId FileQueueActor;
3138
3147
const ui64 FileSizeLimit;
3148
+ const ui64 ReadLimit;
3139
3149
bool Bootstrapped = false ;
3140
3150
IMemoryQuotaManager::TPtr MemoryQuotaManager;
3141
3151
bool UseRuntimeListing;
@@ -3295,6 +3305,7 @@ IActor* CreateS3FileQueueActor(
3295
3305
TPathList paths,
3296
3306
size_t prefetchSize,
3297
3307
ui64 fileSizeLimit,
3308
+ ui64 readLimit,
3298
3309
bool useRuntimeListing,
3299
3310
ui64 consumersCount,
3300
3311
ui64 batchSizeLimit,
@@ -3310,6 +3321,7 @@ IActor* CreateS3FileQueueActor(
3310
3321
paths,
3311
3322
prefetchSize,
3312
3323
fileSizeLimit,
3324
+ readLimit,
3313
3325
useRuntimeListing,
3314
3326
consumersCount,
3315
3327
batchSizeLimit,
@@ -3394,15 +3406,15 @@ std::pair<NYql::NDq::IDqComputeActorAsyncInput*, IActor*> CreateS3ReadActor(
3394
3406
if (params.GetRowsLimitHint () != 0 ) {
3395
3407
rowsLimitHint = params.GetRowsLimitHint ();
3396
3408
}
3397
-
3409
+
3398
3410
TActorId fileQueueActor;
3399
3411
if (auto it = settings.find (" fileQueueActor" ); it != settings.cend ()) {
3400
3412
NActorsProto::TActorId protoId;
3401
3413
TMemoryInput inputStream (it->second );
3402
3414
ParseFromTextFormat (inputStream, protoId);
3403
3415
fileQueueActor = ActorIdFromProto (protoId);
3404
3416
}
3405
-
3417
+
3406
3418
ui64 fileQueueBatchSizeLimit = 0 ;
3407
3419
if (auto it = settings.find (" fileQueueBatchSizeLimit" ); it != settings.cend ()) {
3408
3420
fileQueueBatchSizeLimit = FromString<ui64>(it->second );
@@ -3412,7 +3424,7 @@ std::pair<NYql::NDq::IDqComputeActorAsyncInput*, IActor*> CreateS3ReadActor(
3412
3424
if (auto it = settings.find (" fileQueueBatchObjectCountLimit" ); it != settings.cend ()) {
3413
3425
fileQueueBatchObjectCountLimit = FromString<ui64>(it->second );
3414
3426
}
3415
-
3427
+
3416
3428
ui64 fileQueueConsumersCountDelta = 0 ;
3417
3429
if (readRanges.size () > 1 ) {
3418
3430
fileQueueConsumersCountDelta = readRanges.size () - 1 ;
@@ -3520,9 +3532,14 @@ std::pair<NYql::NDq::IDqComputeActorAsyncInput*, IActor*> CreateS3ReadActor(
3520
3532
3521
3533
#undef SET_FLAG
3522
3534
#undef SUPPORTED_FLAGS
3535
+ ui64 sizeLimit = std::numeric_limits<ui64>::max ();
3536
+ if (const auto it = settings.find (" sizeLimit" ); settings.cend () != it) {
3537
+ sizeLimit = FromString<ui64>(it->second );
3538
+ }
3539
+
3523
3540
const auto actor = new TS3StreamReadActor (inputIndex, statsLevel, txId, std::move (gateway), holderFactory, params.GetUrl (), authInfo, pathPattern, pathPatternVariant,
3524
3541
std::move (paths), addPathIndex, readSpec, computeActorId, retryPolicy,
3525
- cfg, counters, taskCounters, fileSizeLimit, rowsLimitHint, memoryQuotaManager,
3542
+ cfg, counters, taskCounters, fileSizeLimit, sizeLimit, rowsLimitHint, memoryQuotaManager,
3526
3543
params.GetUseRuntimeListing (), fileQueueActor, fileQueueBatchSizeLimit, fileQueueBatchObjectCountLimit, fileQueueConsumersCountDelta);
3527
3544
3528
3545
return {actor, actor};
0 commit comments