@@ -78,8 +78,10 @@ class TColumnChunkRestoreInfo {
78
78
i.second .GetBlobDataVerified ().size ());
79
79
std::vector<NArrow::NAccessor::TDeserializeChunkedArray::TChunk> chunks = { NArrow::NAccessor::TDeserializeChunkedArray::TChunk (
80
80
GetRecordsCount (), i.second .GetBlobDataVerified ()) };
81
- const std::shared_ptr<NArrow::NAccessor::IChunkedArray> arrOriginal = deserialize
82
- ? columnLoader->ApplyVerified (i.second .GetBlobDataVerified (), GetRecordsCount ())
81
+ // const ui32 filledRecordsCount = PartialArray->GetHeader().GetColumnStats().GetColumnRecordsCount(i.second.GetColumnIdx());
82
+ const std::shared_ptr<NArrow::NAccessor::IChunkedArray> arrOriginal =
83
+ deserialize
84
+ ? columnLoader->ApplyVerified (i.second .GetBlobDataVerified (), GetRecordsCount ()/* , filledRecordsCount*/ )
83
85
: std::make_shared<NArrow::NAccessor::TDeserializeChunkedArray>(GetRecordsCount (), columnLoader, std::move (chunks), true );
84
86
if (applyFilter) {
85
87
PartialArray->AddColumn (i.first , applyFilter->Apply (arrOriginal));
@@ -124,14 +126,12 @@ class TColumnChunkRestoreInfo {
124
126
// "others", PartialArray->GetHeader().GetOtherStats().DebugJson().GetStringRobust());
125
127
}
126
128
// InitPartialReader -- this span is a rendered diff: "-" lines are the previous version,
// "+" lines are the replacement, unprefixed lines are unchanged context.
//
// New version: attaches an already-resolved accessor to this chunk, instead of looking
// one up through a TAccessorsCollection by column id and start position (old version).
//
// Checks performed with AFL_VERIFY, in order:
//   - HeaderRange evaluates to false;
//   - PartialArray evaluates to false;
//   - accessor is non-null;
//   - accessor->GetType() == IChunkedArray::EType::SubColumnsPartialArray
//     (the actual type is attached to the verify message under the key " type").
// The type check is what justifies the std::static_pointer_cast below, which itself
// performs no runtime type check.
//
// Effect: PartialArray is set to accessor cast to TSubColumnsPartialArray.
127
- void InitPartialReader (
128
- const ui32 columnId, const ui32 positionStart, const std::shared_ptr<NArrow::NAccessor::TAccessorsCollection>& resources) {
129
+ void InitPartialReader (const std::shared_ptr<NArrow::NAccessor::IChunkedArray>& accessor) {
129
130
AFL_VERIFY (!HeaderRange);
130
131
AFL_VERIFY (!PartialArray);
131
- auto columnAccessor = resources->GetAccessorVerified (columnId);
132
- auto partialArray = columnAccessor->GetArraySlow (positionStart, columnAccessor);
133
- AFL_VERIFY (partialArray.GetArray ()->GetType () == NArrow::NAccessor::IChunkedArray::EType::SubColumnsPartialArray);
134
- PartialArray = std::static_pointer_cast<NArrow::NAccessor::TSubColumnsPartialArray>(partialArray.GetArray ());
132
+ AFL_VERIFY (accessor);
133
+ AFL_VERIFY (accessor->GetType () == NArrow::NAccessor::IChunkedArray::EType::SubColumnsPartialArray)(" type" , accessor->GetType ());
134
+ PartialArray = std::static_pointer_cast<NArrow::NAccessor::TSubColumnsPartialArray>(accessor);
135
135
}
136
136
137
137
TColumnChunkRestoreInfo (const TBlobRange& fullChunkRange, const NArrow::NAccessor::TChunkConstructionData& chunkExternalInfo)
@@ -179,8 +179,10 @@ class TSubColumnsFetchLogic: public IKernelFetchLogic {
179
179
}
180
180
Resources->AddVerified (GetColumnId (), compositeBuilder.Finish (), true );
181
181
} else {
182
+ ui32 pos = 0 ;
182
183
for (auto && i : ColumnChunks) {
183
- i.Finish (Resources->GetAppliedFilter (), Source);
184
+ i.Finish (std::make_shared<NArrow::TColumnFilter>(Resources->GetAppliedFilter ()->Slice (pos, i.GetRecordsCount ())), Source);
185
+ pos += i.GetRecordsCount ();
184
186
}
185
187
}
186
188
}
@@ -238,23 +240,36 @@ class TSubColumnsFetchLogic: public IKernelFetchLogic {
238
240
auto itFilter = cFilter.GetIterator (false , Source->GetRecordsCount ());
239
241
bool itFinished = false ;
240
242
241
- NeedToAddResource = !Resources->HasColumn (GetColumnId ());
242
- ui32 posCurrent = 0 ;
243
- for (auto && c : columnChunks) {
243
+ auto accessor = Resources->GetAccessorOptional (GetColumnId ());
244
+ NeedToAddResource = !accessor;
245
+ std::vector<std::shared_ptr<NArrow::NAccessor::IChunkedArray>> chunks;
246
+ if (!NeedToAddResource) {
247
+ if (accessor->GetType () == NArrow::NAccessor::IChunkedArray::EType::CompositeChunkedArray) {
248
+ auto composite = std::static_pointer_cast<NArrow::NAccessor::TCompositeChunkedArray>(accessor);
249
+ chunks = composite->GetChunks ();
250
+ } else {
251
+ chunks.emplace_back (accessor);
252
+ }
253
+ }
254
+ ui32 resChunkIdx = 0 ;
255
+ for (ui32 chunkIdx = 0 ; chunkIdx < columnChunks.size (); ++chunkIdx) {
256
+ auto & meta = columnChunks[chunkIdx]->GetMeta ();
244
257
AFL_VERIFY (!itFinished);
245
- if (!itFilter.IsBatchForSkip (c-> GetMeta () .GetRecordsCount ())) {
246
- const TBlobRange range = Source->RestoreBlobRange (c ->BlobRange );
247
- ColumnChunks.emplace_back (range, ChunkExternalInfo.GetSubset (c-> GetMeta () .GetRecordsCount ()));
258
+ if (!itFilter.IsBatchForSkip (meta .GetRecordsCount ())) {
259
+ const TBlobRange range = Source->RestoreBlobRange (columnChunks[chunkIdx] ->BlobRange );
260
+ ColumnChunks.emplace_back (range, ChunkExternalInfo.GetSubset (meta .GetRecordsCount ()));
248
261
if (!NeedToAddResource) {
249
- ColumnChunks.back ().InitPartialReader (GetColumnId (), posCurrent, Resources);
262
+ AFL_VERIFY (resChunkIdx < chunks.size ())(" chunks" , chunks.size ())(" meta" , columnChunks.size ())(" need" , NeedToAddResource);
263
+ ColumnChunks.back ().InitPartialReader (chunks[resChunkIdx]);
264
+ ++resChunkIdx;
250
265
}
251
266
ColumnChunks.back ().InitReading (reading, SubColumns);
252
267
} else {
253
- ColumnChunks.emplace_back (TColumnChunkRestoreInfo::BuildEmpty (ChunkExternalInfo.GetSubset (c-> GetMeta () .GetRecordsCount ())));
268
+ ColumnChunks.emplace_back (TColumnChunkRestoreInfo::BuildEmpty (ChunkExternalInfo.GetSubset (meta .GetRecordsCount ())));
254
269
}
255
- itFinished = !itFilter.Next (c->GetMeta ().GetRecordsCount ());
256
- posCurrent += c->GetMeta ().GetRecordsCount ();
270
+ itFinished = !itFilter.Next (meta.GetRecordsCount ());
257
271
}
272
+ AFL_VERIFY (NeedToAddResource || (resChunkIdx == chunks.size ()));
258
273
AFL_VERIFY (itFinished)(" filter" , itFilter.DebugString ())(" count" , Source->GetRecordsCount ());
259
274
for (auto && i : blobsAction.GetReadingActions ()) {
260
275
nextRead.Add (i);
0 commit comments