@@ -95,8 +95,12 @@ class TDqOutputChannel : public IDqOutputChannel {
95
95
return ;
96
96
}
97
97
98
+ ui32 rows = Packer.IsBlock () ?
99
+ NKikimr::NMiniKQL::TArrowBlock::From (values[width - 1 ]).GetDatum ().scalar_as <arrow::UInt64Scalar>().value
100
+ : 1 ;
101
+
98
102
if (PushStats.CollectBasic ()) {
99
- PushStats.Rows ++ ;
103
+ PushStats.Rows += rows ;
100
104
PushStats.Chunks ++;
101
105
PushStats.Resume ();
102
106
}
@@ -111,6 +115,7 @@ class TDqOutputChannel : public IDqOutputChannel {
111
115
}
112
116
113
117
PackerCurrentChunkCount++;
118
+ PackerCurrentRowCount += rows;
114
119
115
120
size_t packerSize = Packer.PackedSizeEstimate ();
116
121
if (packerSize >= MaxChunkBytes) {
@@ -121,8 +126,11 @@ class TDqOutputChannel : public IDqOutputChannel {
121
126
}
122
127
PackedDataSize += Data.back ().Buffer .Size ();
123
128
PackedChunkCount += PackerCurrentChunkCount;
129
+ PackedRowCount += PackerCurrentRowCount;
124
130
Data.back ().ChunkCount = PackerCurrentChunkCount;
131
+ Data.back ().RowCount = PackerCurrentRowCount;
125
132
PackerCurrentChunkCount = 0 ;
133
+ PackerCurrentRowCount = 0 ;
126
134
packerSize = 0 ;
127
135
}
128
136
@@ -134,11 +142,13 @@ class TDqOutputChannel : public IDqOutputChannel {
134
142
TDqSerializedBatch data;
135
143
data.Proto .SetTransportVersion (TransportVersion);
136
144
data.Proto .SetChunks (head.ChunkCount );
145
+ data.Proto .SetRows (head.RowCount );
137
146
data.SetPayload (std::move (head.Buffer ));
138
147
Storage->Put (NextStoredId++, SaveForSpilling (std::move (data)));
139
148
140
149
PackedDataSize -= bufSize;
141
150
PackedChunkCount -= head.ChunkCount ;
151
+ PackedRowCount -= head.RowCount ;
142
152
143
153
SpilledChunkCount += head.ChunkCount ;
144
154
@@ -199,22 +209,26 @@ class TDqOutputChannel : public IDqOutputChannel {
199
209
} else if (!Data.empty ()) {
200
210
auto & packed = Data.front ();
201
211
PackedChunkCount -= packed.ChunkCount ;
212
+ PackedRowCount -= packed.RowCount ;
202
213
PackedDataSize -= packed.Buffer .Size ();
203
214
data.Proto .SetChunks (packed.ChunkCount );
215
+ data.Proto .SetRows (packed.RowCount );
204
216
data.SetPayload (std::move (packed.Buffer ));
205
217
Data.pop_front ();
206
218
} else {
207
219
data.Proto .SetChunks (PackerCurrentChunkCount);
220
+ data.Proto .SetRows (PackerCurrentRowCount);
208
221
data.SetPayload (FinishPackAndCheckSize ());
209
222
PackerCurrentChunkCount = 0 ;
223
+ PackerCurrentRowCount = 0 ;
210
224
}
211
225
212
226
DLOG (" Took " << data.RowCount () << " rows" );
213
227
214
228
if (PopStats.CollectBasic ()) {
215
229
PopStats.Bytes += data.Size ();
216
230
PopStats.Rows += data.RowCount ();
217
- PopStats.Chunks ++;
231
+ PopStats.Chunks ++; // pop chunks do not match push chunks
218
232
if (!IsFull () || FirstStoredId == NextStoredId) {
219
233
PopStats.Resume ();
220
234
}
@@ -257,28 +271,43 @@ class TDqOutputChannel : public IDqOutputChannel {
257
271
data.Proto .SetTransportVersion (TransportVersion);
258
272
if (SpilledChunkCount == 0 && PackedChunkCount == 0 ) {
259
273
data.Proto .SetChunks (PackerCurrentChunkCount);
274
+ data.Proto .SetRows (PackerCurrentRowCount);
260
275
data.SetPayload (FinishPackAndCheckSize ());
276
+ if (PushStats.CollectBasic ()) {
277
+ PushStats.Bytes += data.Payload .Size ();
278
+ }
261
279
PackerCurrentChunkCount = 0 ;
280
+ PackerCurrentRowCount = 0 ;
262
281
return true ;
263
282
}
264
283
265
284
// Repack all - that's why PopAll should never be used
266
285
if (PackerCurrentChunkCount) {
267
286
Data.emplace_back ();
268
287
Data.back ().Buffer = FinishPackAndCheckSize ();
288
+ if (PushStats.CollectBasic ()) {
289
+ PushStats.Bytes += Data.back ().Buffer .Size ();
290
+ }
269
291
PackedDataSize += Data.back ().Buffer .Size ();
270
292
PackedChunkCount += PackerCurrentChunkCount;
293
+ PackedRowCount += PackerCurrentRowCount;
271
294
Data.back ().ChunkCount = PackerCurrentChunkCount;
295
+ Data.back ().RowCount = PackerCurrentRowCount;
272
296
PackerCurrentChunkCount = 0 ;
297
+ PackerCurrentRowCount = 0 ;
273
298
}
274
299
275
300
NKikimr::NMiniKQL::TUnboxedValueBatch rows (OutputType);
301
+ size_t repackedChunkCount = 0 ;
302
+ size_t repackedRowCount = 0 ;
276
303
for (;;) {
277
- TDqSerializedBatch chunk ;
278
- if (!this ->Pop (chunk )) {
304
+ TDqSerializedBatch batch ;
305
+ if (!this ->Pop (batch )) {
279
306
break ;
280
307
}
281
- Packer.UnpackBatch (chunk.PullPayload (), HolderFactory, rows);
308
+ repackedChunkCount += batch.ChunkCount ();
309
+ repackedRowCount += batch.RowCount ();
310
+ Packer.UnpackBatch (batch.PullPayload (), HolderFactory, rows);
282
311
}
283
312
284
313
if (OutputType->IsMulti ()) {
@@ -291,7 +320,8 @@ class TDqOutputChannel : public IDqOutputChannel {
291
320
});
292
321
}
293
322
294
- data.Proto .SetChunks (rows.RowCount ()); // 1 UVB "row" is Chunk
323
+ data.Proto .SetChunks (repackedChunkCount);
324
+ data.Proto .SetRows (repackedRowCount);
295
325
data.SetPayload (FinishPackAndCheckSize ());
296
326
if (PopStats.CollectBasic ()) {
297
327
PopStats.Bytes += data.Size ();
@@ -332,7 +362,12 @@ class TDqOutputChannel : public IDqOutputChannel {
332
362
ui64 rows = GetValuesCount ();
333
363
Data.clear ();
334
364
Packer.Clear ();
335
- SpilledChunkCount = PackedDataSize = PackedChunkCount = PackerCurrentChunkCount = 0 ;
365
+ PackedDataSize = 0 ;
366
+ PackedChunkCount = 0 ;
367
+ PackedRowCount = 0 ;
368
+ SpilledChunkCount = 0 ;
369
+ PackerCurrentChunkCount = 0 ;
370
+ PackerCurrentRowCount = 0 ;
336
371
FirstStoredId = NextStoredId;
337
372
return rows;
338
373
}
@@ -359,6 +394,7 @@ class TDqOutputChannel : public IDqOutputChannel {
359
394
// One finalized, packed batch held in the `Data` deque until it is popped
// or spilled to storage.
struct TSerializedBatch {
360
395
// Packed payload of the batch; its Size() is what gets added to and
// subtracted from PackedDataSize.
TChunkedBuffer Buffer;
361
396
// Value of PackerCurrentChunkCount at the moment the batch was finalized;
// reported via Proto.SetChunks() and used to adjust PackedChunkCount.
ui64 ChunkCount = 0 ;
397
// Value of PackerCurrentRowCount at the moment the batch was finalized;
// reported via Proto.SetRows() and used to adjust PackedRowCount.
// May differ from ChunkCount: when the packer is in block mode a single
// pushed value contributes the row count stored in its last column.
+ ui64 RowCount = 0 ;
362
398
};
363
399
std::deque<TSerializedBatch> Data;
364
400
@@ -368,8 +404,10 @@ class TDqOutputChannel : public IDqOutputChannel {
368
404
369
405
size_t PackedDataSize = 0 ;
370
406
size_t PackedChunkCount = 0 ;
407
+ size_t PackedRowCount = 0 ;
371
408
372
409
size_t PackerCurrentChunkCount = 0 ;
410
+ size_t PackerCurrentRowCount = 0 ;
373
411
374
412
bool Finished = false ;
375
413
0 commit comments