|
16 | 16 |
|
17 | 17 | namespace NKikimr::NOlap::NCompaction {
|
18 | 18 |
|
19 |
| -TConclusionStatus TGeneralCompactColumnEngineChanges::DoConstructBlobs(TConstructionContext& context) noexcept { |
| 19 | +void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByFullBatches(TConstructionContext& context) noexcept { |
20 | 20 | std::vector<TPortionInfoWithBlobs> portions = TPortionInfoWithBlobs::RestorePortions(SwitchedPortions, Blobs);
|
21 | 21 | Blobs.clear();
|
22 |
| - i64 portionsSize = 0; |
23 |
| - i64 portionsCount = 0; |
24 |
| - i64 insertedPortionsSize = 0; |
25 |
| - i64 compactedPortionsSize = 0; |
26 |
| - i64 otherPortionsSize = 0; |
27 |
| - for (auto&& i : SwitchedPortions) { |
28 |
| - if (i.GetMeta().GetProduced() == TPortionMeta::EProduced::INSERTED) { |
29 |
| - insertedPortionsSize += i.GetBlobBytes(); |
30 |
| - } else if (i.GetMeta().GetProduced() == TPortionMeta::EProduced::SPLIT_COMPACTED) { |
31 |
| - compactedPortionsSize += i.GetBlobBytes(); |
32 |
| - } else { |
33 |
| - otherPortionsSize += i.GetBlobBytes(); |
| 22 | + std::vector<std::shared_ptr<arrow::RecordBatch>> batchResults; |
| 23 | + auto resultSchema = context.SchemaVersions.GetLastSchema(); |
| 24 | + { |
| 25 | + auto resultDataSchema = resultSchema->GetIndexInfo().ArrowSchemaWithSpecials(); |
| 26 | + NIndexedReader::TMergePartialStream mergeStream(resultSchema->GetIndexInfo().GetReplaceKey(), resultDataSchema, false); |
| 27 | + for (auto&& i : portions) { |
| 28 | + auto dataSchema = context.SchemaVersions.GetSchema(i.GetPortionInfo().GetMinSnapshot()); |
| 29 | + auto batch = i.GetBatch(dataSchema, *resultSchema); |
| 30 | + batch = resultSchema->NormalizeBatch(*dataSchema, batch); |
| 31 | + Y_DEBUG_ABORT_UNLESS(NArrow::IsSortedAndUnique(batch, resultSchema->GetIndexInfo().GetReplaceKey())); |
| 32 | + mergeStream.AddSource(batch, nullptr); |
34 | 33 | }
|
35 |
| - portionsSize += i.GetBlobBytes(); |
36 |
| - ++portionsCount; |
| 34 | + batchResults = mergeStream.DrainAllParts(CheckPoints, resultDataSchema->fields()); |
37 | 35 | }
|
38 |
| - NChanges::TGeneralCompactionCounters::OnPortionsKind(insertedPortionsSize, compactedPortionsSize, otherPortionsSize); |
39 |
| - NChanges::TGeneralCompactionCounters::OnRepackPortions(portionsCount, portionsSize); |
| 36 | + Y_ABORT_UNLESS(batchResults.size()); |
| 37 | + for (auto&& b : batchResults) { |
| 38 | + auto portions = MakeAppendedPortions(b, GranuleMeta->GetPathId(), resultSchema->GetSnapshot(), GranuleMeta.get(), context); |
| 39 | + Y_ABORT_UNLESS(portions.size()); |
| 40 | + for (auto& portion : portions) { |
| 41 | + AppendedPortions.emplace_back(std::move(portion)); |
| 42 | + } |
| 43 | + } |
| 44 | +} |
40 | 45 |
|
| 46 | +void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks(TConstructionContext& context) noexcept { |
| 47 | + std::vector<TPortionInfoWithBlobs> portions = TPortionInfoWithBlobs::RestorePortions(SwitchedPortions, Blobs); |
| 48 | + Blobs.clear(); |
41 | 49 | static const TString portionIdFieldName = "$$__portion_id";
|
42 | 50 | static const TString portionRecordIndexFieldName = "$$__portion_record_idx";
|
43 | 51 | static const std::shared_ptr<arrow::Field> portionIdField = std::make_shared<arrow::Field>(portionIdFieldName, std::make_shared<arrow::UInt16Type>());
|
@@ -192,6 +200,34 @@ TConclusionStatus TGeneralCompactColumnEngineChanges::DoConstructBlobs(TConstruc
|
192 | 200 | recordIdx += slice.GetRecordsCount();
|
193 | 201 | }
|
194 | 202 | }
|
| 203 | +} |
| 204 | + |
| 205 | +TConclusionStatus TGeneralCompactColumnEngineChanges::DoConstructBlobs(TConstructionContext& context) noexcept { |
| 206 | + i64 portionsSize = 0; |
| 207 | + i64 portionsCount = 0; |
| 208 | + i64 insertedPortionsSize = 0; |
| 209 | + i64 compactedPortionsSize = 0; |
| 210 | + i64 otherPortionsSize = 0; |
| 211 | + for (auto&& i : SwitchedPortions) { |
| 212 | + if (i.GetMeta().GetProduced() == TPortionMeta::EProduced::INSERTED) { |
| 213 | + insertedPortionsSize += i.GetBlobBytes(); |
| 214 | + } else if (i.GetMeta().GetProduced() == TPortionMeta::EProduced::SPLIT_COMPACTED) { |
| 215 | + compactedPortionsSize += i.GetBlobBytes(); |
| 216 | + } else { |
| 217 | + otherPortionsSize += i.GetBlobBytes(); |
| 218 | + } |
| 219 | + portionsSize += i.GetBlobBytes(); |
| 220 | + ++portionsCount; |
| 221 | + } |
| 222 | + NChanges::TGeneralCompactionCounters::OnPortionsKind(insertedPortionsSize, compactedPortionsSize, otherPortionsSize); |
| 223 | + NChanges::TGeneralCompactionCounters::OnRepackPortions(portionsCount, portionsSize); |
| 224 | + |
| 225 | + if (AppDataVerified().ColumnShardConfig.GetUseChunkedMergeOnCompaction()) { |
| 226 | + BuildAppendedPortionsByChunks(context); |
| 227 | + } else { |
| 228 | + BuildAppendedPortionsByFullBatches(context); |
| 229 | + } |
| 230 | + |
195 | 231 | if (IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD)) {
|
196 | 232 | TStringBuilder sbSwitched;
|
197 | 233 | sbSwitched << "";
|
|
0 commit comments