Skip to content

Commit e2e0636

Browse files
clean configuration for splitting
1 parent a05ef15 commit e2e0636

File tree

10 files changed

+28
-26
lines changed

10 files changed

+28
-26
lines changed

ydb/core/tx/columnshard/engines/changes/compaction.cpp

+3-12
Original file line number | Diff line number | Diff line change
@@ -31,13 +31,6 @@ bool TCompactColumnEngineChanges::DoApplyChanges(TColumnEngineForLogs& self, TAp
3131
return TBase::DoApplyChanges(self, context);
3232
}
3333

34-
// Removed by this commit (all lines carry the "-" marker).
// Computed the number of parts to split into: TotalBlobsSize() divided by
// Limits.GranuleSizeForOverloadPrevent, plus one, with a lower bound of 2.
// srcRows is used only by the precondition check (must be > 1); it does not
// influence the returned value.
ui32 TCompactColumnEngineChanges::NumSplitInto(const ui32 srcRows) const {
35-
Y_ABORT_UNLESS(srcRows > 1);
36-
const ui64 totalBytes = TotalBlobsSize();
37-
// The ui64 quotient is narrowed to ui32 by this initialization.
const ui32 numSplitInto = (totalBytes / Limits.GranuleSizeForOverloadPrevent) + 1;
38-
return std::max<ui32>(2, numSplitInto);
39-
}
40-
4134
// Unchanged context in this diff: forwards to TBase::DoWriteIndex with the
// same arguments and adds no logic of its own.
void TCompactColumnEngineChanges::DoWriteIndex(NColumnShard::TColumnShard& self, TWriteIndexContext& context) {
4235
TBase::DoWriteIndex(self, context);
4336
}
@@ -75,11 +68,9 @@ void TCompactColumnEngineChanges::DoOnFinish(NColumnShard::TColumnShard& self, T
7568
NeedGranuleStatusProvide = false;
7669
}
7770

78-
TCompactColumnEngineChanges::TCompactColumnEngineChanges(const TCompactionLimits& limits, std::shared_ptr<TGranuleMeta> granule, const std::vector<std::shared_ptr<TPortionInfo>>& portions, const TSaverContext& saverContext)
79-
: TBase(limits.GetSplitSettings(), saverContext, StaticTypeName())
80-
, Limits(limits)
81-
, GranuleMeta(granule)
82-
{
71+
TCompactColumnEngineChanges::TCompactColumnEngineChanges(const TSplitSettings& splitSettings, std::shared_ptr<TGranuleMeta> granule, const std::vector<std::shared_ptr<TPortionInfo>>& portions, const TSaverContext& saverContext)
72+
: TBase(splitSettings, saverContext, StaticTypeName())
73+
, GranuleMeta(granule) {
8374
Y_ABORT_UNLESS(GranuleMeta);
8475

8576
SwitchedPortions.reserve(portions.size());

ydb/core/tx/columnshard/engines/changes/compaction.h

+1-4
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,6 @@ class TCompactColumnEngineChanges: public TChangesWithAppend {
1212
using TBase = TChangesWithAppend;
1313
bool NeedGranuleStatusProvide = false;
1414
protected:
15-
const TCompactionLimits Limits;
1615
std::shared_ptr<TGranuleMeta> GranuleMeta;
1716

1817
virtual void DoStart(NColumnShard::TColumnShard& self) override;
@@ -31,14 +30,12 @@ class TCompactColumnEngineChanges: public TChangesWithAppend {
3130

3231
virtual THashSet<TPortionAddress> GetTouchedPortions() const override;
3332

34-
TCompactColumnEngineChanges(const TCompactionLimits& limits, std::shared_ptr<TGranuleMeta> granule, const std::vector<std::shared_ptr<TPortionInfo>>& portions, const TSaverContext& saverContext);
33+
TCompactColumnEngineChanges(const TSplitSettings& splitSettings, std::shared_ptr<TGranuleMeta> granule, const std::vector<std::shared_ptr<TPortionInfo>>& portions, const TSaverContext& saverContext);
3534
~TCompactColumnEngineChanges();
3635

3736
// Returns the fixed type-name string "CS::GENERAL". The constructor shown in
// compaction.cpp passes StaticTypeName() as the third argument to TBase.
static TString StaticTypeName() {
3837
return "CS::GENERAL";
3938
}
40-
41-
ui32 NumSplitInto(const ui32 srcRows) const;
4239
};
4340

4441
}

ydb/core/tx/columnshard/engines/changes/general_compaction.cpp

+3-4
Original file line number | Diff line number | Diff line change
@@ -112,8 +112,8 @@ TConclusionStatus TGeneralCompactColumnEngineChanges::DoConstructBlobs(TConstruc
112112
std::map<std::string, std::vector<TColumnPortionResult>> columnChunks;
113113
ui32 batchIdx = 0;
114114
for (auto&& batchResult : batchResults) {
115-
const ui32 portionRecordsCountLimit = batchResult->num_rows() / (batchResult->num_rows() / 10000 + 1) + 1;
116-
TColumnMergeContext context(columnId, resultSchema, portionRecordsCountLimit, 50 * 1024 * 1024, columnInfo, SaverContext);
115+
const ui32 portionRecordsCountLimit = batchResult->num_rows() / (batchResult->num_rows() / GetSplitSettings().GetExpectedRecordsCountOnPage() + 1) + 1;
116+
TColumnMergeContext context(columnId, resultSchema, portionRecordsCountLimit, GetSplitSettings().GetExpectedUnpackColumnChunkRawSize(), columnInfo, SaverContext);
117117
TMergedColumn mColumn(context);
118118

119119
auto columnPortionIdx = batchResult->GetColumnByName(portionIdFieldName);
@@ -177,8 +177,7 @@ TConclusionStatus TGeneralCompactColumnEngineChanges::DoConstructBlobs(TConstruc
177177
}
178178
batchSlices.emplace_back(portionColumns, schemaDetails, context.Counters.SplitterCounters, GetSplitSettings());
179179
}
180-
181-
TSimilarSlicer slicer(4 * 1024 * 1024);
180+
TSimilarSlicer slicer(GetSplitSettings().GetExpectedPortionSize());
182181
auto packs = slicer.Split(batchSlices);
183182

184183
ui32 recordIdx = 0;

ydb/core/tx/columnshard/engines/changes/with_appended.h

+2-1
Original file line number | Diff line number | Diff line change
@@ -9,8 +9,9 @@ namespace NKikimr::NOlap {
99
class TChangesWithAppend: public TColumnEngineChanges {
1010
private:
1111
using TBase = TColumnEngineChanges;
12-
TSplitSettings SplitSettings;
12+
1313
protected:
14+
TSplitSettings SplitSettings;
1415
TSaverContext SaverContext;
1516
virtual void DoDebugString(TStringOutput& out) const override;
1617
virtual void DoCompile(TFinalizationContext& context) override;

ydb/core/tx/columnshard/engines/storage/optimizer/intervals/blob_size.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ std::shared_ptr<NKikimr::NOlap::TColumnEngineChanges> TBlobsWithSizeLimit::Build
3030
if (currentSum > SizeLimitToMerge || PortionsCount > CountLimitToMerge) {
3131
AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "take_granule_with_small")("portions", portions.size())("current_sum", currentSum);
3232
TSaverContext saverContext(StoragesManager->GetOperator(tierName.value_or(IStoragesManager::DefaultStorageId)), StoragesManager);
33-
return std::make_shared<NCompaction::TGeneralCompactColumnEngineChanges>(limits, granule, portions, saverContext);
33+
return std::make_shared<NCompaction::TGeneralCompactColumnEngineChanges>(limits.GetSplitSettings(), granule, portions, saverContext);
3434
} else {
3535
AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "take_granule_with_small")("skip", "not_enough_data");
3636
}

ydb/core/tx/columnshard/engines/storage/optimizer/intervals/optimizer.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,7 @@ std::shared_ptr<TColumnEngineChanges> TIntervalsOptimizerPlanner::DoGetOptimizat
6565
AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "take_granule")("features", features.DebugJson().GetStringRobust())("count", features.GetPortionsCount());
6666

6767
TSaverContext saverContext(StoragesManager->GetOperator(tierName.value_or(IStoragesManager::DefaultStorageId)), StoragesManager);
68-
return std::make_shared<NCompaction::TGeneralCompactColumnEngineChanges>(limits, granule, portions, saverContext);
68+
return std::make_shared<NCompaction::TGeneralCompactColumnEngineChanges>(limits.GetSplitSettings(), granule, portions, saverContext);
6969
}
7070

7171
void TIntervalsOptimizerPlanner::RemovePortion(const std::shared_ptr<TPortionInfo>& info) {

ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/optimizer.h

+1-1
Original file line number | Diff line number | Diff line change
@@ -747,7 +747,7 @@ class TPortionsBucket: public TMoveOnly {
747747
AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("stop_instant", stopInstant.value_or(TInstant::Zero()))("size", size)("next", NextBorder ? NextBorder->DebugString() : "")
748748
("count", portions.size())("info", Others.DebugString())("event", "start_optimization")("stop_point", stopPoint ? stopPoint->DebugString() : "");
749749
TSaverContext saverContext(storagesManager->GetOperator(IStoragesManager::DefaultStorageId), storagesManager);
750-
auto result = std::make_shared<NCompaction::TGeneralCompactColumnEngineChanges>(limits, granule, portions, saverContext);
750+
auto result = std::make_shared<NCompaction::TGeneralCompactColumnEngineChanges>(limits.GetSplitSettings(), granule, portions, saverContext);
751751
if (MainPortion) {
752752
NIndexedReader::TSortableBatchPosition pos(MainPortion->IndexKeyStart().ToBatch(primaryKeysSchema), 0, primaryKeysSchema->field_names(), {}, false);
753753
result->AddCheckPoint(pos, true, false);

ydb/core/tx/columnshard/engines/storage/optimizer/levels/optimizer.h

+1-1
Original file line number | Diff line number | Diff line change
@@ -450,7 +450,7 @@ class TLevel {
450450
positions.emplace_back(*position);
451451
}
452452
TSaverContext saverContext(StoragesManager->GetOperator(IStoragesManager::DefaultStorageId), StoragesManager);
453-
auto result = std::make_shared<NCompaction::TGeneralCompactColumnEngineChanges>(CompactionLimits, granule, portions, saverContext);
453+
auto result = std::make_shared<NCompaction::TGeneralCompactColumnEngineChanges>(CompactionLimits.GetSplitSettings(), granule, portions, saverContext);
454454
for (auto&& i : positions) {
455455
result->AddCheckPoint(i);
456456
}
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
#include "normalizer.h"
22

33
namespace NKikimr::NOlap {
4+
45
}

ydb/core/tx/columnshard/splitter/settings.h

+14-1
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
#pragma once
2+
23
#include <ydb/library/accessor/accessor.h>
4+
35
#include <util/system/types.h>
46

57
namespace NKikimr::NOlap {
@@ -9,11 +11,22 @@ class TSplitSettings {
911
static const inline i64 DefaultMaxBlobSize = 8 * 1024 * 1024;
1012
static const inline i64 DefaultMinBlobSize = 4 * 1024 * 1024;
1113
static const inline i64 DefaultMinRecordsCount = 10000;
12-
static const inline i64 DefaultMaxPortionSize = 4 * DefaultMaxBlobSize;
14+
static const inline i64 DefaultMaxPortionSize = 6 * DefaultMaxBlobSize;
1315
YDB_ACCESSOR(i64, MaxBlobSize, DefaultMaxBlobSize);
1416
YDB_ACCESSOR(i64, MinBlobSize, DefaultMinBlobSize);
1517
YDB_ACCESSOR(i64, MinRecordsCount, DefaultMinRecordsCount);
1618
YDB_ACCESSOR(i64, MaxPortionSize, DefaultMaxPortionSize);
1719
public:
20+
// Returns MinRecordsCount scaled by 1.5, rounded down.
// Computed in unsigned integer arithmetic (x + x / 2) instead of `1.5 * x`:
// this avoids the round trip through double (inexact above 2^53) and the
// implicit floating-to-integer narrowing on return, which is undefined when
// the value is not representable in ui64. For every non-negative
// MinRecordsCount that double represents exactly, the result is unchanged.
ui64 GetExpectedRecordsCountOnPage() const {
21+
return static_cast<ui64>(MinRecordsCount) + static_cast<ui64>(MinRecordsCount) / 2;
22+
}
23+
24+
// Returns the constant 50 * 1024 * 1024 (presumably a byte count, i.e. 50 MiB
// - confirm against TColumnMergeContext). The value does not depend on any
// configurable field of TSplitSettings. The first factor is widened to ui64
// so the whole product is evaluated in 64-bit arithmetic.
ui64 GetExpectedUnpackColumnChunkRawSize() const {
25+
return static_cast<ui64>(50) * 1024 * 1024;
26+
}
27+
28+
// Returns the configured MaxPortionSize, converted from i64 to the ui64
// return type. The conversion is spelled out; the value is the same as with
// the implicit conversion.
ui64 GetExpectedPortionSize() const {
29+
return static_cast<ui64>(MaxPortionSize);
30+
}
1831
};
1932
}

0 commit comments

Comments
 (0)