Skip to content

Commit 2695540

Browse files
tiers reading optimization policy (#4045)
1 parent 5fe2fa9 commit 2695540

File tree

5 files changed

+74
-18
lines changed

5 files changed

+74
-18
lines changed

ydb/core/tx/columnshard/blobs_action/abstract/read.cpp

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -10,24 +10,10 @@ void IBlobsReadingAction::StartReading(std::vector<TBlobRange>&& ranges) {
1010
for (auto&& i : ranges) {
1111
Counters->OnRequest(i.Size);
1212
}
13-
std::sort(ranges.begin(), ranges.end());
1413
THashSet<TBlobRange> result;
15-
std::optional<TBlobRange> currentRange;
16-
std::vector<TBlobRange> currentList;
17-
for (auto&& br : ranges) {
18-
if (!currentRange) {
19-
currentRange = br;
20-
} else if (!currentRange->TryGlueWithNext(br)) {
21-
result.emplace(*currentRange);
22-
Groups.emplace(*currentRange, std::move(currentList));
23-
currentRange = br;
24-
currentList.clear();
25-
}
26-
currentList.emplace_back(br);
27-
}
28-
if (currentRange) {
29-
result.emplace(*currentRange);
30-
Groups.emplace(*currentRange, std::move(currentList));
14+
Groups = GroupBlobsForOptimization(std::move(ranges));
15+
for (auto&& [range, _] :Groups) {
16+
result.emplace(range);
3117
}
3218
return DoStartReading(std::move(result));
3319
}

ydb/core/tx/columnshard/blobs_action/abstract/read.h

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,54 @@ class TActionReadBlobs {
7171
}
7272
};
7373

74+
class TBlobsGlueing {
75+
public:
76+
class TSequentialGluePolicy {
77+
public:
78+
bool Glue(TBlobRange& currentRange, const TBlobRange& addRange) const {
79+
return currentRange.TryGlueWithNext(addRange);
80+
}
81+
};
82+
83+
class TBlobGluePolicy {
84+
private:
85+
const ui64 BlobLimitSize = 8LLU << 20;
86+
public:
87+
TBlobGluePolicy(const ui64 blobLimitSize)
88+
: BlobLimitSize(blobLimitSize)
89+
{
90+
}
91+
92+
bool Glue(TBlobRange& currentRange, const TBlobRange& addRange) const {
93+
return currentRange.TryGlueSameBlob(addRange, BlobLimitSize);
94+
}
95+
};
96+
97+
template <class TGluePolicy>
98+
static THashMap<TBlobRange, std::vector<TBlobRange>> GroupRanges(std::vector<TBlobRange>&& ranges, const TGluePolicy& policy) {
99+
std::sort(ranges.begin(), ranges.end());
100+
THashMap<TBlobRange, std::vector<TBlobRange>> result;
101+
std::optional<TBlobRange> currentRange;
102+
std::vector<TBlobRange> currentList;
103+
for (auto&& br : ranges) {
104+
if (!currentRange) {
105+
currentRange = br;
106+
}
107+
else if (!policy.Glue(*currentRange, br)) {
108+
result.emplace(*currentRange, std::move(currentList));
109+
currentRange = br;
110+
currentList.clear();
111+
}
112+
currentList.emplace_back(br);
113+
}
114+
if (currentRange) {
115+
result.emplace(*currentRange, std::move(currentList));
116+
}
117+
return result;
118+
}
119+
120+
};
121+
74122
class IBlobsReadingAction: public ICommonBlobsAction {
75123
public:
76124
using TErrorStatus = TConclusionSpecialStatus<NKikimrProto::EReplyStatus, NKikimrProto::EReplyStatus::OK, NKikimrProto::EReplyStatus::ERROR>;
@@ -91,6 +139,7 @@ class IBlobsReadingAction: public ICommonBlobsAction {
91139
protected:
92140
virtual void DoStartReading(THashSet<TBlobRange>&& range) = 0;
93141
void StartReading(std::vector<TBlobRange>&& ranges);
142+
virtual THashMap<TBlobRange, std::vector<TBlobRange>> GroupBlobsForOptimization(std::vector<TBlobRange>&& ranges) const = 0;
94143
public:
95144

96145
const THashMap<TBlobRange, std::vector<TBlobRange>>& GetGroups() const {

ydb/core/tx/columnshard/blobs_action/bs/read.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ class TReadingAction: public IBlobsReadingAction {
1010
const TActorId BlobCacheActorId;
1111
protected:
1212
virtual void DoStartReading(THashSet<TBlobRange>&& ranges) override;
13+
// Groups the requested ranges using the sequential glue policy (TBlobsGlueing::TSequentialGluePolicy).
virtual THashMap<TBlobRange, std::vector<TBlobRange>> GroupBlobsForOptimization(std::vector<TBlobRange>&& ranges) const override {
    const TBlobsGlueing::TSequentialGluePolicy gluePolicy{};
    return TBlobsGlueing::GroupRanges(std::move(ranges), gluePolicy);
}
1316
public:
1417

1518
TReadingAction(const TString& storageId, const TActorId& blobCacheActorId)

ydb/core/tx/columnshard/blobs_action/tier/read.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ class TReadingAction: public IBlobsReadingAction {
1111
const NWrappers::NExternalStorage::IExternalStorageOperator::TPtr ExternalStorageOperator;
1212
protected:
1313
virtual void DoStartReading(THashSet<TBlobRange>&& ranges) override;
14+
// Groups the requested ranges using the same-blob glue policy (TBlobsGlueing::TBlobGluePolicy).
virtual THashMap<TBlobRange, std::vector<TBlobRange>> GroupBlobsForOptimization(std::vector<TBlobRange>&& ranges) const override {
    // Size limit passed to the policy: 8 MiB.
    const ui64 mergedSizeLimit = 8LLU << 20;
    const TBlobsGlueing::TBlobGluePolicy gluePolicy(mergedSizeLimit);
    return TBlobsGlueing::GroupRanges(std::move(ranges), gluePolicy);
}
1417
public:
1518

1619
TReadingAction(const TString& storageId, const NWrappers::NExternalStorage::IExternalStorageOperator::TPtr& storageOperator)

ydb/core/tx/columnshard/common/blob.h

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ struct TBlobRange {
193193

194194
bool operator<(const TBlobRange& br) const {
195195
if (BlobId != br.BlobId) {
196-
return BlobId.Hash() < br.BlobId.Hash();
196+
return BlobId.GetLogoBlobId().Compare(br.BlobId.GetLogoBlobId()) < 0;
197197
} else if (Offset != br.Offset) {
198198
return Offset < br.Offset;
199199
} else {
@@ -209,6 +209,21 @@ struct TBlobRange {
209209
return BlobId == br.BlobId && br.Offset + br.Size == Offset;
210210
}
211211

212+
bool TryGlueSameBlob(const TBlobRange& br, const ui64 limit) {
213+
if (GetBlobId() != br.GetBlobId()) {
214+
return false;
215+
}
216+
const ui32 right = std::max<ui32>(Offset + Size, br.Offset + br.Size);
217+
const ui32 offset = std::min<ui32>(Offset, br.Offset);
218+
const ui32 size = right - offset;
219+
if (size > limit) {
220+
return false;
221+
}
222+
Size = size;
223+
Offset = offset;
224+
return true;
225+
}
226+
212227
bool TryGlueWithNext(const TBlobRange& br) {
213228
if (!br.IsNextRangeFor(*this)) {
214229
return false;

0 commit comments

Comments
 (0)