Skip to content

Commit 65efb82

Browse files
authored
Iterative B-Tree histograms builder (#6047)
1 parent fb31079 commit 65efb82

13 files changed

+1076
-764
lines changed

ydb/core/tablet_flat/flat_stat_table.cpp

-3
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,6 @@ bool BuildStats(const TSubset& subset, TStats& stats, ui64 rowCountResolution, u
1717
}
1818
}
1919

20-
// TODO: enable b-tree index after benchmarks
21-
mixedIndex = true;
22-
2320
return mixedIndex
2421
? BuildStatsMixedIndex(subset, stats, rowCountResolution, dataSizeResolution, env, yieldHandler)
2522
: BuildStatsBTreeIndex(subset, stats, histogramBucketsCount, env, yieldHandler);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
#include "flat_stat_table.h"
2+
#include "flat_table_subset.h"
3+
#include "flat_stat_table_btree_index.h"
4+
5+
namespace NKikimr::NTable {
6+
7+
namespace {
8+
9+
using TGroupId = NPage::TGroupId;
10+
using TFrames = NPage::TFrames;
11+
using TBtreeIndexNode = NPage::TBtreeIndexNode;
12+
using TChild = TBtreeIndexNode::TChild;
13+
using TColumns = TBtreeIndexNode::TColumns;
14+
using TCells = NPage::TCells;
15+
16+
// Looks up, via the B-tree index of `groupId`, the data size recorded for the
// child that immediately precedes the one containing `rowId`
// (presumably the total size of everything located before `rowId` - confirm
// against TBtreeIndexNode::TChild semantics).
//
// part    - part whose index pages are traversed
// groupId - column group whose B-tree index is used
// rowId   - row to look up; 0 yields 0, anything at or past the indexed row
//           count yields the whole group's data size
// env     - page source; a page it cannot provide aborts the descent
// ready   - set to false when a page is missing; never set to true here
//
// Returns the size found so far; the value is only meaningful when `ready`
// was not cleared by this call.
ui64 GetPrevDataSize(const TPart* part, TGroupId groupId, TRowId rowId, IPages* env, bool& ready) {
    auto& btreeMeta = part->IndexPages.GetBTree(groupId);

    // Boundary rows are answered from the meta alone, no page access needed.
    if (rowId == 0) {
        return 0;
    }
    if (rowId >= btreeMeta.GetRowCount()) {
        return btreeMeta.GetDataSize();
    }

    ui64 sizeBefore = 0;
    TPageId currentPageId = btreeMeta.GetPageId();

    // Descend one index level per iteration, starting at the root page.
    for (ui32 level = 0; level < btreeMeta.LevelCount; ++level) {
        auto loadedPage = env->TryGetPage(part, currentPageId, {});
        if (!loadedPage) {
            ready = false;
            return sizeBefore;
        }

        TBtreeIndexNode indexNode(*loadedPage);
        const auto childPos = indexNode.Seek(rowId);

        // The left sibling (if any) carries the size preceding the chosen child;
        // with no left sibling the value inherited from the upper level stays.
        if (childPos != 0) {
            sizeBefore = indexNode.GetShortChild(childPos - 1).GetDataSize();
        }
        currentPageId = indexNode.GetShortChild(childPos).GetPageId();
    }

    return sizeBefore;
}
46+
47+
// Same idea as a plain row lookup, but for the main historic group, whose
// B-tree index is searched by key rather than by row id.
//
// part          - part whose historic index pages are traversed
// groupId       - must be the main historic group, TGroupId(0, true)
// rowId         - row id in the main (non-historic) group
// env           - page source; a page it cannot provide aborts the descent
// historicRowId - out: row count recorded for the child preceding the one
//                 selected by the key seek (0 when there is none); always
//                 assigned, even on the not-ready path
// ready         - set to false when a page is missing; never set to true here
//
// Returns the data size recorded for that preceding child; the value is only
// meaningful when `ready` was not cleared by this call.
ui64 GetPrevHistoricDataSize(const TPart* part, TGroupId groupId, TRowId rowId, IPages* env, TRowId& historicRowId, bool& ready) {
    Y_ABORT_UNLESS(groupId == TGroupId(0, true));

    auto& historicMeta = part->IndexPages.GetBTree(groupId);

    if (rowId == 0) {
        historicRowId = 0;
        return 0;
    }
    // Note: the upper bound is checked against the main group's row count,
    // while the returned totals come from the historic group's meta.
    if (rowId >= part->IndexPages.GetBTree({}).GetRowCount()) {
        historicRowId = historicMeta.GetRowCount();
        return historicMeta.GetDataSize();
    }

    historicRowId = 0;
    ui64 sizeBefore = 0;
    TPageId currentPageId = historicMeta.GetPageId();

    // Search key is (rowId, max step, max txId).
    // The step/txId values live in named locals so the cells can refer to them
    // for the whole descent.
    ui64 maxStep = Max<ui64>();
    ui64 maxTxId = Max<ui64>();
    TCell searchCells[3] = {
        TCell::Make(rowId),
        TCell::Make(maxStep),
        TCell::Make(maxTxId),
    };
    TCells searchKey{ searchCells, 3 };

    // Descend one index level per iteration, starting at the root page.
    for (ui32 level = 0; level < historicMeta.LevelCount; ++level) {
        auto loadedPage = env->TryGetPage(part, currentPageId, {});
        if (!loadedPage) {
            ready = false;
            return sizeBefore;
        }

        TBtreeIndexNode indexNode(*loadedPage);
        const auto childPos = indexNode.Seek(ESeek::Lower, searchKey,
            part->Scheme->HistoryGroup.ColsKeyIdx, part->Scheme->HistoryKeys.Get());

        // Take both counters from the left sibling of the chosen child;
        // without a left sibling the values from the upper level are kept.
        if (childPos != 0) {
            const auto& leftSibling = indexNode.GetShortChild(childPos - 1);
            sizeBefore = leftSibling.GetDataSize();
            historicRowId = leftSibling.GetRowCount();
        }
        currentPageId = indexNode.GetShortChild(childPos).GetPageId();
    }

    return sizeBefore;
}
94+
95+
void AddBlobsSize(const TPart* part, TChanneledDataSize& stats, const TFrames* frames, ELargeObj lob, TRowId beginRowId, TRowId endRowId) noexcept {
96+
ui32 page = frames->Lower(beginRowId, 0, Max<ui32>());
97+
98+
while (auto &rel = frames->Relation(page)) {
99+
if (rel.Row < endRowId) {
100+
auto channel = part->GetPageChannel(lob, page);
101+
stats.Add(rel.Size, channel);
102+
++page;
103+
} else if (!rel.IsHead()) {
104+
Y_ABORT("Got unaligned TFrames head record");
105+
} else {
106+
break;
107+
}
108+
}
109+
}
110+
111+
bool AddDataSize(const TPartView& part, TStats& stats, IPages* env, TBuildStatsYieldHandler yieldHandler) {
112+
bool ready = true;
113+
114+
if (!part.Slices || part.Slices->empty()) {
115+
return true;
116+
}
117+
118+
if (part->GroupsCount) { // main group
119+
TGroupId groupId{};
120+
auto channel = part->GetGroupChannel(groupId);
121+
122+
for (const auto& slice : *part.Slices) {
123+
yieldHandler();
124+
125+
stats.RowCount += slice.EndRowId() - slice.BeginRowId();
126+
127+
ui64 beginDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.BeginRowId(), env, ready);
128+
ui64 endDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.EndRowId(), env, ready);
129+
if (ready && endDataSize > beginDataSize) {
130+
stats.DataSize.Add(endDataSize - beginDataSize, channel);
131+
}
132+
133+
if (part->Small) {
134+
AddBlobsSize(part.Part.Get(), stats.DataSize, part->Small.Get(), ELargeObj::Outer, slice.BeginRowId(), slice.EndRowId());
135+
}
136+
if (part->Large) {
137+
AddBlobsSize(part.Part.Get(), stats.DataSize, part->Large.Get(), ELargeObj::Extern, slice.BeginRowId(), slice.EndRowId());
138+
}
139+
}
140+
}
141+
142+
for (ui32 groupIndex : xrange<ui32>(1, part->GroupsCount)) {
143+
TGroupId groupId{groupIndex};
144+
auto channel = part->GetGroupChannel(groupId);
145+
for (const auto& slice : *part.Slices) {
146+
yieldHandler();
147+
148+
ui64 beginDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.BeginRowId(), env, ready);
149+
ui64 endDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.EndRowId(), env, ready);
150+
if (ready && endDataSize > beginDataSize) {
151+
stats.DataSize.Add(endDataSize - beginDataSize, channel);
152+
}
153+
}
154+
}
155+
156+
TVector<std::pair<TRowId, TRowId>> historicSlices;
157+
158+
if (part->HistoricGroupsCount) { // main historic group
159+
TGroupId groupId{0, true};
160+
auto channel = part->GetGroupChannel(groupId);
161+
for (const auto& slice : *part.Slices) {
162+
yieldHandler();
163+
164+
TRowId beginRowId, endRowId;
165+
bool readySlice = true;
166+
ui64 beginDataSize = GetPrevHistoricDataSize(part.Part.Get(), groupId, slice.BeginRowId(), env, beginRowId, readySlice);
167+
ui64 endDataSize = GetPrevHistoricDataSize(part.Part.Get(), groupId, slice.EndRowId(), env, endRowId, readySlice);
168+
ready &= readySlice;
169+
if (ready && endDataSize > beginDataSize) {
170+
stats.DataSize.Add(endDataSize - beginDataSize, channel);
171+
}
172+
if (readySlice && endRowId > beginRowId) {
173+
historicSlices.emplace_back(beginRowId, endRowId);
174+
}
175+
}
176+
}
177+
178+
for (ui32 groupIndex : xrange<ui32>(1, part->HistoricGroupsCount)) {
179+
TGroupId groupId{groupIndex, true};
180+
auto channel = part->GetGroupChannel(groupId);
181+
for (const auto& slice : historicSlices) {
182+
yieldHandler();
183+
184+
ui64 beginDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.first, env, ready);
185+
ui64 endDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.second, env, ready);
186+
if (ready && endDataSize > beginDataSize) {
187+
stats.DataSize.Add(endDataSize - beginDataSize, channel);
188+
}
189+
}
190+
}
191+
192+
return ready;
193+
}
194+
195+
}
196+
197+
bool BuildStatsBTreeIndex(const TSubset& subset, TStats& stats, ui32 histogramBucketsCount, IPages* env, TBuildStatsYieldHandler yieldHandler) {
198+
stats.Clear();
199+
200+
bool ready = true;
201+
for (const auto& part : subset.Flatten) {
202+
stats.IndexSize.Add(part->IndexesRawSize, part->Label.Channel());
203+
ready &= AddDataSize(part, stats, env, yieldHandler);
204+
}
205+
206+
if (!ready) {
207+
return false;
208+
}
209+
210+
ready &= BuildStatsHistogramsBTreeIndex(subset, stats,
211+
stats.RowCount / histogramBucketsCount, stats.DataSize.Size / histogramBucketsCount,
212+
env, yieldHandler);
213+
214+
return ready;
215+
}
216+
217+
}

0 commit comments

Comments
 (0)