Skip to content

Commit 287b4bb

Browse files
committed
KIKIMR-19139 Load index in BuildStats
1 parent 0abd9cc commit 287b4bb

14 files changed

+408
-87
lines changed

ydb/core/tablet_flat/flat_part_index_iter.h

+7 -7
Original file line number | Diff line number | Diff line change
@@ -89,7 +89,7 @@ class TPartIndexIt {
8989
return DataOrGone();
9090
}
9191

92-
bool IsValid() {
92+
bool IsValid() const {
9393
return bool(Iter);
9494
}
9595

@@ -99,38 +99,38 @@ class TPartIndexIt {
9999
}
100100

101101
public:
102-
TRowId GetEndRowId() {
102+
TRowId GetEndRowId() const {
103103
return EndRowId;
104104
}
105105

106-
TPageId GetPageId() {
106+
TPageId GetPageId() const {
107107
Y_VERIFY(Index);
108108
Y_VERIFY(Iter);
109109
return Iter->GetPageId();
110110
}
111111

112-
TRowId GetRowId() {
112+
TRowId GetRowId() const {
113113
Y_VERIFY(Index);
114114
Y_VERIFY(Iter);
115115
return Iter->GetRowId();
116116
}
117117

118-
TRowId GetNextRowId() {
118+
TRowId GetNextRowId() const {
119119
Y_VERIFY(Index);
120120
auto next = Iter + 1;
121121
return next
122122
? next->GetRowId()
123123
: Max<TRowId>();
124124
}
125125

126-
const TRecord * GetRecord() {
126+
const TRecord * GetRecord() const {
127127
Y_VERIFY(Index);
128128
Y_VERIFY(Iter);
129129
return Iter.GetRecord();
130130
}
131131

132132
private:
133-
EReady DataOrGone() {
133+
EReady DataOrGone() const {
134134
return Iter ? EReady::Data : EReady::Gone;
135135
}
136136

ydb/core/tablet_flat/flat_stat_part.h

+58 -29
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
#pragma once
22

33
#include "flat_part_iface.h"
4+
#include "flat_part_index_iter.h"
45
#include "flat_part_laid.h"
56
#include "flat_page_frames.h"
67
#include "util_basics.h"
@@ -118,47 +119,69 @@ class TPartDataSizeHelper {
118119
// if page start key is not screened then the whole previous page is added to stats
119120
class TScreenedPartIndexIterator {
120121
public:
121-
TScreenedPartIndexIterator(TPartView partView, TIntrusiveConstPtr<TKeyCellDefaults> keyColumns,
122+
TScreenedPartIndexIterator(TPartView partView, IPages* env, TIntrusiveConstPtr<TKeyCellDefaults> keyColumns,
122123
TIntrusiveConstPtr<NPage::TFrames> small, TIntrusiveConstPtr<NPage::TFrames> large)
123124
: Part(std::move(partView.Part))
125+
, Pos(Part.Get(), env, {})
124126
, KeyColumns(std::move(keyColumns))
125127
, Screen(std::move(partView.Screen))
126128
, Small(std::move(small))
127129
, Large(std::move(large))
128130
, CurrentHole(TScreen::Iter(Screen, CurrentHoleIdx, 0, 1))
129131
{
130-
Pos = Part->Index->Begin();
131-
End = Part->Index->End();
132132
AltGroups.reserve(Part->GroupsCount - 1);
133133
for (ui32 group : xrange(size_t(1), Part->GroupsCount)) {
134-
AltGroups.emplace_back(Part.Get(), NPage::TGroupId(group));
134+
AltGroups.emplace_back(Part.Get(), env, NPage::TGroupId(group));
135135
}
136-
for (ui32 group : xrange(Part->HistoricGroupsCount)) {
137-
HistoryGroups.emplace_back(Part.Get(), NPage::TGroupId(group, true));
136+
for (ui32 group : xrange(Part->HistoricIndexes.size())) {
137+
HistoryGroups.emplace_back(Part.Get(), env, NPage::TGroupId(group, true));
138138
}
139+
}
140+
141+
EReady Start() {
142+
auto ready = Pos.Seek(0);
139143
FillKey();
144+
145+
for (auto& g : AltGroups) {
146+
if (g.Pos.Seek(0) == EReady::Page) {
147+
ready = EReady::Page;
148+
}
149+
}
150+
for (auto& g : HistoryGroups) {
151+
if (g.Pos.Seek(0) == EReady::Page) {
152+
ready = EReady::Page;
153+
}
154+
}
155+
156+
return ready;
140157
}
141158

142159
bool IsValid() const {
143-
return Pos != End;
160+
return Pos.IsValid();
144161
}
145162

146-
void Next(TPartDataStats& stats) {
163+
EReady Next(TPartDataStats& stats) {
147164
Y_VERIFY(IsValid());
148165

149-
auto curPageId = Pos->GetPageId();
150-
LastRowId = Pos->GetRowId();
151-
++Pos;
166+
auto curPageId = Pos.GetPageId();
167+
LastRowId = Pos.GetRowId();
168+
auto ready = Pos.Next();
169+
if (ready == EReady::Page) {
170+
return ready;
171+
}
152172
ui64 rowCount = IncludedRows(GetLastRowId(), GetCurrentRowId());
153173
stats.RowCount += rowCount;
154174

155175
if (rowCount) AddPageSize(stats.DataSize, curPageId);
156-
TRowId nextRowId = Pos ? Pos->GetRowId() : Max<TRowId>();
176+
TRowId nextRowId = ready == EReady::Data ? Pos.GetRowId() : Max<TRowId>();
157177
for (auto& g : AltGroups) {
158-
while (g.Pos && g.Pos->GetRowId() < nextRowId) {
178+
while (g.Pos.IsValid() && g.Pos.GetRowId() < nextRowId) {
159179
// eagerly include all data up to the next row id
160-
if (rowCount) AddPageSize(stats.DataSize, g.Pos->GetPageId(), g.GroupId);
161-
++g.Pos;
180+
if (rowCount) AddPageSize(stats.DataSize, g.Pos.GetPageId(), g.GroupId);
181+
if (g.Pos.Next() == EReady::Page) {
182+
ready = EReady::Page;
183+
break;
184+
}
162185
}
163186
}
164187

@@ -167,18 +190,24 @@ class TScreenedPartIndexIterator {
167190
auto& h = HistoryGroups[0];
168191
const auto& hscheme = Part->Scheme->HistoryGroup;
169192
Y_VERIFY_DEBUG(hscheme.ColsKeyIdx.size() == 3);
170-
while (h.Pos && h.Pos->Cell(hscheme.ColsKeyIdx[0]).AsValue<TRowId>() < nextRowId) {
193+
while (h.Pos.IsValid() && h.Pos.GetRecord()->Cell(hscheme.ColsKeyIdx[0]).AsValue<TRowId>() < nextRowId) {
171194
// eagerly include all history up to the next row id
172-
if (rowCount) AddPageSize(stats.DataSize, h.Pos->GetPageId(), h.GroupId);
173-
++h.Pos;
195+
if (rowCount) AddPageSize(stats.DataSize, h.Pos.GetPageId(), h.GroupId);
196+
if (h.Pos.Next() == EReady::Page) {
197+
ready = EReady::Page;
198+
break;
199+
}
174200
}
175-
TRowId nextHistoryRowId = h.Pos ? h.Pos->GetRowId() : Max<TRowId>();
201+
TRowId nextHistoryRowId = h.Pos.IsValid() ? h.Pos.GetRowId() : Max<TRowId>();
176202
for (size_t index = 1; index < HistoryGroups.size(); ++index) {
177203
auto& g = HistoryGroups[index];
178-
while (g.Pos && g.Pos->GetRowId() < nextHistoryRowId) {
204+
while (g.Pos.IsValid() && g.Pos.GetRowId() < nextHistoryRowId) {
179205
// eagerly include all data up to the next row id
180-
if (rowCount) AddPageSize(stats.DataSize, g.Pos->GetPageId(), g.GroupId);
181-
++g.Pos;
206+
if (rowCount) AddPageSize(stats.DataSize, g.Pos.GetPageId(), g.GroupId);
207+
if (g.Pos.Next() == EReady::Page) {
208+
ready = EReady::Page;
209+
break;
210+
}
182211
}
183212
}
184213
}
@@ -193,6 +222,7 @@ class TScreenedPartIndexIterator {
193222
}
194223

195224
FillKey();
225+
return ready;
196226
}
197227

198228
TDbTupleRef GetCurrentKey() const {
@@ -207,7 +237,7 @@ class TScreenedPartIndexIterator {
207237

208238
ui64 GetCurrentRowId() const {
209239
if (IsValid()) {
210-
return Pos->GetRowId();
240+
return Pos.GetRowId();
211241
}
212242
if (TRowId endRowId = Part->Index.GetEndRowId(); endRowId != Max<TRowId>()) {
213243
// This would include the last page rows when known
@@ -233,7 +263,7 @@ class TScreenedPartIndexIterator {
233263
ui32 keyIdx = 0;
234264
// Add columns that are present in the part
235265
for (;keyIdx < Part->Scheme->Groups[0].KeyTypes.size(); ++keyIdx) {
236-
CurrentKey.push_back(Pos->Cell(Part->Scheme->Groups[0].ColsKeyIdx[keyIdx]));
266+
CurrentKey.push_back(Pos.GetRecord()->Cell(Part->Scheme->Groups[0].ColsKeyIdx[keyIdx]));
237267
}
238268

239269
// Extend with default values if needed
@@ -293,20 +323,19 @@ class TScreenedPartIndexIterator {
293323

294324
private:
295325
struct TGroupState {
296-
NPage::TIndex::TIter Pos;
326+
TPartIndexIt Pos;
297327
const NPage::TGroupId GroupId;
298328

299-
TGroupState(const TPart* part, NPage::TGroupId groupId)
300-
: Pos(part->GetGroupIndex(groupId)->Begin())
329+
TGroupState(const TPart* part, IPages* env, NPage::TGroupId groupId)
330+
: Pos(part, env, groupId)
301331
, GroupId(groupId)
302332
{ }
303333
};
304334

305335
private:
306336
TIntrusiveConstPtr<TPart> Part;
337+
TPartIndexIt Pos;
307338
TIntrusiveConstPtr<TKeyCellDefaults> KeyColumns;
308-
NPage::TIndex::TIter Pos;
309-
NPage::TIndex::TIter End;
310339
TSmallVec<TCell> CurrentKey;
311340
ui64 LastRowId = 0;
312341
TSmallVec<TGroupState> AltGroups;

ydb/core/tablet_flat/flat_stat_table.cpp

+20 -6
Original file line number | Diff line number | Diff line change
@@ -6,26 +6,38 @@
66
namespace NKikimr {
77
namespace NTable {
88

9-
void BuildStats(const TSubset& subset, TStats& stats, ui64 rowCountResolution, ui64 dataSizeResolution, const IPages* env) {
10-
Y_UNUSED(env);
11-
9+
bool BuildStats(const TSubset& subset, TStats& stats, ui64 rowCountResolution, ui64 dataSizeResolution, IPages* env) {
1210
stats.Clear();
1311

1412
TPartDataStats stIterStats = { };
1513
TStatsIterator stIter(subset.Scheme->Keys);
1614

1715
// Make index iterators for all parts
16+
bool started = true;
1817
for (auto& pi : subset.Flatten) {
1918
stats.IndexSize.Add(pi->IndexesRawSize, pi->Label.Channel());
20-
TAutoPtr<TScreenedPartIndexIterator> iter = new TScreenedPartIndexIterator(pi, subset.Scheme->Keys, pi->Small, pi->Large);
21-
if (iter->IsValid()) {
19+
TAutoPtr<TScreenedPartIndexIterator> iter = new TScreenedPartIndexIterator(pi, env, subset.Scheme->Keys, pi->Small, pi->Large);
20+
auto ready = iter->Start();
21+
if (ready == EReady::Page) {
22+
started = false;
23+
} else if (ready == EReady::Data) {
2224
stIter.Add(iter);
2325
}
2426
}
27+
if (!started) {
28+
return false;
29+
}
2530

2631
ui64 prevRows = 0;
2732
ui64 prevSize = 0;
28-
while (stIter.Next(stIterStats)) {
33+
while (true) {
34+
auto ready = stIter.Next(stIterStats);
35+
if (ready == EReady::Page) {
36+
return false;
37+
} else if (ready == EReady::Gone) {
38+
break;
39+
}
40+
2941
const bool nextRowsBucket = (stIterStats.RowCount >= prevRows + rowCountResolution);
3042
const bool nextSizeBucket = (stIterStats.DataSize.Size >= prevSize + dataSizeResolution);
3143

@@ -48,6 +60,8 @@ void BuildStats(const TSubset& subset, TStats& stats, ui64 rowCountResolution, u
4860

4961
stats.RowCount = stIterStats.RowCount;
5062
stats.DataSize = std::move(stIterStats.DataSize);
63+
64+
return true;
5165
}
5266

5367
void GetPartOwners(const TSubset& subset, THashSet<ui64>& partOwners) {

ydb/core/tablet_flat/flat_stat_table.h

+13 -14
Original file line number | Diff line number | Diff line change
@@ -25,11 +25,7 @@ class TStatsIterator {
2525
Heap.push(it);
2626
}
2727

28-
/**
29-
* @return true when we haven't reached the end and have current key
30-
* @return false when we have reached the end and don't have current key
31-
*/
32-
bool Next(TPartDataStats& stats) {
28+
EReady Next(TPartDataStats& stats) {
3329
ui64 lastRowCount = stats.RowCount;
3430
ui64 lastDataSize = stats.DataSize.Size;
3531

@@ -41,24 +37,32 @@ class TStatsIterator {
4137
TSerializedCellVec serialized = TSerializedCellVec(TSerializedCellVec::Serialize({it->GetCurrentKey().Columns, it->GetCurrentKey().ColumnCount}));
4238
TDbTupleRef key(KeyColumns->BasicTypes().data(), serialized.GetCells().data(), serialized.GetCells().size());
4339

44-
if (MoveIterator(it, stats))
40+
auto ready = it->Next(stats);
41+
if (ready == EReady::Page) {
42+
return ready;
43+
} else if (ready == EReady::Data) {
4544
Heap.push(it);
45+
}
4646

4747
// guarantees that all results will be different
4848
while (!Heap.empty() && CompareKeys(key, Heap.top()->GetCurrentKey()) == 0) {
4949
it = Heap.top();
5050
Heap.pop();
5151

52-
if (MoveIterator(it, stats))
52+
ready = it->Next(stats);
53+
if (ready == EReady::Page) {
54+
return ready;
55+
} else if (ready == EReady::Data) {
5356
Heap.push(it);
57+
}
5458
}
5559

5660
if (stats.RowCount != lastRowCount && stats.DataSize.Size != lastDataSize) {
5761
break;
5862
}
5963
}
6064

61-
return !Heap.empty();
65+
return Heap.empty() ? EReady::Gone : EReady::Data;
6266
}
6367

6468
TDbTupleRef GetCurrentKey() const {
@@ -79,11 +83,6 @@ class TStatsIterator {
7983
}
8084
};
8185

82-
bool MoveIterator(TScreenedPartIndexIterator* it, TPartDataStats& stats) {
83-
it->Next(stats);
84-
return it->IsValid();
85-
}
86-
8786
TIntrusiveConstPtr<TKeyCellDefaults> KeyColumns;
8887
THolderVector<TScreenedPartIndexIterator> Iterators;
8988
TPriorityQueue<TScreenedPartIndexIterator*, TSmallVec<TScreenedPartIndexIterator*>, TIterKeyGreater> Heap;
@@ -186,7 +185,7 @@ class TKeyAccessSample {
186185
THashMap<TString, ui64> KeyRefCount;
187186
};
188187

189-
void BuildStats(const TSubset& subset, TStats& stats, ui64 rowCountResolution, ui64 dataSizeResolution, const IPages* env);
188+
bool BuildStats(const TSubset& subset, TStats& stats, ui64 rowCountResolution, ui64 dataSizeResolution, IPages* env);
190189
void GetPartOwners(const TSubset& subset, THashSet<ui64>& partOwners);
191190

192191
}}

0 commit comments

Comments
 (0)