Skip to content

Commit 402da95

Browse files
committed
KIKIMR-19521 BTreeIndex Seek RowId
1 parent 5a84270 commit 402da95

File tree

6 files changed

+379
-28
lines changed

6 files changed

+379
-28
lines changed

ydb/core/tablet_flat/flat_page_btree_index.h

+54-2
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ namespace NKikimr::NTable::NPage {
4949
using TColumns = TArrayRef<const TPartScheme::TColumn>;
5050

5151
#pragma pack(push,1)
52-
public:
5352
struct THeader {
5453
TRecIdx KeysCount;
5554
TPgSize KeysSize;
@@ -191,6 +190,11 @@ namespace NKikimr::NTable::NPage {
191190
return Header->KeysCount;
192191
}
193192

193+
// Returns the number of children of this node, which is always one more
// than the number of keys reported by GetKeysCount().
TRecIdx GetChildrenCount() const noexcept
{
    const auto keysCount = GetKeysCount();
    return keysCount + 1;
}
197+
194198
TCellsIter GetKeyCells(TRecIdx pos, TColumns columns) const noexcept
195199
{
196200
if (IsFixedFormat()) {
@@ -207,7 +211,55 @@ namespace NKikimr::NTable::NPage {
207211
return Children[pos];
208212
}
209213

210-
// TODO: Seek methods will go here
214+
// Finds the child that covers `rowId`: the index of the first child whose
// Count is strictly greater than `rowId`.
//
// `on` is an optional hint with a previously found position. When given,
// a few neighbouring children are probed linearly (forward or backward,
// depending on which side of the hint `rowId` falls) before falling back
// to a binary search over the remaining range. A hint that is out of
// range trips a debug assertion and is then ignored.
//
// Aborts if no child covers `rowId`.
TRecIdx Seek(TRowId rowId, std::optional<TRecIdx> on = { }) const noexcept
{
    // How many neighbours are probed linearly before switching to binary search
    constexpr int linearProbes = 4;

    const TRecIdx total = GetChildrenCount();
    if (on.has_value() && *on >= total) {
        Y_DEBUG_ABORT_UNLESS(false, "Should point to some child");
        on.reset();
    }

    const auto rowIsBefore = [](TRowId row, const TChild& child) {
        return row < child.Count;
    };

    // Index of the first child in [from, to) with Count > rowId, or `to` if none
    const auto searchRange = [&](TRecIdx from, TRecIdx to) -> TRecIdx {
        return std::upper_bound(Children + from, Children + to, rowId, rowIsBefore) - Children;
    };

    TRecIdx found;
    if (!on) {
        // No hint: binary search over all children
        found = searchRange(0, total);
    } else if (Children[*on].Count > rowId) {
        // The answer is at the hint or before it: walk backward
        found = *on;
        for (int probe = 0; probe < linearProbes; ++probe) {
            if (found == 0) {
                return 0;
            }
            if (Children[found - 1].Count <= rowId) {
                return found;
            }
            --found;
        }

        // Children[found] is known to cover or follow rowId; search up to it
        found = searchRange(0, found);
    } else {
        // The answer is strictly after the hint: walk forward
        found = *on;
        for (int probe = 0; probe < linearProbes; ++probe) {
            ++found;
            Y_ABORT_UNLESS(found < total, "Should always seek some child");
            if (Children[found].Count > rowId) {
                return found;
            }
        }

        // Children[found] was already rejected; search from the next one
        found = searchRange(found + 1, total);
    }

    Y_ABORT_UNLESS(found < total, "Should always seek some child");
    return found;
}
211263

212264
private:
213265
TSharedData Raw;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
#pragma once
2+
3+
#include "flat_part_iface.h"
4+
#include "flat_page_index.h"
5+
#include "flat_table_part.h"
6+
7+
8+
namespace NKikimr::NTable {
9+
10+
class TPartBtreeIndexIt {
11+
using TCells = NPage::TCells;
12+
using TBtreeIndexNode = NPage::TBtreeIndexNode;
13+
using TGroupId = NPage::TGroupId;
14+
using TRecIdx = NPage::TRecIdx;
15+
using TChild = TBtreeIndexNode::TChild;
16+
using TBtreeIndexMeta = NPage::TBtreeIndexMeta;
17+
18+
struct TNodeState {
19+
TChild Meta;
20+
TRowId BeginRowId;
21+
TRowId EndRowId;
22+
// TCells BeginKey;
23+
// TCells EndKey;
24+
std::optional<TBtreeIndexNode> Node;
25+
std::optional<TRecIdx> Pos;
26+
27+
TNodeState(TChild meta, TRowId beginRowId, TRowId endRowId)
28+
: Meta(meta)
29+
, BeginRowId(beginRowId)
30+
, EndRowId(endRowId)
31+
{
32+
}
33+
34+
bool HasRow(TRowId rowId) const {
35+
return BeginRowId <= rowId && rowId < EndRowId;
36+
}
37+
};
38+
39+
public:
40+
TPartBtreeIndexIt(const TPart* part, IPages* env, TGroupId groupId)
41+
: Part(part)
42+
, Env(env)
43+
, GroupId(groupId)
44+
, Meta(groupId.IsMain() ? part->IndexPages.BTreeGroups[groupId.Index] : part->IndexPages.BTreeHistoric[groupId.Index])
45+
{
46+
State.emplace_back(Meta, 0, Meta.Count);
47+
}
48+
49+
EReady Seek(TRowId rowId) {
50+
if (rowId >= Meta.Count) {
51+
return Exhaust();
52+
}
53+
54+
while (State.size() > 1 && !State.back().HasRow(rowId)) {
55+
State.pop_back();
56+
}
57+
58+
if (IsExhausted()) {
59+
// don't use exhausted state as an initial one
60+
State[0].Pos = { };
61+
}
62+
63+
for (size_t level : xrange(State.size() - 1, Meta.LevelsCount)) {
64+
auto &state = State[level];
65+
Y_ABORT_UNLESS(state.HasRow(rowId));
66+
if (!TryLoad(state)) {
67+
// exiting with an intermediate state
68+
Y_DEBUG_ABORT_UNLESS(!IsLeaf() && !IsExhausted());
69+
return EReady::Page;
70+
}
71+
auto pos = state.Node->Seek(rowId, state.Pos);
72+
state.Pos.emplace(pos);
73+
74+
auto child = state.Node->GetChild(pos);
75+
TRowId firstRowId = pos ? state.Node->GetChild(pos - 1).Count : state.BeginRowId;
76+
TRowId lastRowId = child.Count;
77+
State.emplace_back(child, firstRowId, lastRowId);
78+
}
79+
80+
// State.back() points to the target data page
81+
Y_ABORT_UNLESS(IsLeaf());
82+
Y_ABORT_UNLESS(State.back().HasRow(rowId));
83+
return EReady::Data;
84+
}
85+
86+
// EReady Next() {
87+
// Y_DEBUG_ABORT_UNLESS(IsLeaf());
88+
// return Exhaust();
89+
// }
90+
91+
// EReady Prev() {
92+
// Y_DEBUG_ABORT_UNLESS(IsLeaf());
93+
// return Exhaust();
94+
// }
95+
96+
public:
97+
bool IsValid() const {
98+
Y_DEBUG_ABORT_UNLESS(IsLeaf() || IsExhausted());
99+
return IsLeaf();
100+
}
101+
102+
TPageId GetPageId() const {
103+
Y_ABORT_UNLESS(IsLeaf());
104+
return State.back().Meta.PageId;
105+
}
106+
107+
TRowId GetRowId() const {
108+
Y_ABORT_UNLESS(IsLeaf());
109+
return State.back().BeginRowId;
110+
}
111+
112+
TRowId GetNextRowId() const {
113+
Y_ABORT_UNLESS(IsLeaf());
114+
return State.back().EndRowId;
115+
}
116+
117+
private:
118+
bool IsRoot() const noexcept {
119+
return State.size() == 1;
120+
}
121+
122+
bool IsExhausted() const noexcept {
123+
return State[0].Pos == Max<TRecIdx>();
124+
}
125+
126+
bool IsLeaf() const noexcept {
127+
// Note: it is possible to have 0 levels in B-Tree
128+
// so we may have exhausted state with leaf (data) node
129+
return State.size() == Meta.LevelsCount + 1 && !IsExhausted();
130+
}
131+
132+
EReady Exhaust() {
133+
while (State.size() > 1) {
134+
State.pop_back();
135+
}
136+
State[0].Pos = Max<TRecIdx>();
137+
return EReady::Gone;
138+
}
139+
140+
bool TryLoad(TNodeState& state) {
141+
if (state.Node) {
142+
return true;
143+
}
144+
145+
auto page = Env->TryGetPage(Part, state.Meta.PageId);
146+
if (page) {
147+
state.Node.emplace(*page);
148+
return true;
149+
}
150+
return false;
151+
}
152+
153+
private:
154+
const TPart* const Part;
155+
IPages* const Env;
156+
const TGroupId GroupId;
157+
const TBtreeIndexMeta Meta;
158+
TVector<TNodeState> State;
159+
};
160+
161+
}

ydb/core/tablet_flat/flat_part_index_iter.h

+18-20
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,13 @@ class TPartIndexIt {
2424
, EndRowId(groupId.IsMain() && part->Stat.Rows ? part->Stat.Rows : Max<TRowId>())
2525
{ }
2626

27-
EReady Seek(TRowId rowId, bool restart = false) {
27+
EReady Seek(TRowId rowId) {
2828
auto index = TryGetIndex();
2929
if (!index) {
3030
return EReady::Page;
3131
}
3232

33-
Iter = index->LookupRow(rowId, restart ? TIter() : Iter);
33+
Iter = index->LookupRow(rowId, Iter);
3434
return DataOrGone();
3535
}
3636

@@ -54,42 +54,39 @@ class TPartIndexIt {
5454
return DataOrGone();
5555
}
5656

57-
EReady Next() {
58-
auto index = TryGetIndex();
59-
if (!index) {
60-
return EReady::Page;
61-
}
62-
Iter++;
63-
return DataOrGone();
64-
}
65-
66-
EReady Prev() {
57+
EReady SeekLast() {
6758
auto index = TryGetIndex();
6859
if (!index) {
6960
return EReady::Page;
7061
}
62+
Iter = (*index)->End();
7163
if (Iter.Off() == 0) {
72-
Iter = { };
7364
return EReady::Gone;
7465
}
7566
Iter--;
7667
return DataOrGone();
7768
}
7869

79-
EReady SeekLast() {
80-
auto index = TryGetIndex();
81-
if (!index) {
82-
return EReady::Page;
83-
}
84-
Iter = (*index)->End();
70+
// Moves the iterator one record forward and reports the outcome via
// DataOrGone(). Debug builds assert that Index is set and that the
// iterator is currently positioned.
EReady Next() {
    Y_DEBUG_ABORT_UNLESS(Index);
    Y_DEBUG_ABORT_UNLESS(Iter);

    Iter++;

    return DataOrGone();
}
76+
77+
// Moves the iterator one record backward. Debug builds assert that Index
// is set and that the iterator is currently positioned.
//
// When the iterator is already at offset 0 it is reset to an empty
// iterator and EReady::Gone is returned; otherwise the outcome is
// reported via DataOrGone().
EReady Prev() {
    Y_DEBUG_ABORT_UNLESS(Index);
    Y_DEBUG_ABORT_UNLESS(Iter);

    if (Iter.Off() != 0) {
        Iter--;
        return DataOrGone();
    }

    // Nothing precedes the first record
    Iter = { };
    return EReady::Gone;
}
9187

9288
bool IsValid() const {
89+
Y_DEBUG_ABORT_UNLESS(Index);
9390
return bool(Iter);
9491
}
9592

@@ -125,10 +122,11 @@ class TPartIndexIt {
125122

126123
TRowId GetNextRowId() const {
127124
Y_ABORT_UNLESS(Index);
125+
Y_ABORT_UNLESS(Iter);
128126
auto next = Iter + 1;
129127
return next
130128
? next->GetRowId()
131-
: Max<TRowId>();
129+
: EndRowId;
132130
}
133131

134132
const TRecord * GetRecord() const {

ydb/core/tablet_flat/flat_stat_part.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,9 @@ class TScreenedPartIndexIterator {
5959

6060
EReady Start() {
6161
auto ready = Pos.Seek(0);
62-
FillKey();
62+
if (ready != EReady::Page) {
63+
FillKey();
64+
}
6365

6466
for (auto& g : AltGroups) {
6567
if (g.Pos.Seek(0) == EReady::Page) {

0 commit comments

Comments
 (0)