|
| 1 | +#include "flat_stat_table.h" |
| 2 | +#include "flat_table_subset.h" |
| 3 | +#include "flat_stat_table_btree_index.h" |
| 4 | + |
| 5 | +namespace NKikimr::NTable { |
| 6 | + |
| 7 | +namespace { |
| 8 | + |
| 9 | +using TGroupId = NPage::TGroupId; |
| 10 | +using TFrames = NPage::TFrames; |
| 11 | +using TBtreeIndexNode = NPage::TBtreeIndexNode; |
| 12 | +using TChild = TBtreeIndexNode::TChild; |
| 13 | +using TColumns = TBtreeIndexNode::TColumns; |
| 14 | +using TCells = NPage::TCells; |
| 15 | + |
| 16 | +ui64 GetPrevDataSize(const TPart* part, TGroupId groupId, TRowId rowId, IPages* env, bool& ready) { |
| 17 | + auto& meta = part->IndexPages.GetBTree(groupId); |
| 18 | + |
| 19 | + if (rowId == 0) { |
| 20 | + return 0; |
| 21 | + } |
| 22 | + if (rowId >= meta.GetRowCount()) { |
| 23 | + return meta.GetDataSize(); |
| 24 | + } |
| 25 | + |
| 26 | + TPageId pageId = meta.GetPageId(); |
| 27 | + ui64 prevDataSize = 0; |
| 28 | + |
| 29 | + for (ui32 height = 0; height < meta.LevelCount; height++) { |
| 30 | + auto page = env->TryGetPage(part, pageId, {}); |
| 31 | + if (!page) { |
| 32 | + ready = false; |
| 33 | + return prevDataSize; |
| 34 | + } |
| 35 | + auto node = TBtreeIndexNode(*page); |
| 36 | + auto pos = node.Seek(rowId); |
| 37 | + |
| 38 | + pageId = node.GetShortChild(pos).GetPageId(); |
| 39 | + if (pos) { |
| 40 | + prevDataSize = node.GetShortChild(pos - 1).GetDataSize(); |
| 41 | + } |
| 42 | + } |
| 43 | + |
| 44 | + return prevDataSize; |
| 45 | +} |
| 46 | + |
| 47 | +ui64 GetPrevHistoricDataSize(const TPart* part, TGroupId groupId, TRowId rowId, IPages* env, TRowId& historicRowId, bool& ready) { |
| 48 | + Y_ABORT_UNLESS(groupId == TGroupId(0, true)); |
| 49 | + |
| 50 | + auto& meta = part->IndexPages.GetBTree(groupId); |
| 51 | + |
| 52 | + if (rowId == 0) { |
| 53 | + historicRowId = 0; |
| 54 | + return 0; |
| 55 | + } |
| 56 | + if (rowId >= part->IndexPages.GetBTree({}).GetRowCount()) { |
| 57 | + historicRowId = meta.GetRowCount(); |
| 58 | + return meta.GetDataSize(); |
| 59 | + } |
| 60 | + |
| 61 | + TPageId pageId = meta.GetPageId(); |
| 62 | + ui64 prevDataSize = 0; |
| 63 | + historicRowId = 0; |
| 64 | + |
| 65 | + // Minimum key is (startRowId, max, max) |
| 66 | + ui64 startStep = Max<ui64>(); |
| 67 | + ui64 startTxId = Max<ui64>(); |
| 68 | + TCell key1Cells[3] = { |
| 69 | + TCell::Make(rowId), |
| 70 | + TCell::Make(startStep), |
| 71 | + TCell::Make(startTxId), |
| 72 | + }; |
| 73 | + TCells key1{ key1Cells, 3 }; |
| 74 | + |
| 75 | + for (ui32 height = 0; height < meta.LevelCount; height++) { |
| 76 | + auto page = env->TryGetPage(part, pageId, {}); |
| 77 | + if (!page) { |
| 78 | + ready = false; |
| 79 | + return prevDataSize; |
| 80 | + } |
| 81 | + auto node = TBtreeIndexNode(*page); |
| 82 | + auto pos = node.Seek(ESeek::Lower, key1, part->Scheme->HistoryGroup.ColsKeyIdx, part->Scheme->HistoryKeys.Get()); |
| 83 | + |
| 84 | + pageId = node.GetShortChild(pos).GetPageId(); |
| 85 | + if (pos) { |
| 86 | + const auto& prevChild = node.GetShortChild(pos - 1); |
| 87 | + prevDataSize = prevChild.GetDataSize(); |
| 88 | + historicRowId = prevChild.GetRowCount(); |
| 89 | + } |
| 90 | + } |
| 91 | + |
| 92 | + return prevDataSize; |
| 93 | +} |
| 94 | + |
| 95 | +void AddBlobsSize(const TPart* part, TChanneledDataSize& stats, const TFrames* frames, ELargeObj lob, TRowId beginRowId, TRowId endRowId) noexcept { |
| 96 | + ui32 page = frames->Lower(beginRowId, 0, Max<ui32>()); |
| 97 | + |
| 98 | + while (auto &rel = frames->Relation(page)) { |
| 99 | + if (rel.Row < endRowId) { |
| 100 | + auto channel = part->GetPageChannel(lob, page); |
| 101 | + stats.Add(rel.Size, channel); |
| 102 | + ++page; |
| 103 | + } else if (!rel.IsHead()) { |
| 104 | + Y_ABORT("Got unaligned TFrames head record"); |
| 105 | + } else { |
| 106 | + break; |
| 107 | + } |
| 108 | + } |
| 109 | +} |
| 110 | + |
| 111 | +bool AddDataSize(const TPartView& part, TStats& stats, IPages* env, TBuildStatsYieldHandler yieldHandler) { |
| 112 | + bool ready = true; |
| 113 | + |
| 114 | + if (!part.Slices || part.Slices->empty()) { |
| 115 | + return true; |
| 116 | + } |
| 117 | + |
| 118 | + if (part->GroupsCount) { // main group |
| 119 | + TGroupId groupId{}; |
| 120 | + auto channel = part->GetGroupChannel(groupId); |
| 121 | + |
| 122 | + for (const auto& slice : *part.Slices) { |
| 123 | + yieldHandler(); |
| 124 | + |
| 125 | + stats.RowCount += slice.EndRowId() - slice.BeginRowId(); |
| 126 | + |
| 127 | + ui64 beginDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.BeginRowId(), env, ready); |
| 128 | + ui64 endDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.EndRowId(), env, ready); |
| 129 | + if (ready && endDataSize > beginDataSize) { |
| 130 | + stats.DataSize.Add(endDataSize - beginDataSize, channel); |
| 131 | + } |
| 132 | + |
| 133 | + if (part->Small) { |
| 134 | + AddBlobsSize(part.Part.Get(), stats.DataSize, part->Small.Get(), ELargeObj::Outer, slice.BeginRowId(), slice.EndRowId()); |
| 135 | + } |
| 136 | + if (part->Large) { |
| 137 | + AddBlobsSize(part.Part.Get(), stats.DataSize, part->Large.Get(), ELargeObj::Extern, slice.BeginRowId(), slice.EndRowId()); |
| 138 | + } |
| 139 | + } |
| 140 | + } |
| 141 | + |
| 142 | + for (ui32 groupIndex : xrange<ui32>(1, part->GroupsCount)) { |
| 143 | + TGroupId groupId{groupIndex}; |
| 144 | + auto channel = part->GetGroupChannel(groupId); |
| 145 | + for (const auto& slice : *part.Slices) { |
| 146 | + yieldHandler(); |
| 147 | + |
| 148 | + ui64 beginDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.BeginRowId(), env, ready); |
| 149 | + ui64 endDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.EndRowId(), env, ready); |
| 150 | + if (ready && endDataSize > beginDataSize) { |
| 151 | + stats.DataSize.Add(endDataSize - beginDataSize, channel); |
| 152 | + } |
| 153 | + } |
| 154 | + } |
| 155 | + |
| 156 | + TVector<std::pair<TRowId, TRowId>> historicSlices; |
| 157 | + |
| 158 | + if (part->HistoricGroupsCount) { // main historic group |
| 159 | + TGroupId groupId{0, true}; |
| 160 | + auto channel = part->GetGroupChannel(groupId); |
| 161 | + for (const auto& slice : *part.Slices) { |
| 162 | + yieldHandler(); |
| 163 | + |
| 164 | + TRowId beginRowId, endRowId; |
| 165 | + bool readySlice = true; |
| 166 | + ui64 beginDataSize = GetPrevHistoricDataSize(part.Part.Get(), groupId, slice.BeginRowId(), env, beginRowId, readySlice); |
| 167 | + ui64 endDataSize = GetPrevHistoricDataSize(part.Part.Get(), groupId, slice.EndRowId(), env, endRowId, readySlice); |
| 168 | + ready &= readySlice; |
| 169 | + if (ready && endDataSize > beginDataSize) { |
| 170 | + stats.DataSize.Add(endDataSize - beginDataSize, channel); |
| 171 | + } |
| 172 | + if (readySlice && endRowId > beginRowId) { |
| 173 | + historicSlices.emplace_back(beginRowId, endRowId); |
| 174 | + } |
| 175 | + } |
| 176 | + } |
| 177 | + |
| 178 | + for (ui32 groupIndex : xrange<ui32>(1, part->HistoricGroupsCount)) { |
| 179 | + TGroupId groupId{groupIndex, true}; |
| 180 | + auto channel = part->GetGroupChannel(groupId); |
| 181 | + for (const auto& slice : historicSlices) { |
| 182 | + yieldHandler(); |
| 183 | + |
| 184 | + ui64 beginDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.first, env, ready); |
| 185 | + ui64 endDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.second, env, ready); |
| 186 | + if (ready && endDataSize > beginDataSize) { |
| 187 | + stats.DataSize.Add(endDataSize - beginDataSize, channel); |
| 188 | + } |
| 189 | + } |
| 190 | + } |
| 191 | + |
| 192 | + return ready; |
| 193 | +} |
| 194 | + |
| 195 | +} |
| 196 | + |
| 197 | +bool BuildStatsBTreeIndex(const TSubset& subset, TStats& stats, ui32 histogramBucketsCount, IPages* env, TBuildStatsYieldHandler yieldHandler) { |
| 198 | + stats.Clear(); |
| 199 | + |
| 200 | + bool ready = true; |
| 201 | + for (const auto& part : subset.Flatten) { |
| 202 | + stats.IndexSize.Add(part->IndexesRawSize, part->Label.Channel()); |
| 203 | + ready &= AddDataSize(part, stats, env, yieldHandler); |
| 204 | + } |
| 205 | + |
| 206 | + if (!ready) { |
| 207 | + return false; |
| 208 | + } |
| 209 | + |
| 210 | + ready &= BuildStatsHistogramsBTreeIndex(subset, stats, |
| 211 | + stats.RowCount / histogramBucketsCount, stats.DataSize.Size / histogramBucketsCount, |
| 212 | + env, yieldHandler); |
| 213 | + |
| 214 | + return ready; |
| 215 | +} |
| 216 | + |
| 217 | +} |
0 commit comments