Skip to content

Commit 9908e6c

Browse files
authored
Add BuildStats B-Tree detailed logging (#15193)
1 parent 5a1e03d commit 9908e6c

9 files changed

+206
-42
lines changed

ydb/core/tablet_flat/flat_stat_table.cpp

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@
77
namespace NKikimr {
88
namespace NTable {
99

10-
bool BuildStats(const TSubset& subset, TStats& stats, ui64 rowCountResolution, ui64 dataSizeResolution, ui32 histogramBucketsCount, IPages* env, TBuildStatsYieldHandler yieldHandler) {
10+
bool BuildStats(const TSubset& subset, TStats& stats, ui64 rowCountResolution, ui64 dataSizeResolution, ui32 histogramBucketsCount, IPages* env,
11+
TBuildStatsYieldHandler yieldHandler, const TString& logPrefix)
12+
{
1113
stats.Clear();
1214

1315
bool mixedIndex = false;
@@ -17,9 +19,17 @@ bool BuildStats(const TSubset& subset, TStats& stats, ui64 rowCountResolution, u
1719
}
1820
}
1921

20-
return mixedIndex
22+
LOG_BUILD_STATS("starting for " << (mixedIndex ? "mixed" : "b-tree") << " index");
23+
24+
auto ready = mixedIndex
2125
? BuildStatsMixedIndex(subset, stats, rowCountResolution, dataSizeResolution, env, yieldHandler)
22-
: BuildStatsBTreeIndex(subset, stats, histogramBucketsCount, env, yieldHandler);
26+
: BuildStatsBTreeIndex(subset, stats, histogramBucketsCount, env, yieldHandler, logPrefix);
27+
28+
LOG_BUILD_STATS("finished for " << (mixedIndex ? "mixed" : "b-tree") << " index"
29+
<< " ready: " << ready
30+
<< " stats: " << stats.ToString());
31+
32+
return ready;
2333
}
2434

2535
void GetPartOwners(const TSubset& subset, THashSet<ui64>& partOwners) {

ydb/core/tablet_flat/flat_stat_table.h

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
#include <util/generic/hash_set.h>
88

99
#include <ydb/core/scheme/scheme_tablecell.h>
10+
#include <ydb/library/services/services.pb.h>
11+
#include <ydb/library/actors/core/log.h>
1012

1113
namespace NKikimr {
1214
namespace NTable {
@@ -123,6 +125,16 @@ struct TStats {
123125
RowCountHistogram.swap(other.RowCountHistogram);
124126
DataSizeHistogram.swap(other.DataSizeHistogram);
125127
}
128+
129+
// Renders a single-line, human-readable summary of these statistics for log
// output: the scalar counters followed by the number of entries currently
// held in each histogram (sizes only, not the histogram contents).
TString ToString() const noexcept {
    TStringBuilder summary;

    summary << "RowCount: " << RowCount;
    summary << " DataSize: " << DataSize.Size;
    summary << " IndexSize: " << IndexSize.Size;
    summary << " ByKeyFilterSize: " << ByKeyFilterSize;
    summary << " RowCountHistogram: " << RowCountHistogram.size();
    summary << " DataSizeHistogram: " << DataSizeHistogram.size();

    return summary;
}
126138
};
127139

128140
class TKeyAccessSample {
@@ -193,7 +205,16 @@ class TKeyAccessSample {
193205

194206
using TBuildStatsYieldHandler = std::function<void()>;
195207

196-
bool BuildStats(const TSubset& subset, TStats& stats, ui64 rowCountResolution, ui64 dataSizeResolution, ui32 histogramBucketsCount, IPages* env, TBuildStatsYieldHandler yieldHandler);
208+
// Emits one BuildStats log record.
//
// When an actor activation context is set for the current thread
// (NActors::TlsActivationContext is non-null) the record is written at TRACE
// level to the TABLET_STATS_BUILDER component; otherwise it is printed to Cerr
// (presumably for code paths such as unit tests that run outside the actor
// system - confirm).
//
// `stream` is a chain of values joined with `<<`, e.g.
//     LOG_BUILD_STATS("adding group " << groupId);
//
// The macro relies on a variable named `logPrefix` being visible at the
// expansion site; it is prepended to every record.
//
// The body is wrapped in do { ... } while (false) so that the macro expands to
// exactly one statement and must be terminated with `;`. A bare if/else here
// would leave a stray empty statement after the call-site semicolon and break
// usages like `if (cond) LOG_BUILD_STATS(...); else ...`.
#define LOG_BUILD_STATS(stream) \
    do { \
        if (auto actorContext = NActors::TlsActivationContext; actorContext) { \
            LOG_TRACE_S(*actorContext, NKikimrServices::TABLET_STATS_BUILDER, logPrefix << stream); \
        } else { \
            Cerr << logPrefix << stream << Endl; \
        } \
    } while (false)
214+
215+
bool BuildStats(const TSubset& subset, TStats& stats, ui64 rowCountResolution, ui64 dataSizeResolution, ui32 histogramBucketsCount, IPages* env,
216+
TBuildStatsYieldHandler yieldHandler, const TString& logPrefix = {});
217+
197218
void GetPartOwners(const TSubset& subset, THashSet<ui64>& partOwners);
198219

199220
}}

ydb/core/tablet_flat/flat_stat_table_btree_index.cpp

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "flat_stat_table.h"
22
#include "flat_table_subset.h"
3+
#include <util/stream/format.h>
34
#include "flat_stat_table_btree_index.h"
45

56
namespace NKikimr::NTable {
@@ -108,16 +109,21 @@ void AddBlobsSize(const TPart* part, TChanneledDataSize& stats, const TFrames* f
108109
}
109110
}
110111

111-
bool AddDataSize(const TPartView& part, TStats& stats, IPages* env, TBuildStatsYieldHandler yieldHandler) {
112+
bool AddDataSize(const TPartView& part, TStats& stats, IPages* env, TBuildStatsYieldHandler yieldHandler, const TString& logPrefix) {
112113
bool ready = true;
113114

114115
if (!part.Slices || part.Slices->empty()) {
115116
return true;
116117
}
117118

119+
auto logAddingGroup = [&](TGroupId groupId){
120+
LOG_BUILD_STATS("adding group " << groupId << " " << part->IndexPages.GetBTree(groupId).ToString());
121+
};
122+
118123
if (part->GroupsCount) { // main group
119124
TGroupId groupId{};
120125
auto channel = part->GetGroupChannel(groupId);
126+
logAddingGroup(groupId);
121127

122128
for (const auto& slice : *part.Slices) {
123129
yieldHandler();
@@ -129,19 +135,25 @@ bool AddDataSize(const TPartView& part, TStats& stats, IPages* env, TBuildStatsY
129135
if (ready && endDataSize > beginDataSize) {
130136
stats.DataSize.Add(endDataSize - beginDataSize, channel);
131137
}
138+
LOG_BUILD_STATS("added slice [" << slice.BeginRowId() << ", " << slice.EndRowId() << ") data size "
139+
<< "(" << HumanReadableSize(endDataSize, SF_BYTES) << " - " << HumanReadableSize(beginDataSize, SF_BYTES) << ") => " << HumanReadableSize(stats.DataSize.Size, SF_BYTES));
132140

133141
if (part->Small) {
134142
AddBlobsSize(part.Part.Get(), stats.DataSize, part->Small.Get(), ELargeObj::Outer, slice.BeginRowId(), slice.EndRowId());
143+
LOG_BUILD_STATS("added small blobs data size => " << HumanReadableSize(stats.DataSize.Size, SF_BYTES));
135144
}
136145
if (part->Large) {
137146
AddBlobsSize(part.Part.Get(), stats.DataSize, part->Large.Get(), ELargeObj::Extern, slice.BeginRowId(), slice.EndRowId());
147+
LOG_BUILD_STATS("added large blobs data size => " << HumanReadableSize(stats.DataSize.Size, SF_BYTES));
138148
}
139149
}
140150
}
141151

142152
for (ui32 groupIndex : xrange<ui32>(1, part->GroupsCount)) {
143153
TGroupId groupId{groupIndex};
144154
auto channel = part->GetGroupChannel(groupId);
155+
logAddingGroup(groupId);
156+
145157
for (const auto& slice : *part.Slices) {
146158
yieldHandler();
147159

@@ -150,6 +162,8 @@ bool AddDataSize(const TPartView& part, TStats& stats, IPages* env, TBuildStatsY
150162
if (ready && endDataSize > beginDataSize) {
151163
stats.DataSize.Add(endDataSize - beginDataSize, channel);
152164
}
165+
LOG_BUILD_STATS("added slice [" << slice.BeginRowId() << ", " << slice.EndRowId() << ") data size "
166+
<< "(" << HumanReadableSize(endDataSize, SF_BYTES) << " - " << HumanReadableSize(beginDataSize, SF_BYTES) << ") => " << HumanReadableSize(stats.DataSize.Size, SF_BYTES));
153167
}
154168
}
155169

@@ -158,6 +172,8 @@ bool AddDataSize(const TPartView& part, TStats& stats, IPages* env, TBuildStatsY
158172
if (part->HistoricGroupsCount) { // main historic group
159173
TGroupId groupId{0, true};
160174
auto channel = part->GetGroupChannel(groupId);
175+
logAddingGroup(groupId);
176+
161177
for (const auto& slice : *part.Slices) {
162178
yieldHandler();
163179

@@ -169,6 +185,8 @@ bool AddDataSize(const TPartView& part, TStats& stats, IPages* env, TBuildStatsY
169185
if (ready && endDataSize > beginDataSize) {
170186
stats.DataSize.Add(endDataSize - beginDataSize, channel);
171187
}
188+
LOG_BUILD_STATS("added slice [" << slice.BeginRowId() << ", " << slice.EndRowId() << ") data size "
189+
<< "(" << HumanReadableSize(endDataSize, SF_BYTES) << " - " << HumanReadableSize(beginDataSize, SF_BYTES) << ") => " << HumanReadableSize(stats.DataSize.Size, SF_BYTES));
172190
if (readySlice && endRowId > beginRowId) {
173191
historicSlices.emplace_back(beginRowId, endRowId);
174192
}
@@ -178,6 +196,8 @@ bool AddDataSize(const TPartView& part, TStats& stats, IPages* env, TBuildStatsY
178196
for (ui32 groupIndex : xrange<ui32>(1, part->HistoricGroupsCount)) {
179197
TGroupId groupId{groupIndex, true};
180198
auto channel = part->GetGroupChannel(groupId);
199+
logAddingGroup(groupId);
200+
181201
for (const auto& slice : historicSlices) {
182202
yieldHandler();
183203

@@ -186,6 +206,8 @@ bool AddDataSize(const TPartView& part, TStats& stats, IPages* env, TBuildStatsY
186206
if (ready && endDataSize > beginDataSize) {
187207
stats.DataSize.Add(endDataSize - beginDataSize, channel);
188208
}
209+
LOG_BUILD_STATS("added slice [" << slice.first << ", " << slice.second << ") data size "
210+
<< "(" << HumanReadableSize(endDataSize, SF_BYTES) << " - " << HumanReadableSize(beginDataSize, SF_BYTES) << ") => " << HumanReadableSize(stats.DataSize.Size, SF_BYTES));
189211
}
190212
}
191213

@@ -194,14 +216,15 @@ bool AddDataSize(const TPartView& part, TStats& stats, IPages* env, TBuildStatsY
194216

195217
}
196218

197-
bool BuildStatsBTreeIndex(const TSubset& subset, TStats& stats, ui32 histogramBucketsCount, IPages* env, TBuildStatsYieldHandler yieldHandler) {
219+
bool BuildStatsBTreeIndex(const TSubset& subset, TStats& stats, ui32 histogramBucketsCount, IPages* env, TBuildStatsYieldHandler yieldHandler, const TString& logPrefix) {
198220
stats.Clear();
199221

200222
bool ready = true;
201223
for (const auto& part : subset.Flatten) {
224+
LOG_BUILD_STATS("adding part " << part->Label.ToString() << " data size (" << HumanReadableSize(part->DataSize(), SF_BYTES) << " in total)");
202225
stats.IndexSize.Add(part->IndexesRawSize, part->Label.Channel());
203226
stats.ByKeyFilterSize += part->ByKey ? part->ByKey->Raw.size() : 0;
204-
ready &= AddDataSize(part, stats, env, yieldHandler);
227+
ready &= AddDataSize(part, stats, env, yieldHandler, logPrefix);
205228
}
206229

207230
if (!ready) {
@@ -210,7 +233,7 @@ bool BuildStatsBTreeIndex(const TSubset& subset, TStats& stats, ui32 histogramBu
210233

211234
ready &= BuildStatsHistogramsBTreeIndex(subset, stats,
212235
stats.RowCount / histogramBucketsCount, stats.DataSize.Size / histogramBucketsCount,
213-
env, yieldHandler);
236+
env, yieldHandler, logPrefix);
214237

215238
return ready;
216239
}

ydb/core/tablet_flat/flat_stat_table_btree_index.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,11 @@
55

66
namespace NKikimr::NTable {
77

8-
bool BuildStatsBTreeIndex(const TSubset& subset, TStats& stats, ui32 histogramBucketsCount, IPages* env, TBuildStatsYieldHandler yieldHandler);
8+
bool BuildStatsBTreeIndex(const TSubset& subset, TStats& stats, ui32 histogramBucketsCount, IPages* env,
9+
TBuildStatsYieldHandler yieldHandler, const TString& logPrefix = "");
910

10-
bool BuildStatsHistogramsBTreeIndex(const TSubset& subset, TStats& stats, ui64 rowCountResolution, ui64 dataSizeResolution, IPages* env, TBuildStatsYieldHandler yieldHandler);
11+
bool BuildStatsHistogramsBTreeIndex(const TSubset& subset, TStats& stats, ui64 rowCountResolution, ui64 dataSizeResolution, IPages* env,
12+
TBuildStatsYieldHandler yieldHandler, const TString& logPrefix = "");
1113

1214

1315
}

0 commit comments

Comments
 (0)