Skip to content

Add BuildStats B-Tree detailed logging #15193

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Mar 5, 2025
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions ydb/core/tablet_flat/flat_stat_table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
namespace NKikimr {
namespace NTable {

bool BuildStats(const TSubset& subset, TStats& stats, ui64 rowCountResolution, ui64 dataSizeResolution, ui32 histogramBucketsCount, IPages* env, TBuildStatsYieldHandler yieldHandler) {
bool BuildStats(const TSubset& subset, TStats& stats, ui64 rowCountResolution, ui64 dataSizeResolution, ui32 histogramBucketsCount, IPages* env,
TBuildStatsYieldHandler yieldHandler, const TString& logPrefix)
{
stats.Clear();

bool mixedIndex = false;
Expand All @@ -17,9 +19,17 @@ bool BuildStats(const TSubset& subset, TStats& stats, ui64 rowCountResolution, u
}
}

return mixedIndex
LOG_BUILD_STATS("starting for " << (mixedIndex ? "mixed" : "b-tree") << " index");

auto ready = mixedIndex
? BuildStatsMixedIndex(subset, stats, rowCountResolution, dataSizeResolution, env, yieldHandler)
: BuildStatsBTreeIndex(subset, stats, histogramBucketsCount, env, yieldHandler);
: BuildStatsBTreeIndex(subset, stats, histogramBucketsCount, env, yieldHandler, logPrefix);

LOG_BUILD_STATS("finished for " << (mixedIndex ? "mixed" : "b-tree") << " index"
<< " ready: " << ready
<< " stats: " << stats.ToString());

return ready;
}

void GetPartOwners(const TSubset& subset, THashSet<ui64>& partOwners) {
Expand Down
23 changes: 22 additions & 1 deletion ydb/core/tablet_flat/flat_stat_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#include <util/generic/hash_set.h>

#include <ydb/core/scheme/scheme_tablecell.h>
#include <ydb/library/services/services.pb.h>
#include <ydb/library/actors/core/log.h>

namespace NKikimr {
namespace NTable {
Expand Down Expand Up @@ -123,6 +125,16 @@ struct TStats {
RowCountHistogram.swap(other.RowCountHistogram);
DataSizeHistogram.swap(other.DataSizeHistogram);
}

// Renders a one-line, human-readable summary of these stats.
// Scalar counters and sizes are printed as-is; both histograms are
// reported only by their number of entries, not by their contents.
TString ToString() const noexcept {
    TStringBuilder summary;
    summary << "RowCount: " << RowCount;
    summary << " DataSize: " << DataSize.Size;
    summary << " IndexSize: " << IndexSize.Size;
    summary << " ByKeyFilterSize: " << ByKeyFilterSize;
    summary << " RowCountHistogram: " << RowCountHistogram.size();
    summary << " DataSizeHistogram: " << DataSizeHistogram.size();
    return summary;
}
};

class TKeyAccessSample {
Expand Down Expand Up @@ -193,7 +205,16 @@ class TKeyAccessSample {

using TBuildStatsYieldHandler = std::function<void()>;

bool BuildStats(const TSubset& subset, TStats& stats, ui64 rowCountResolution, ui64 dataSizeResolution, ui32 histogramBucketsCount, IPages* env, TBuildStatsYieldHandler yieldHandler);
// Emits one stats-builder log message composed of `logPrefix` followed by `stream`.
//
// Requirements at the expansion site:
//   * a variable named `logPrefix` must be in scope (it is streamed before `stream`);
//   * `stream` is a chain of `<<`-able operands, e.g. "text " << value.
//
// Routing:
//   * when NActors::TlsActivationContext is non-null, the message goes through
//     LOG_DEBUG_S under the NKikimrServices::DATASHARD_STATS_BUILDER component;
//   * otherwise it is written to Cerr followed by Endl.
//
// The body is wrapped in do { ... } while (false) so that the macro expands to a
// single statement: `LOG_BUILD_STATS(...);` is then safe inside an unbraced
// if/else, where a bare `if { } else { }` expansion followed by `;` would
// detach or steal the caller's `else` branch.
#define LOG_BUILD_STATS(stream) \
    do { \
        if (auto actorContext = NActors::TlsActivationContext; actorContext) { \
            LOG_DEBUG_S(*actorContext, NKikimrServices::DATASHARD_STATS_BUILDER, logPrefix << stream); \
        } else { \
            Cerr << logPrefix << stream << Endl; \
        } \
    } while (false)

// Builds statistics for `subset` into `stats`.
//
// `stats` is cleared first. The work is then delegated either to the mixed-index
// builder (which receives rowCountResolution / dataSizeResolution) or to the
// B-Tree-index builder (which receives histogramBucketsCount and logPrefix).
// A "starting" and a "finished" message are logged via LOG_BUILD_STATS around
// the delegated call.
//
// Returns the readiness flag produced by the selected builder.
// NOTE(review): presumably `false` means some required pages were not available
// through `env` yet and the call should be retried — confirm against callers.
//
// `logPrefix` is prepended to every message emitted through LOG_BUILD_STATS;
// it defaults to an empty string.
bool BuildStats(const TSubset& subset, TStats& stats, ui64 rowCountResolution, ui64 dataSizeResolution, ui32 histogramBucketsCount, IPages* env,
TBuildStatsYieldHandler yieldHandler, const TString& logPrefix = {});

void GetPartOwners(const TSubset& subset, THashSet<ui64>& partOwners);

}}
31 changes: 27 additions & 4 deletions ydb/core/tablet_flat/flat_stat_table_btree_index.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "flat_stat_table.h"
#include "flat_table_subset.h"
#include <util/stream/format.h>
#include "flat_stat_table_btree_index.h"

namespace NKikimr::NTable {
Expand Down Expand Up @@ -108,16 +109,21 @@ void AddBlobsSize(const TPart* part, TChanneledDataSize& stats, const TFrames* f
}
}

bool AddDataSize(const TPartView& part, TStats& stats, IPages* env, TBuildStatsYieldHandler yieldHandler) {
bool AddDataSize(const TPartView& part, TStats& stats, IPages* env, TBuildStatsYieldHandler yieldHandler, const TString& logPrefix) {
bool ready = true;

if (!part.Slices || part.Slices->empty()) {
return true;
}

auto logAddingGroup = [&](TGroupId groupId){
LOG_BUILD_STATS("adding group " << groupId << " " << part->IndexPages.GetBTree(groupId).ToString());
};

if (part->GroupsCount) { // main group
TGroupId groupId{};
auto channel = part->GetGroupChannel(groupId);
logAddingGroup(groupId);

for (const auto& slice : *part.Slices) {
yieldHandler();
Expand All @@ -129,19 +135,25 @@ bool AddDataSize(const TPartView& part, TStats& stats, IPages* env, TBuildStatsY
if (ready && endDataSize > beginDataSize) {
stats.DataSize.Add(endDataSize - beginDataSize, channel);
}
LOG_BUILD_STATS("added slice [" << slice.BeginRowId() << ", " << slice.EndRowId() << ") data size "
<< "(" << HumanReadableSize(endDataSize, SF_BYTES) << " - " << HumanReadableSize(beginDataSize, SF_BYTES) << ") => " << HumanReadableSize(stats.DataSize.Size, SF_BYTES));

if (part->Small) {
AddBlobsSize(part.Part.Get(), stats.DataSize, part->Small.Get(), ELargeObj::Outer, slice.BeginRowId(), slice.EndRowId());
LOG_BUILD_STATS("added small blobs data size => " << HumanReadableSize(stats.DataSize.Size, SF_BYTES));
}
if (part->Large) {
AddBlobsSize(part.Part.Get(), stats.DataSize, part->Large.Get(), ELargeObj::Extern, slice.BeginRowId(), slice.EndRowId());
LOG_BUILD_STATS("added large blobs data size => " << HumanReadableSize(stats.DataSize.Size, SF_BYTES));
}
}
}

for (ui32 groupIndex : xrange<ui32>(1, part->GroupsCount)) {
TGroupId groupId{groupIndex};
auto channel = part->GetGroupChannel(groupId);
logAddingGroup(groupId);

for (const auto& slice : *part.Slices) {
yieldHandler();

Expand All @@ -150,6 +162,8 @@ bool AddDataSize(const TPartView& part, TStats& stats, IPages* env, TBuildStatsY
if (ready && endDataSize > beginDataSize) {
stats.DataSize.Add(endDataSize - beginDataSize, channel);
}
LOG_BUILD_STATS("added slice [" << slice.BeginRowId() << ", " << slice.EndRowId() << ") data size "
<< "(" << HumanReadableSize(endDataSize, SF_BYTES) << " - " << HumanReadableSize(beginDataSize, SF_BYTES) << ") => " << HumanReadableSize(stats.DataSize.Size, SF_BYTES));
}
}

Expand All @@ -158,6 +172,8 @@ bool AddDataSize(const TPartView& part, TStats& stats, IPages* env, TBuildStatsY
if (part->HistoricGroupsCount) { // main historic group
TGroupId groupId{0, true};
auto channel = part->GetGroupChannel(groupId);
logAddingGroup(groupId);

for (const auto& slice : *part.Slices) {
yieldHandler();

Expand All @@ -169,6 +185,8 @@ bool AddDataSize(const TPartView& part, TStats& stats, IPages* env, TBuildStatsY
if (ready && endDataSize > beginDataSize) {
stats.DataSize.Add(endDataSize - beginDataSize, channel);
}
LOG_BUILD_STATS("added slice [" << slice.BeginRowId() << ", " << slice.EndRowId() << ") data size "
<< "(" << HumanReadableSize(endDataSize, SF_BYTES) << " - " << HumanReadableSize(beginDataSize, SF_BYTES) << ") => " << HumanReadableSize(stats.DataSize.Size, SF_BYTES));
if (readySlice && endRowId > beginRowId) {
historicSlices.emplace_back(beginRowId, endRowId);
}
Expand All @@ -178,6 +196,8 @@ bool AddDataSize(const TPartView& part, TStats& stats, IPages* env, TBuildStatsY
for (ui32 groupIndex : xrange<ui32>(1, part->HistoricGroupsCount)) {
TGroupId groupId{groupIndex, true};
auto channel = part->GetGroupChannel(groupId);
logAddingGroup(groupId);

for (const auto& slice : historicSlices) {
yieldHandler();

Expand All @@ -186,6 +206,8 @@ bool AddDataSize(const TPartView& part, TStats& stats, IPages* env, TBuildStatsY
if (ready && endDataSize > beginDataSize) {
stats.DataSize.Add(endDataSize - beginDataSize, channel);
}
LOG_BUILD_STATS("added slice [" << slice.first << ", " << slice.second << ") data size "
<< "(" << HumanReadableSize(endDataSize, SF_BYTES) << " - " << HumanReadableSize(beginDataSize, SF_BYTES) << ") => " << HumanReadableSize(stats.DataSize.Size, SF_BYTES));
}
}

Expand All @@ -194,14 +216,15 @@ bool AddDataSize(const TPartView& part, TStats& stats, IPages* env, TBuildStatsY

}

bool BuildStatsBTreeIndex(const TSubset& subset, TStats& stats, ui32 histogramBucketsCount, IPages* env, TBuildStatsYieldHandler yieldHandler) {
bool BuildStatsBTreeIndex(const TSubset& subset, TStats& stats, ui32 histogramBucketsCount, IPages* env, TBuildStatsYieldHandler yieldHandler, const TString& logPrefix) {
stats.Clear();

bool ready = true;
for (const auto& part : subset.Flatten) {
LOG_BUILD_STATS("adding part " << part->Label.ToString() << " data size (" << HumanReadableSize(part->DataSize(), SF_BYTES) << " in total)");
stats.IndexSize.Add(part->IndexesRawSize, part->Label.Channel());
stats.ByKeyFilterSize += part->ByKey ? part->ByKey->Raw.size() : 0;
ready &= AddDataSize(part, stats, env, yieldHandler);
ready &= AddDataSize(part, stats, env, yieldHandler, logPrefix);
}

if (!ready) {
Expand All @@ -210,7 +233,7 @@ bool BuildStatsBTreeIndex(const TSubset& subset, TStats& stats, ui32 histogramBu

ready &= BuildStatsHistogramsBTreeIndex(subset, stats,
stats.RowCount / histogramBucketsCount, stats.DataSize.Size / histogramBucketsCount,
env, yieldHandler);
env, yieldHandler, logPrefix);

return ready;
}
Expand Down
6 changes: 4 additions & 2 deletions ydb/core/tablet_flat/flat_stat_table_btree_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@

namespace NKikimr::NTable {

bool BuildStatsBTreeIndex(const TSubset& subset, TStats& stats, ui32 histogramBucketsCount, IPages* env, TBuildStatsYieldHandler yieldHandler);
// Builds statistics for `subset` into `stats` using B-Tree indexes.
//
// `stats` is cleared first. For every part in subset.Flatten the index size and
// by-key filter size are accumulated and the part's data size is added; the
// histograms are then built with resolutions derived from
// stats.RowCount / histogramBucketsCount and stats.DataSize.Size / histogramBucketsCount.
//
// Returns the accumulated readiness flag of the steps above.
// `logPrefix` is prepended to the detailed log messages emitted while building;
// it defaults to an empty string.
bool BuildStatsBTreeIndex(const TSubset& subset, TStats& stats, ui32 histogramBucketsCount, IPages* env,
TBuildStatsYieldHandler yieldHandler, const TString& logPrefix = "");

bool BuildStatsHistogramsBTreeIndex(const TSubset& subset, TStats& stats, ui64 rowCountResolution, ui64 dataSizeResolution, IPages* env, TBuildStatsYieldHandler yieldHandler);
bool BuildStatsHistogramsBTreeIndex(const TSubset& subset, TStats& stats, ui64 rowCountResolution, ui64 dataSizeResolution, IPages* env,
TBuildStatsYieldHandler yieldHandler, const TString& logPrefix = "");


}
Loading
Loading