Skip to content

Commit a95ce0d

Browse files
log(schemeshard): fix message for split/merge logs (#15909)
1 parent 22e1472 commit a95ce0d

File tree

3 files changed

+38
-29
lines changed

3 files changed

+38
-29
lines changed

ydb/core/tx/schemeshard/schemeshard__table_stats.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -425,7 +425,7 @@ bool TTxStoreTableStats::PersistSingleStats(const TPathId& pathId,
425425
TVector<TShardIdx> shardsToMerge;
426426
TString mergeReason;
427427
if ((!index || index->State == NKikimrSchemeOp::EIndexStateReady)
428-
&& table->CheckCanMergePartitions(Self->SplitSettings, forceShardSplitSettings, shardIdx, shardsToMerge, mainTableForIndex, mergeReason)
428+
&& table->CheckCanMergePartitions(Self->SplitSettings, forceShardSplitSettings, shardIdx, Self->ShardInfos[shardIdx].TabletID, shardsToMerge, mainTableForIndex, mergeReason)
429429
) {
430430
TTxId txId = Self->GetCachedTxId(ctx);
431431

ydb/core/tx/schemeshard/schemeshard_info_types.cpp

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1873,7 +1873,8 @@ bool TTableInfo::TryAddShardToMerge(const TSplitSettings& splitSettings,
18731873
const TForceShardSplitSettings& forceShardSplitSettings,
18741874
TShardIdx shardIdx, TVector<TShardIdx>& shardsToMerge,
18751875
THashSet<TTabletId>& partOwners, ui64& totalSize, float& totalLoad,
1876-
const TTableInfo* mainTableForIndex, TString& reason) const
1876+
float cpuUsageThreshold, const TTableInfo* mainTableForIndex,
1877+
TString& reason) const
18771878
{
18781879
if (ExpectedPartitionCount + 1 - shardsToMerge.size() <= GetMinPartitionsCount()) {
18791880
return false;
@@ -1907,9 +1908,7 @@ bool TTableInfo::TryAddShardToMerge(const TSplitSettings& splitSettings,
19071908
const auto sizeToMerge = GetSizeToMerge(forceShardSplitSettings);
19081909
if (IsMergeBySizeEnabled(forceShardSplitSettings) && stats->DataSize + totalSize <= sizeToMerge) {
19091910
reason = TStringBuilder() << "merge by size ("
1910-
<< "dataSize: " << stats->DataSize << ", "
1911-
<< "totalSize: " << stats->DataSize + totalSize << ", "
1912-
<< "sizeToMerge: " << sizeToMerge << ")";
1911+
<< "shardSize: " << stats->DataSize << ")";
19131912
canMerge = true;
19141913
}
19151914

@@ -1932,21 +1931,15 @@ bool TTableInfo::TryAddShardToMerge(const TSplitSettings& splitSettings,
19321931
// Check that total load doesn't exceed the limits
19331932
float shardLoad = stats->GetCurrentRawCpuUsage() * 0.000001;
19341933
if (IsMergeByLoadEnabled(mainTableForIndex)) {
1935-
const auto settings = GetEffectiveSplitByLoadSettings(mainTableForIndex);
1936-
i64 cpuPercentage = settings.GetCpuPercentageThreshold();
1937-
float cpuUsageThreshold = 0.01 * (cpuPercentage ? cpuPercentage : (i64)splitSettings.FastSplitCpuPercentageThreshold);
1938-
19391934
// Calculate shard load based on historical data
19401935
TDuration loadDuration = TDuration::Seconds(splitSettings.MergeByLoadMinLowLoadDurationSec);
19411936
shardLoad = 0.01 * stats->GetLatestMaxCpuUsagePercent(now - loadDuration);
19421937

1943-
if (shardLoad + totalLoad > cpuUsageThreshold *0.7)
1938+
if (shardLoad + totalLoad > cpuUsageThreshold)
19441939
return false;
19451940

19461941
reason = TStringBuilder() << "merge by load ("
1947-
<< "shardLoad: " << shardLoad << ", "
1948-
<< "totalLoad: " << shardLoad + totalLoad << ", "
1949-
<< "loadThreshold: " << cpuUsageThreshold * 0.7 << ")";
1942+
<< "shardLoad: " << shardLoad << ")";
19501943
}
19511944

19521945
// Merged shards must not have borrowed parts from the same original tablet
@@ -1966,8 +1959,9 @@ bool TTableInfo::TryAddShardToMerge(const TSplitSettings& splitSettings,
19661959

19671960
bool TTableInfo::CheckCanMergePartitions(const TSplitSettings& splitSettings,
19681961
const TForceShardSplitSettings& forceShardSplitSettings,
1969-
TShardIdx shardIdx, TVector<TShardIdx>& shardsToMerge,
1970-
const TTableInfo* mainTableForIndex, TString& reason) const
1962+
TShardIdx shardIdx, const TTabletId& tabletId,
1963+
TVector<TShardIdx>& shardsToMerge, const TTableInfo* mainTableForIndex,
1964+
TString& reason) const
19711965
{
19721966
// Don't split/merge backup tables
19731967
if (IsBackup) {
@@ -1997,28 +1991,43 @@ bool TTableInfo::CheckCanMergePartitions(const TSplitSettings& splitSettings,
19971991
shardsToMerge.clear();
19981992
ui64 totalSize = 0;
19991993
float totalLoad = 0;
1994+
const auto settings = GetEffectiveSplitByLoadSettings(mainTableForIndex);
1995+
const i64 cpuPercentageThreshold = settings.GetCpuPercentageThreshold();
1996+
const float cpuUsageThreshold = 0.01 * (cpuPercentageThreshold ? cpuPercentageThreshold : (i64)splitSettings.FastSplitCpuPercentageThreshold);
1997+
const float cpuMergeThreshold = 0.7 * cpuUsageThreshold;
1998+
20001999
THashSet<TTabletId> partOwners;
2000+
TString shardMergeReason;
20012001

20022002
// Make sure we can actually merge current shard first
2003-
if (!TryAddShardToMerge(splitSettings, forceShardSplitSettings, shardIdx, shardsToMerge, partOwners, totalSize, totalLoad, mainTableForIndex, reason)) {
2003+
if (!TryAddShardToMerge(splitSettings, forceShardSplitSettings, shardIdx, shardsToMerge, partOwners, totalSize, totalLoad, cpuMergeThreshold, mainTableForIndex, shardMergeReason)) {
20042004
return false;
20052005
}
20062006

2007-
TString mergeReason;
2007+
reason = TStringBuilder() << "shard with tabletId: " << tabletId
2008+
<< " " << shardMergeReason;
2009+
20082010
for (i64 pi = partitionIdx - 1; pi >= 0; --pi) {
2009-
if (!TryAddShardToMerge(splitSettings, forceShardSplitSettings, GetPartitions()[pi].ShardIdx, shardsToMerge, partOwners, totalSize, totalLoad, mainTableForIndex, mergeReason)) {
2011+
if (!TryAddShardToMerge(splitSettings, forceShardSplitSettings, GetPartitions()[pi].ShardIdx, shardsToMerge, partOwners, totalSize, totalLoad, cpuMergeThreshold, mainTableForIndex, shardMergeReason)) {
20102012
break;
20112013
}
20122014
}
20132015
// make shardsToMerge ordered by partition index
20142016
Reverse(shardsToMerge.begin(), shardsToMerge.end());
20152017

20162018
for (ui64 pi = partitionIdx + 1; pi < GetPartitions().size(); ++pi) {
2017-
if (!TryAddShardToMerge(splitSettings, forceShardSplitSettings, GetPartitions()[pi].ShardIdx, shardsToMerge, partOwners, totalSize, totalLoad, mainTableForIndex, mergeReason)) {
2019+
if (!TryAddShardToMerge(splitSettings, forceShardSplitSettings, GetPartitions()[pi].ShardIdx, shardsToMerge, partOwners, totalSize, totalLoad, cpuMergeThreshold, mainTableForIndex, shardMergeReason)) {
20182020
break;
20192021
}
20202022
}
20212023

2024+
reason += TStringBuilder()
2025+
<< ", shardToMergeCount: " << shardsToMerge.size()
2026+
<< ", totalSize: " << totalSize
2027+
<< ", sizeToMerge: " << GetSizeToMerge(forceShardSplitSettings)
2028+
<< ", totalLoad: " << totalLoad
2029+
<< ", loadThreshold: " << cpuMergeThreshold;
2030+
20222031
return shardsToMerge.size() > 1;
20232032
}
20242033

@@ -2082,8 +2091,8 @@ bool TTableInfo::CheckSplitByLoad(
20822091
reason = TStringBuilder() << "split by load ("
20832092
<< "rowCount: " << rowCount << ", "
20842093
<< "minRowCount: " << MIN_ROWS_FOR_SPLIT_BY_LOAD << ", "
2085-
<< "dataSize: " << dataSize << ", "
2086-
<< "minDataSize: " << MIN_SIZE_FOR_SPLIT_BY_LOAD << ", "
2094+
<< "shardSize: " << dataSize << ", "
2095+
<< "minShardSize: " << MIN_SIZE_FOR_SPLIT_BY_LOAD << ", "
20872096
<< "shardCount: " << Stats.PartitionStats.size() << ", "
20882097
<< "maxShardCount: " << maxShards << ", "
20892098
<< "cpuUsage: " << stats.GetCurrentRawCpuUsage() << ", "

ydb/core/tx/schemeshard/schemeshard_info_types.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -685,11 +685,11 @@ struct TTableInfo : public TSimpleRefCount<TTableInfo> {
685685
const TForceShardSplitSettings& forceShardSplitSettings,
686686
TShardIdx shardIdx, TVector<TShardIdx>& shardsToMerge,
687687
THashSet<TTabletId>& partOwners, ui64& totalSize, float& totalLoad,
688-
const TTableInfo* mainTableForIndex, TString& reason) const;
688+
float cpuUsageThreshold, const TTableInfo* mainTableForIndex, TString& reason) const;
689689

690690
bool CheckCanMergePartitions(const TSplitSettings& splitSettings,
691691
const TForceShardSplitSettings& forceShardSplitSettings,
692-
TShardIdx shardIdx, TVector<TShardIdx>& shardsToMerge,
692+
TShardIdx shardIdx, const TTabletId& tabletId, TVector<TShardIdx>& shardsToMerge,
693693
const TTableInfo* mainTableForIndex, TString& reason) const;
694694

695695
bool CheckSplitByLoad(
@@ -825,18 +825,18 @@ struct TTableInfo : public TSimpleRefCount<TTableInfo> {
825825
// When shard is over the maximum size we split even when over max partitions
826826
if (dataSize >= params.ForceShardSplitDataSize && !params.DisableForceShardSplit) {
827827
reason = TStringBuilder() << "force split by size ("
828-
<< "dataSize: " << dataSize << ", "
829-
<< "maxDataSize: " << params.ForceShardSplitDataSize << ")";
828+
<< "shardSize: " << dataSize << ", "
829+
<< "maxShardSize: " << params.ForceShardSplitDataSize << ")";
830830

831831
return true;
832832
}
833833
// Otherwise we split when we may add one more partition
834834
if (Partitions.size() < GetMaxPartitionsCount() && dataSize >= GetShardSizeToSplit(params)) {
835835
reason = TStringBuilder() << "split by size ("
836-
<< "partitionCount: " << Partitions.size() << ", "
837-
<< "maxPartitionCount: " << GetMaxPartitionsCount() << ", "
838-
<< "dataSize: " << dataSize << ", "
839-
<< "maxDataSize: " << GetShardSizeToSplit(params) << ")";
836+
<< "shardCount: " << Partitions.size() << ", "
837+
<< "maxShardCount: " << GetMaxPartitionsCount() << ", "
838+
<< "shardSize: " << dataSize << ", "
839+
<< "maxShardSize: " << GetShardSizeToSplit(params) << ")";
840840

841841
return true;
842842
}

0 commit comments

Comments
 (0)