Skip to content

Commit 83a86c2

Browse files
authored
schemeshard: preserialize Table.SplitBoundary for describe result (ydb-platform#6648)
Preserialize table's split boundaries the same way table partitions are. The size of both depend on the same variable: number of shards in the table, but TablePartitions was preserialized (and cached) while Table.SplitBoundaries wasn't. Preserializing all potentially huge parts of DescribeSchemeResult message before it gets to the interconnect saves interconnect actors additional serialization costs. And when partitioning of the huge tables goes through the period of a rapid change that additional serialization causes interconnect to overload. Single shortcoming though: preserialized SplitBoundary is not used (cannot be used) when boundaries of the index tables are requested through describe request on table index. KIKIMR-21686
1 parent c5d412c commit 83a86c2

7 files changed

+115
-86
lines changed

ydb/core/tx/schemeshard/schemeshard__operation_alter_continuous_backup.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ TVector<ISubOperation::TPtr> CreateAlterContinuousBackup(TOperationId opId, cons
8383
const NScheme::TTypeRegistry* typeRegistry = AppData(context.Ctx)->TypeRegistry;
8484

8585
NKikimrSchemeOp::TTableDescription schema;
86-
context.SS->DescribeTable(table, typeRegistry, true, false, &schema);
86+
context.SS->DescribeTable(table, typeRegistry, true, &schema);
8787
schema.MutablePartitionConfig()->CopyFrom(table->TableDescription.GetPartitionConfig());
8888

8989
TString errStr;

ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ void PrepareScheme(NKikimrSchemeOp::TTableDescription* schema, const TString& na
1414
const NScheme::TTypeRegistry* typeRegistry = AppData(context.Ctx)->TypeRegistry;
1515

1616
NKikimrSchemeOp::TTableDescription completedSchema;
17-
context.SS->DescribeTable(srcTableInfo, typeRegistry, true, false, &completedSchema);
17+
context.SS->DescribeTable(srcTableInfo, typeRegistry, true, &completedSchema);
1818
completedSchema.SetName(name);
1919

2020
//inherit all from Src except PartitionConfig, PartitionConfig could be altered

ydb/core/tx/schemeshard/schemeshard_impl.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -598,8 +598,9 @@ void TSchemeShard::ClearDescribePathCaches(const TPathElement::TPtr node, bool f
598598
} else if (node->PathType == NKikimrSchemeOp::EPathType::EPathTypeTable) {
599599
Y_ABORT_UNLESS(Tables.contains(node->PathId));
600600
TTableInfo::TPtr tabletInfo = Tables.at(node->PathId);
601-
tabletInfo->PreSerializedPathDescription.clear();
602-
tabletInfo->PreSerializedPathDescriptionWithoutRangeKey.clear();
601+
tabletInfo->PreserializedTablePartitions.clear();
602+
tabletInfo->PreserializedTablePartitionsNoKeys.clear();
603+
tabletInfo->PreserializedTableSplitBoundaries.clear();
603604
}
604605
}
605606

ydb/core/tx/schemeshard/schemeshard_impl.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1015,7 +1015,7 @@ class TSchemeShard
10151015
void FillAsyncIndexInfo(const TPathId& tableId, NKikimrTxDataShard::TFlatSchemeTransaction& tx);
10161016

10171017
void DescribeTable(const TTableInfo::TPtr tableInfo, const NScheme::TTypeRegistry* typeRegistry,
1018-
bool fillConfig, bool fillBoundaries, NKikimrSchemeOp::TTableDescription* entry) const;
1018+
bool fillConfig, NKikimrSchemeOp::TTableDescription* entry) const;
10191019
void DescribeTableIndex(const TPathId& pathId, const TString& name,
10201020
bool fillConfig, bool fillBoundaries, NKikimrSchemeOp::TIndexDescription& entry
10211021
) const;
@@ -1031,7 +1031,6 @@ class TSchemeShard
10311031
void DescribeReplication(const TPathId& pathId, const TString& name, NKikimrSchemeOp::TReplicationDescription& desc);
10321032
void DescribeReplication(const TPathId& pathId, const TString& name, TReplicationInfo::TPtr info, NKikimrSchemeOp::TReplicationDescription& desc);
10331033
void DescribeBlobDepot(const TPathId& pathId, const TString& name, NKikimrSchemeOp::TBlobDepotDescription& desc);
1034-
static void FillTableBoundaries(const TTableInfo::TPtr tableInfo, google::protobuf::RepeatedPtrField<NKikimrSchemeOp::TSplitBoundary>& boundaries);
10351034

10361035
void Handle(NKikimr::NOlap::NBackground::TEvExecuteGeneralLocalTransaction::TPtr& ev, const TActorContext& ctx);
10371036
void Handle(NKikimr::NOlap::NBackground::TEvRemoveSession::TPtr& ev, const TActorContext& ctx);

ydb/core/tx/schemeshard/schemeshard_info_types.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -362,10 +362,10 @@ TTableInfo::TAlterDataPtr TTableInfo::CreateAlterData(
362362
const TTableInfo::TColumn& sourceColumn = source->Columns[colId];
363363

364364
if (col.HasDefaultFromSequence()) {
365-
if (sourceColumn.PType.GetTypeId() != NScheme::NTypeIds::Int64
365+
if (sourceColumn.PType.GetTypeId() != NScheme::NTypeIds::Int64
366366
&& NPg::PgTypeIdFromTypeDesc(sourceColumn.PType.GetTypeDesc()) != INT8OID) {
367-
TString sequenceType = sourceColumn.PType.GetTypeId() == NScheme::NTypeIds::Pg
368-
? NPg::PgTypeNameFromTypeDesc(NPg::TypeDescFromPgTypeId(INT8OID))
367+
TString sequenceType = sourceColumn.PType.GetTypeId() == NScheme::NTypeIds::Pg
368+
? NPg::PgTypeNameFromTypeDesc(NPg::TypeDescFromPgTypeId(INT8OID))
369369
: NScheme::TypeName(NScheme::NTypeIds::Int64);
370370
errStr = Sprintf(
371371
"Sequence value type '%s' must be equal to the column type '%s'", sequenceType.c_str(),
@@ -423,7 +423,7 @@ TTableInfo::TAlterDataPtr TTableInfo::CreateAlterData(
423423
return nullptr;
424424
default:
425425
break;
426-
}
426+
}
427427
}
428428
} else {
429429
auto* typeDesc = NPg::TypeDescFromPgTypeName(typeName);
@@ -1586,8 +1586,9 @@ void TTableInfo::SetPartitioning(TVector<TTableShardInfo>&& newPartitioning) {
15861586
Stats.PartitionStats.swap(newPartitionStats);
15871587
Stats.Aggregated = newAggregatedStats;
15881588
Partitions.swap(newPartitioning);
1589-
PreSerializedPathDescription.clear();
1590-
PreSerializedPathDescriptionWithoutRangeKey.clear();
1589+
PreserializedTablePartitions.clear();
1590+
PreserializedTablePartitionsNoKeys.clear();
1591+
PreserializedTableSplitBoundaries.clear();
15911592

15921593
CondEraseSchedule.clear();
15931594
InFlightCondErase.clear();

ydb/core/tx/schemeshard/schemeshard_info_types.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -436,8 +436,11 @@ struct TTableInfo : public TSimpleRefCount<TTableInfo> {
436436
TMap<TTxId, TBackupRestoreResult> BackupHistory;
437437
TMap<TTxId, TBackupRestoreResult> RestoreHistory;
438438

439-
TString PreSerializedPathDescription;
440-
TString PreSerializedPathDescriptionWithoutRangeKey;
439+
// Preserialized TDescribeSchemeResult with PathDescription.TablePartitions field filled
440+
TString PreserializedTablePartitions;
441+
TString PreserializedTablePartitionsNoKeys;
442+
// Preserialized TDescribeSchemeResult with PathDescription.Table.SplitBoundary field filled
443+
TString PreserializedTableSplitBoundaries;
441444

442445
THashMap<TShardIdx, NKikimrSchemeOp::TPartitionConfig> PerShardPartitionConfig;
443446

ydb/core/tx/schemeshard/schemeshard_path_describer.cpp

Lines changed: 97 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,68 @@ void TPathDescriber::DescribeDir(const TPath& path) {
223223
DescribeChildren(path);
224224
}
225225

226+
void FillTableBoundaries(
227+
google::protobuf::RepeatedPtrField<NKikimrSchemeOp::TSplitBoundary>* result,
228+
const TTableInfo::TPtr tableInfo
229+
) {
230+
TString errStr;
231+
// Number of split boundaries equals to number of partitions - 1
232+
result->Reserve(tableInfo->GetPartitions().size() - 1);
233+
for (ui32 pi = 0; pi < tableInfo->GetPartitions().size() - 1; ++pi) {
234+
const auto& p = tableInfo->GetPartitions()[pi];
235+
TSerializedCellVec endKey(p.EndOfRange);
236+
auto boundary = result->Add()->MutableKeyPrefix();
237+
for (ui32 ki = 0; ki < endKey.GetCells().size(); ++ki){
238+
const auto& c = endKey.GetCells()[ki];
239+
auto type = tableInfo->Columns[tableInfo->KeyColumnIds[ki]].PType;
240+
bool ok = NMiniKQL::CellToValue(type, c, *boundary->AddTuple(), errStr);
241+
Y_ABORT_UNLESS(ok, "Failed to build key tuple at position %" PRIu32 " error: %s", ki, errStr.data());
242+
}
243+
}
244+
}
245+
246+
void FillTablePartitions(
247+
google::protobuf::RepeatedPtrField<NKikimrSchemeOp::TTablePartition>* result,
248+
const TTableInfo::TPtr tableInfo,
249+
const THashMap<TShardIdx, TShardInfo>& shardInfos,
250+
bool includeKeys
251+
) {
252+
result->Reserve(tableInfo->GetPartitions().size());
253+
for (auto& p : tableInfo->GetPartitions()) {
254+
const auto& tabletId = ui64(shardInfos.at(p.ShardIdx).TabletID);
255+
const auto& key = p.EndOfRange;
256+
257+
auto part = result->Add();
258+
part->SetDatashardId(tabletId);
259+
if (includeKeys) {
260+
// Currently we only support uniform partitioning where each range is [start, end)
261+
// +inf as the end of the last range is represented by empty TCell vector
262+
part->SetIsPoint(false);
263+
part->SetIsInclusive(false);
264+
part->SetEndOfRangeKeyPrefix(key);
265+
}
266+
}
267+
}
268+
269+
const TString& GetSerializedTablePartitions(
270+
const TTableInfo::TPtr tableInfo,
271+
const THashMap<TShardIdx, TShardInfo>& shardInfos,
272+
bool returnRangeKey
273+
) {
274+
TString& cache = (returnRangeKey
275+
? tableInfo->PreserializedTablePartitions
276+
: tableInfo->PreserializedTablePartitionsNoKeys
277+
);
278+
279+
if (cache.empty()) {
280+
NKikimrScheme::TEvDescribeSchemeResult result;
281+
FillTablePartitions(result.MutablePathDescription()->MutableTablePartitions(), tableInfo, shardInfos, returnRangeKey);
282+
Y_PROTOBUF_SUPPRESS_NODISCARD result.SerializeToString(&cache);
283+
}
284+
285+
return cache;
286+
}
287+
226288
void TPathDescriber::DescribeTable(const TActorContext& ctx, TPathId pathId, TPathElement::TPtr pathEl) {
227289
const NScheme::TTypeRegistry* typeRegistry = AppData(ctx)->TypeRegistry;
228290
const TTableInfo::TPtr tableInfo = *Self->Tables.FindPtr(pathId);
@@ -244,50 +306,30 @@ void TPathDescriber::DescribeTable(const TActorContext& ctx, TPathId pathId, TPa
244306
returnRangeKey = Params.GetOptions().GetReturnRangeKey();
245307
}
246308

247-
Self->DescribeTable(tableInfo, typeRegistry, returnConfig, returnBoundaries, entry);
309+
Self->DescribeTable(tableInfo, typeRegistry, returnConfig, entry);
248310
entry->SetName(pathEl->Name);
249311

250-
if (returnPartitioning) {
251-
// partitions
252-
if (tableInfo->PreSerializedPathDescription.empty()) {
312+
if (returnBoundaries) {
313+
// split boundaries (split keys without shard's tablet-ids)
314+
if (tableInfo->PreserializedTableSplitBoundaries.empty()) {
253315
NKikimrScheme::TEvDescribeSchemeResult preSerializedResult;
254-
NKikimrScheme::TEvDescribeSchemeResult preSerializedResultWithoutRangeKey;
255-
256-
NKikimrSchemeOp::TPathDescription& pathDescription = *preSerializedResult.MutablePathDescription();
257-
NKikimrSchemeOp::TPathDescription& pathDescriptionWithoutRangeKey = *preSerializedResultWithoutRangeKey.MutablePathDescription();
258-
259-
pathDescription.MutableTablePartitions()->Reserve(tableInfo->GetPartitions().size());
260-
pathDescriptionWithoutRangeKey.MutableTablePartitions()->Reserve(tableInfo->GetPartitions().size());
261-
for (auto& p : tableInfo->GetPartitions()) {
262-
auto part = pathDescription.AddTablePartitions();
263-
auto partWithoutRangeKey = pathDescriptionWithoutRangeKey.AddTablePartitions();
264-
auto datashardIdx = p.ShardIdx;
265-
auto datashardTabletId = Self->ShardInfos[datashardIdx].TabletID;
266-
// Currently we only support uniform partitioning where each range is [start, end)
267-
// +inf as the end of the last range is represented by empty TCell vector
268-
part->SetDatashardId(ui64(datashardTabletId));
269-
partWithoutRangeKey->SetDatashardId(ui64(datashardTabletId));
270-
271-
part->SetIsPoint(false);
272-
partWithoutRangeKey->SetIsPoint(false);
273-
274-
part->SetIsInclusive(false);
275-
partWithoutRangeKey->SetIsInclusive(false);
276-
277-
part->SetEndOfRangeKeyPrefix(p.EndOfRange);
278-
}
279-
Y_PROTOBUF_SUPPRESS_NODISCARD preSerializedResult.SerializeToString(&tableInfo->PreSerializedPathDescription);
280-
Y_PROTOBUF_SUPPRESS_NODISCARD preSerializedResultWithoutRangeKey.SerializeToString(&tableInfo->PreSerializedPathDescriptionWithoutRangeKey);
281-
}
282-
if (returnRangeKey) {
283-
Result->PreSerializedData += tableInfo->PreSerializedPathDescription;
284-
} else {
285-
Result->PreSerializedData += tableInfo->PreSerializedPathDescriptionWithoutRangeKey;
286-
}
287-
if (!pathEl->IsCreateFinished()) {
288-
tableInfo->PreSerializedPathDescription.clear(); // KIKIMR-4337
289-
tableInfo->PreSerializedPathDescriptionWithoutRangeKey.clear();
316+
auto& tableDesc = *preSerializedResult.MutablePathDescription()->MutableTable();
317+
FillTableBoundaries(tableDesc.MutableSplitBoundary(), tableInfo);
318+
Y_PROTOBUF_SUPPRESS_NODISCARD preSerializedResult.SerializeToString(&tableInfo->PreserializedTableSplitBoundaries);
290319
}
320+
Result->PreSerializedData += tableInfo->PreserializedTableSplitBoundaries;
321+
}
322+
323+
if (returnPartitioning) {
324+
// partitions (shard tablet-ids with range keys)
325+
Result->PreSerializedData += GetSerializedTablePartitions(tableInfo, Self->ShardInfos, returnRangeKey);
326+
}
327+
328+
// KIKIMR-4337: table info is in flux until table is finally created
329+
if (!pathEl->IsCreateFinished()) {
330+
tableInfo->PreserializedTablePartitions.clear();
331+
tableInfo->PreserializedTablePartitionsNoKeys.clear();
332+
tableInfo->PreserializedTableSplitBoundaries.clear();
291333
}
292334

293335
FillAggregatedStats(*Result->Record.MutablePathDescription(), tableInfo->GetStats());
@@ -1128,8 +1170,12 @@ THolder<TEvSchemeShard::TEvDescribeSchemeResultBuilder> DescribePath(
11281170
return DescribePath(self, ctx, pathId, options);
11291171
}
11301172

1131-
void TSchemeShard::DescribeTable(const TTableInfo::TPtr tableInfo, const NScheme::TTypeRegistry* typeRegistry,
1132-
bool fillConfig, bool fillBoundaries, NKikimrSchemeOp::TTableDescription* entry) const
1173+
void TSchemeShard::DescribeTable(
1174+
const TTableInfo::TPtr tableInfo,
1175+
const NScheme::TTypeRegistry* typeRegistry,
1176+
bool fillConfig,
1177+
NKikimrSchemeOp::TTableDescription* entry
1178+
) const
11331179
{
11341180
Y_UNUSED(typeRegistry);
11351181
THashMap<ui32, TString> familyNames;
@@ -1198,10 +1244,6 @@ void TSchemeShard::DescribeTable(const TTableInfo::TPtr tableInfo, const NScheme
11981244
FillPartitionConfig(tableInfo->PartitionConfig(), *entry->MutablePartitionConfig());
11991245
}
12001246

1201-
if (fillBoundaries) {
1202-
FillTableBoundaries(tableInfo, *entry->MutableSplitBoundary());
1203-
}
1204-
12051247
if (tableInfo->HasTTLSettings()) {
12061248
entry->MutableTTLSettings()->CopyFrom(tableInfo->TTLSettings());
12071249
}
@@ -1244,32 +1286,32 @@ void TSchemeShard::DescribeTableIndex(const TPathId& pathId, const TString& name
12441286
*entry.MutableDataColumnNames()->Add() = dataColumns;
12451287
}
12461288

1247-
auto* indexPath = PathsById.FindPtr(pathId);
1289+
auto indexPath = *PathsById.FindPtr(pathId);
12481290
Y_ABORT_UNLESS(indexPath);
12491291
const ui8 expectedIndexImplTableCount = indexInfo->Type == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree ? 2 : 1;
1250-
Y_ABORT_UNLESS((*indexPath)->GetChildren().size() == expectedIndexImplTableCount);
1292+
Y_ABORT_UNLESS(indexPath->GetChildren().size() == expectedIndexImplTableCount);
12511293

12521294
ui64 dataSize = 0;
1253-
for (const auto& indexImplTablePathId : (*indexPath)->GetChildren()) {
1254-
auto* tableInfo = Tables.FindPtr(indexImplTablePathId.second);
1295+
for (const auto& indexImplTablePathId : indexPath->GetChildren()) {
1296+
auto tableInfo = *Tables.FindPtr(indexImplTablePathId.second);
12551297
Y_ABORT_UNLESS(tableInfo);
12561298

1257-
const auto& tableStats = (*tableInfo)->GetStats().Aggregated;
1299+
const auto& tableStats = tableInfo->GetStats().Aggregated;
12581300
dataSize += tableStats.DataSize + tableStats.IndexSize;
12591301

12601302
auto* tableDescription = entry.AddIndexImplTableDescriptions();
12611303
if (fillConfig) {
1262-
FillPartitionConfig((*tableInfo)->PartitionConfig(), *tableDescription->MutablePartitionConfig());
1304+
FillPartitionConfig(tableInfo->PartitionConfig(), *tableDescription->MutablePartitionConfig());
12631305
}
12641306
if (fillBoundaries) {
1265-
FillTableBoundaries(*tableInfo, *tableDescription->MutableSplitBoundary());
1307+
FillTableBoundaries(tableDescription->MutableSplitBoundary(), tableInfo);
12661308
}
12671309
}
12681310
entry.SetDataSize(dataSize);
12691311

12701312
if (indexInfo->Type == NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree) {
12711313
if (const auto* vectorIndexKmeansTreeDescription = std::get_if<NKikimrSchemeOp::TVectorIndexKmeansTreeDescription>(&indexInfo->SpecializedIndexDescription)) {
1272-
const auto& indexInfoSettings = vectorIndexKmeansTreeDescription->GetSettings();
1314+
const auto& indexInfoSettings = vectorIndexKmeansTreeDescription->GetSettings();
12731315
auto entrySettings = entry.MutableVectorIndexKmeansTreeDescription()->MutableSettings();
12741316
if (indexInfoSettings.has_distance())
12751317
entrySettings->set_distance(indexInfoSettings.distance());
@@ -1283,7 +1325,7 @@ void TSchemeShard::DescribeTableIndex(const TPathId& pathId, const TString& name
12831325
Y_FAIL_S("SpecializedIndexDescription should be set");
12841326
}
12851327
}
1286-
1328+
12871329
}
12881330

12891331
void TSchemeShard::DescribeCdcStream(const TPathId& pathId, const TString& name,
@@ -1429,22 +1471,5 @@ void TSchemeShard::DescribeBlobDepot(const TPathId& pathId, const TString& name,
14291471
desc.SetTabletId(static_cast<ui64>(it->second->BlobDepotTabletId));
14301472
}
14311473

1432-
void TSchemeShard::FillTableBoundaries(const TTableInfo::TPtr tableInfo, google::protobuf::RepeatedPtrField<NKikimrSchemeOp::TSplitBoundary>& boundaries) {
1433-
TString errStr;
1434-
// Number of split boundaries equals to number of partitions - 1
1435-
boundaries.Reserve(tableInfo->GetPartitions().size() - 1);
1436-
for (ui32 pi = 0; pi < tableInfo->GetPartitions().size() - 1; ++pi) {
1437-
const auto& p = tableInfo->GetPartitions()[pi];
1438-
TSerializedCellVec endKey(p.EndOfRange);
1439-
auto boundary = boundaries.Add()->MutableKeyPrefix();
1440-
for (ui32 ki = 0; ki < endKey.GetCells().size(); ++ki){
1441-
const auto& c = endKey.GetCells()[ki];
1442-
auto type = tableInfo->Columns[tableInfo->KeyColumnIds[ki]].PType;
1443-
bool ok = NMiniKQL::CellToValue(type, c, *boundary->AddTuple(), errStr);
1444-
Y_ABORT_UNLESS(ok, "Failed to build key tuple at position %" PRIu32 " error: %s", ki, errStr.data());
1445-
}
1446-
}
1447-
}
1448-
14491474
} // NSchemeShard
14501475
} // NKikimr

0 commit comments

Comments
 (0)