From 011e0a3e1de575227dfe4fa94031c9e7a233d07d Mon Sep 17 00:00:00 2001 From: azevaykin Date: Thu, 18 Jul 2024 06:48:18 +0000 Subject: [PATCH 1/3] Vector index build preparation in SchemeShard --- ydb/core/base/table_index.cpp | 4 +++ ydb/core/base/table_index.h | 5 ++++ ...hemeshard__operation_apply_build_index.cpp | 30 ++++++++++++------- .../schemeshard_build_index__create.cpp | 10 +++++-- .../schemeshard_build_index__progress.cpp | 7 +++++ .../tx/schemeshard/schemeshard_info_types.cpp | 4 +++ .../tx/schemeshard/schemeshard_info_types.h | 2 ++ ydb/core/tx/schemeshard/schemeshard_utils.cpp | 4 +-- 8 files changed, 50 insertions(+), 16 deletions(-) diff --git a/ydb/core/base/table_index.cpp b/ydb/core/base/table_index.cpp index bc6bdea57e40..4a42e61ed653 100644 --- a/ydb/core/base/table_index.cpp +++ b/ydb/core/base/table_index.cpp @@ -140,5 +140,9 @@ bool IsCompatibleIndex(const NKikimrSchemeOp::EIndexType indexType, const TTable return true; } +bool IsImplTable(std::string_view tableName) { + return std::find(std::begin(ImplTables), std::end(ImplTables), tableName) != std::end(ImplTables); +} + } } diff --git a/ydb/core/base/table_index.h b/ydb/core/base/table_index.h index 7d7af7b915d0..f6592d07dca2 100644 --- a/ydb/core/base/table_index.h +++ b/ydb/core/base/table_index.h @@ -21,8 +21,13 @@ struct TIndexColumns { TVector DataColumns; }; +inline constexpr const char* ImplTable = "indexImplTable"; +inline constexpr std::string_view ImplTables[] = {ImplTable, NTableVectorKmeansTreeIndex::LevelTable, NTableVectorKmeansTreeIndex::PostingTable}; + bool IsCompatibleIndex(const NKikimrSchemeOp::EIndexType indexType, const TTableColumns& table, const TIndexColumns& index, TString& explain); TTableColumns CalcTableImplDescription(const NKikimrSchemeOp::EIndexType indexType, const TTableColumns& table, const TIndexColumns& index); +bool IsImplTable(std::string_view tableName); + } } diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_apply_build_index.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_apply_build_index.cpp index 75de0530f430..c06b53765fb6 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_apply_build_index.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_apply_build_index.cpp @@ -52,17 +52,25 @@ TVector ApplyBuildIndex(TOperationId nextId, const TTxTrans if (!indexName.empty()) { + auto alterImplTableTransactionTemplate = [] (TPath index, TPath implIndexTable, TTableInfo::TPtr implIndexTableInfo) { + auto indexImplTableAltering = TransactionTemplate(index.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpFinalizeBuildIndexImplTable); + auto alterTable = indexImplTableAltering.MutableAlterTable(); + alterTable->SetName(implIndexTable.LeafName()); + alterTable->MutablePartitionConfig()->MutableCompactionPolicy()->CopyFrom(implIndexTableInfo->PartitionConfig().GetCompactionPolicy()); + alterTable->MutablePartitionConfig()->MutableCompactionPolicy()->SetKeepEraseMarkers(false); + alterTable->MutablePartitionConfig()->SetShadowData(false); + return indexImplTableAltering; + }; + TPath index = table.Child(indexName); - TPath implIndexTable = index.Child("indexImplTable"); - TTableInfo::TPtr implIndexTableInfo = context.SS->Tables.at(implIndexTable.Base()->PathId); - auto indexImplTableAltering = TransactionTemplate(index.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpFinalizeBuildIndexImplTable); - auto alterTable = indexImplTableAltering.MutableAlterTable(); - alterTable->SetName(implIndexTable.LeafName()); - alterTable->MutablePartitionConfig()->MutableCompactionPolicy()->CopyFrom(implIndexTableInfo->PartitionConfig().GetCompactionPolicy()); - alterTable->MutablePartitionConfig()->MutableCompactionPolicy()->SetKeepEraseMarkers(false); - alterTable->MutablePartitionConfig()->SetShadowData(false); - - result.push_back(CreateFinalizeBuildIndexImplTable(NextPartId(nextId, result), indexImplTableAltering)); + for (const std::string_view implTable : NTableIndex::ImplTables) { + TPath implIndexTable = index.Child(implTable.data()); + if (!implIndexTable.IsResolved()) { + continue; + } + TTableInfo::TPtr implIndexTableInfo = context.SS->Tables.at(implIndexTable.Base()->PathId); + result.push_back(CreateFinalizeBuildIndexImplTable(NextPartId(nextId, result), alterImplTableTransactionTemplate(index, implIndexTable, implIndexTableInfo))); + } } return result; @@ -109,7 +117,7 @@ TVector CancelBuildIndex(TOperationId nextId, const TTxTran Y_ABORT_UNLESS(index.Base()->GetChildren().size() == 1); for (auto& indexChildItems: index.Base()->GetChildren()) { const TString& implTableName = indexChildItems.first; - Y_ABORT_UNLESS(implTableName == "indexImplTable", "unexpected name %s", implTableName.c_str()); + Y_ABORT_UNLESS(NTableIndex::IsImplTable(implTableName), "unexpected name %s", implTableName.c_str()); TPath implTable = index.Child(implTableName); { diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp index 0c0d5f3b25c3..7d6f8d9640a0 100644 --- a/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp @@ -226,9 +226,13 @@ class TSchemeShard::TIndexBuilder::TTxCreate: public TSchemeShard::TIndexBuilder case Ydb::Table::TableIndex::TypeCase::kGlobalUniqueIndex: explain = "unsupported index type to build"; return false; - case Ydb::Table::TableIndex::TypeCase::kGlobalVectorKmeansTreeIndex: - explain = "unsupported vector index type to build"; - return false; + case Ydb::Table::TableIndex::TypeCase::kGlobalVectorKmeansTreeIndex: { + buildInfo->IndexType = NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree; + NKikimrSchemeOp::TVectorIndexKmeansTreeDescription vectorIndexKmeansTreeDescription; + *vectorIndexKmeansTreeDescription.MutableSettings() = index.global_vector_kmeans_tree_index().vector_settings(); + buildInfo->SpecializedIndexDescription = vectorIndexKmeansTreeDescription; + break; + } case Ydb::Table::TableIndex::TypeCase::TYPE_NOT_SET: explain = "invalid or unset index type"; return false; diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp index c85d2475d916..27e4af12d131 100644 --- a/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp @@ -253,6 +253,13 @@ struct TSchemeShard::TIndexBuilder::TTxProgress: public TSchemeShard::TIndexBuil } else if (!buildInfo->InitiateTxDone) { Send(Self->SelfId(), MakeHolder(ui64(buildInfo->InitiateTxId))); } else { + // TODO add vector index filling + if (buildInfo->IndexType == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { + ChangeState(BuildId, TIndexBuildInfo::EState::Applying); + Progress(BuildId); + break; + } + ChangeState(BuildId, TIndexBuildInfo::EState::Filling); Progress(BuildId); } diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.cpp b/ydb/core/tx/schemeshard/schemeshard_info_types.cpp index ceccb2c67e40..5f2e3b351db5 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.cpp @@ -2122,6 +2122,10 @@ void TIndexBuildInfo::SerializeToProto(TSchemeShard* ss, NKikimrSchemeOp::TIndex for (const auto& implTableDescription : ImplTableDescriptions) { *index.AddIndexImplTableDescriptions() = implTableDescription; } + + if (IndexType == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) { + *index.MutableVectorIndexKmeansTreeDescription() = std::get(SpecializedIndexDescription); + } } void TIndexBuildInfo::SerializeToProto(TSchemeShard* ss, NKikimrIndexBuilder::TColumnBuildSettings* result) const { diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.h b/ydb/core/tx/schemeshard/schemeshard_info_types.h index 319034742f46..a26af238857f 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.h @@ -2927,6 +2927,8 @@ struct TIndexBuildInfo: public TSimpleRefCount { NTableIndex::TTableColumns ImplTableColumns; TVector ImplTableDescriptions; + std::variant SpecializedIndexDescription; + EState State = EState::Invalid; TString Issue; diff --git a/ydb/core/tx/schemeshard/schemeshard_utils.cpp b/ydb/core/tx/schemeshard/schemeshard_utils.cpp index b94b0647b866..a6b73d498139 100644 --- a/ydb/core/tx/schemeshard/schemeshard_utils.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_utils.cpp @@ -451,7 +451,7 @@ NKikimrSchemeOp::TTableDescription CalcImplTableDesc( { NKikimrSchemeOp::TTableDescription implTableDesc; - implTableDesc.SetName("indexImplTable"); + implTableDesc.SetName(NTableIndex::ImplTable); SetImplTablePartitionConfig(baseTableInfo->PartitionConfig(), indexTableDesc, implTableDesc); @@ -467,7 +467,7 @@ NKikimrSchemeOp::TTableDescription CalcImplTableDesc( { NKikimrSchemeOp::TTableDescription implTableDesc; - implTableDesc.SetName("indexImplTable"); + implTableDesc.SetName(NTableIndex::ImplTable); SetImplTablePartitionConfig(baseTableDescr.GetPartitionConfig(), indexTableDesc, implTableDesc); From c6a245c352e2611efc5638de24ec2499971aec79 Mon Sep 17 00:00:00 2001 From: azevaykin Date: Fri, 19 Jul 2024 05:07:29 +0000 Subject: [PATCH 2/3] Y_ABORT_UNLESS fix --- .../tx/schemeshard/schemeshard__operation_apply_build_index.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_apply_build_index.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_apply_build_index.cpp index c06b53765fb6..e600eefc9c97 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_apply_build_index.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_apply_build_index.cpp @@ -114,7 +114,7 @@ TVector CancelBuildIndex(TOperationId nextId, const TTxTran if (!indexName.empty()) { TPath index = table.Child(indexName); - Y_ABORT_UNLESS(index.Base()->GetChildren().size() == 1); + Y_ABORT_UNLESS(index.Base()->GetChildren().size() >= 1); for (auto& indexChildItems: index.Base()->GetChildren()) { const TString& implTableName = indexChildItems.first; Y_ABORT_UNLESS(NTableIndex::IsImplTable(implTableName), "unexpected name %s", implTableName.c_str()); From bf6f316b84040a24b4e4e8ebfcb5f916a4cd55aa Mon Sep 17 00:00:00 2001 From: azevaykin Date: Fri, 19 Jul 2024 05:09:04 +0000 Subject: [PATCH 3/3] replace "indexImplTable" with NTableIndex::ImplTable --- ydb/core/grpc_services/rpc_describe_table.cpp | 3 ++- ydb/core/kqp/gateway/utils/scheme_helpers.cpp | 3 ++- ydb/core/tx/schemeshard/schemeshard__operation_part.cpp | 4 +--- ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ydb/core/grpc_services/rpc_describe_table.cpp b/ydb/core/grpc_services/rpc_describe_table.cpp index 22879f460906..b1d2a1a9d886 100644 --- a/ydb/core/grpc_services/rpc_describe_table.cpp +++ b/ydb/core/grpc_services/rpc_describe_table.cpp @@ -6,6 +6,7 @@ #include "service_table.h" #include "rpc_common/rpc_common.h" +#include #include #include #include @@ -153,7 +154,7 @@ class TDescribeTableRPC : public TRpcSchemeRequestActorMutableOptions()->SetReturnPartitionStats(true); } - if (AppData(ctx)->AllowPrivateTableDescribeForTest || path.EndsWith("/indexImplTable")) { + if (AppData(ctx)->AllowPrivateTableDescribeForTest || path.EndsWith(TStringBuilder() << "/" << NTableIndex::ImplTable)) { record->MutableOptions()->SetShowPrivateTable(true); } diff --git a/ydb/core/kqp/gateway/utils/scheme_helpers.cpp b/ydb/core/kqp/gateway/utils/scheme_helpers.cpp index 8b9657b2607c..403d3f539ed1 100644 --- a/ydb/core/kqp/gateway/utils/scheme_helpers.cpp +++ b/ydb/core/kqp/gateway/utils/scheme_helpers.cpp @@ -1,6 +1,7 @@ #include "scheme_helpers.h" #include +#include #include namespace NKikimr::NKqp::NSchemeHelpers { @@ -46,7 +47,7 @@ bool SplitTablePath(const TString& tableName, const TString& database, std::pair } TString CreateIndexTablePath(const TString& tableName, const TString& indexName) { - return tableName + "/" + indexName + "/indexImplTable"; + return tableName + "/" + indexName + "/" + NTableIndex::ImplTable; } bool SetDatabaseForLoginOperation(TString& result, bool getDomainLoginOnly, TMaybe domainName, diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_part.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_part.cpp index 8e8eccc1d3e2..3b1c0ff832b0 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_part.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_part.cpp @@ -152,10 +152,8 @@ ISubOperation::TPtr CascadeDropTableChildren(TVector& resul } for (auto& [implName, implPathId] : child.Base()->GetChildren()) { - Y_ABORT_UNLESS(implName == "indexImplTable" + Y_ABORT_UNLESS(NTableIndex::IsImplTable(implName) || implName == "streamImpl" - || implName == NTableIndex::NTableVectorKmeansTreeIndex::LevelTable - || implName == NTableIndex::NTableVectorKmeansTreeIndex::PostingTable , "unexpected name %s", implName.c_str()); TPath implPath = child.Child(implName); diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp index 27e4af12d131..85d327ab906e 100644 --- a/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_build_index__progress.cpp @@ -321,7 +321,7 @@ struct TSchemeShard::TIndexBuilder::TTxProgress: public TSchemeShard::TIndexBuil } if (buildInfo->ImplTablePath.Empty() && buildInfo->IsBuildIndex()) { - TPath implTable = TPath::Init(buildInfo->TablePathId, Self).Dive(buildInfo->IndexName).Dive("indexImplTable"); + TPath implTable = TPath::Init(buildInfo->TablePathId, Self).Dive(buildInfo->IndexName).Dive(NTableIndex::ImplTable); buildInfo->ImplTablePath = implTable.PathString(); TTableInfo::TPtr implTableInfo = Self->Tables.at(implTable.Base()->PathId);