Skip to content

Commit 53c0ea3

Browse files
committed
vector index test
1 parent 10bcf85 commit 53c0ea3

File tree

5 files changed

+179
-9
lines changed

5 files changed

+179
-9
lines changed

ydb/core/tx/schemeshard/ut_helpers/helpers.cpp

+17-5
Original file line numberDiff line numberDiff line change
@@ -1699,11 +1699,23 @@ namespace NSchemeShardUT_Private {
16991699
} break;
17001700
case NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree: {
17011701
auto& settings = *index.mutable_global_vector_kmeans_tree_index();
1702-
settings = Ydb::Table::GlobalVectorKMeansTreeIndex();
1703-
// some random valid settings
1704-
settings.mutable_vector_settings()->set_vector_type(Ydb::Table::VectorIndexSettings::VECTOR_TYPE_FLOAT);
1705-
settings.mutable_vector_settings()->set_vector_dimension(42);
1706-
settings.mutable_vector_settings()->set_distance(Ydb::Table::VectorIndexSettings::DISTANCE_COSINE);
1702+
1703+
auto& vectorIndexSettings = *settings.mutable_vector_settings();
1704+
if (cfg.VectorIndexSettings) {
1705+
cfg.VectorIndexSettings->SerializeTo(vectorIndexSettings);
1706+
} else {
1707+
// some random valid settings
1708+
vectorIndexSettings.set_vector_type(Ydb::Table::VectorIndexSettings::VECTOR_TYPE_FLOAT);
1709+
vectorIndexSettings.set_vector_dimension(42);
1710+
vectorIndexSettings.set_distance(Ydb::Table::VectorIndexSettings::DISTANCE_COSINE);
1711+
}
1712+
1713+
if (cfg.GlobalIndexSettings) {
1714+
cfg.GlobalIndexSettings[0].SerializeTo(*settings.mutable_level_table_settings());
1715+
if (cfg.GlobalIndexSettings.size() > 1) {
1716+
cfg.GlobalIndexSettings[1].SerializeTo(*settings.mutable_posting_table_settings());
1717+
}
1718+
}
17071719
} break;
17081720
default:
17091721
UNIT_ASSERT_C(false, "Unknown index type: " << static_cast<ui32>(cfg.IndexType));

ydb/core/tx/schemeshard/ut_helpers/helpers.h

+3
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464

6565
namespace NYdb::NTable {
6666
struct TGlobalIndexSettings;
67+
struct TVectorIndexSettings;
6768
}
6869

6970
namespace NSchemeShardUT_Private {
@@ -371,6 +372,8 @@ namespace NSchemeShardUT_Private {
371372
TVector<TString> IndexColumns;
372373
TVector<TString> DataColumns;
373374
TVector<NYdb::NTable::TGlobalIndexSettings> GlobalIndexSettings = {};
375+
// Implementation note: this is a pointer rather than an optional so that TVectorIndexSettings can stay forward-declared.
376+
std::unique_ptr<NYdb::NTable::TVectorIndexSettings> VectorIndexSettings = {};
374377
};
375378

376379
std::unique_ptr<TEvIndexBuilder::TEvCreateRequest> CreateBuildColumnRequest(ui64 id, const TString& dbName, const TString& src, const TString& columnName, const Ydb::TypedValue& literal);

ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp

+55-3
Original file line numberDiff line numberDiff line change
@@ -853,8 +853,8 @@ TCheckFunc IndexDataColumns(const TVector<TString>& dataColumnNames) {
853853
};
854854
}
855855

856-
TCheckFunc VectorIndexDescription(Ydb::Table::VectorIndexSettings_Distance dist,
857-
Ydb::Table::VectorIndexSettings_Similarity similarity,
856+
TCheckFunc VectorIndexDescription(Ydb::Table::VectorIndexSettings_Distance dist,
857+
Ydb::Table::VectorIndexSettings_Similarity similarity,
858858
Ydb::Table::VectorIndexSettings_VectorType vectorType,
859859
ui32 vectorDimension
860860
) {
@@ -1311,11 +1311,63 @@ TCheckFunc PartitionKeys(TVector<TString> lastShardKeys) {
13111311
const auto& pathDescr = record.GetPathDescription();
13121312
UNIT_ASSERT_VALUES_EQUAL(lastShardKeys.size(), pathDescr.TablePartitionsSize());
13131313
for (size_t i = 0; i < lastShardKeys.size(); ++i) {
1314-
UNIT_ASSERT_STRING_CONTAINS(pathDescr.GetTablePartitions(i).GetEndOfRangeKeyPrefix(), lastShardKeys[i]);
1314+
const auto& partition = pathDescr.GetTablePartitions(i);
1315+
UNIT_ASSERT_STRING_CONTAINS_C(
1316+
partition.GetEndOfRangeKeyPrefix(), lastShardKeys[i],
1317+
"partition index: " << i << '\n'
1318+
<< "actual key prefix: " << partition.GetEndOfRangeKeyPrefix().Quote() << '\n'
1319+
<< "expected key prefix: " << lastShardKeys[i].Quote() << '\n'
1320+
);
13151321
}
13161322
};
13171323
}
13181324

1325+
// Anonymous namespace holding the helper used by the SplitBoundaries check below.
namespace {
1326+
1327+
// Converts between a value of type T and the serialized cell vector string representation:
// Serialize packs one value into a one-cell vector, Deserialize unpacks the leading non-null cells.
1328+
template <typename T>
1329+
struct TSplitBoundarySerializer {
// Builds a single TCell from splitBoundary, wraps it into a one-element TSerializedCellVec
// and returns the buffer released from that cell vector.
1330+
static TString Serialize(T splitBoundary) {
1331+
const auto cell = TCell::Make(splitBoundary);
1332+
TSerializedCellVec cellVec(TArrayRef<const TCell>(&cell, 1));
1333+
return cellVec.ReleaseBuffer();
1334+
}
1335+
// Parses serializedCells as a TSerializedCellVec and returns the values of its cells, read as T,
// up to (but not including) the first null cell.
1336+
static TVector<T> Deserialize(const TString& serializedCells) {
1337+
TSerializedCellVec cells(serializedCells);
1338+
TVector<T> values;
1339+
for (const auto& cell : cells.GetCells()) {
1340+
if (cell.IsNull()) {
1341+
// a null cell is treated as the last cell: stop collecting values here
1342+
break;
1343+
}
1344+
values.emplace_back(cell.AsValue<T>());
1345+
}
1346+
return values;
1347+
}
1348+
};
1349+
1350+
}
1351+
1352+
// Returns a check that validates the partition boundaries of a described table.
// The check asserts that the table has exactly expectedBoundaries.size() + 1 partitions and that,
// for each i, the first value decoded from partition i's end-of-range key prefix equals expectedBoundaries[i].
// expectedBoundaries is moved into the returned closure.
template <typename T>
1353+
TCheckFunc SplitBoundaries(TVector<T>&& expectedBoundaries) {
1354+
return [expectedBoundaries = std::move(expectedBoundaries)] (const NKikimrScheme::TEvDescribeSchemeResult& record) {
1355+
const auto& pathDescr = record.GetPathDescription();
// N boundaries are expected to produce N + 1 partitions
1356+
UNIT_ASSERT_VALUES_EQUAL(pathDescr.TablePartitionsSize(), expectedBoundaries.size() + 1);
// only the first expectedBoundaries.size() partitions are inspected; the final partition is not compared
1357+
for (size_t i = 0; i < expectedBoundaries.size(); ++i) {
1358+
const auto& partition = pathDescr.GetTablePartitions(i);
// only the first decoded cell of the key prefix is compared
// NOTE(review): at(0) presumably fails if no non-null cell was decoded — confirm TVector::at semantics
1359+
const auto actualBoundary = TSplitBoundarySerializer<T>::Deserialize(partition.GetEndOfRangeKeyPrefix()).at(0);
1360+
UNIT_ASSERT_VALUES_EQUAL_C(
1361+
actualBoundary, expectedBoundaries[i],
1362+
"partition index: " << i << '\n'
1363+
<< "actual key prefix: " << partition.GetEndOfRangeKeyPrefix().Quote() << '\n'
1364+
);
1365+
}
1366+
};
1367+
}
1368+
// Explicit instantiation for ui32: the template body lives in this translation unit,
// so each element type used by callers has to be instantiated here.
1369+
template TCheckFunc SplitBoundaries<ui32>(TVector<ui32>&&);
1370+
13191371
TCheckFunc ServerlessComputeResourcesMode(NKikimrSubDomains::EServerlessComputeResourcesMode serverlessComputeResourcesMode) {
13201372
return [=] (const NKikimrScheme::TEvDescribeSchemeResult& record) {
13211373
UNIT_ASSERT_C(IsGoodDomainStatus(record.GetStatus()), "Unexpected status: " << record.GetStatus());

ydb/core/tx/schemeshard/ut_helpers/ls_checks.h

+5-1
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,10 @@ namespace NLs {
102102
void CheckBoundaries(const NKikimrScheme::TEvDescribeSchemeResult& record);
103103
TCheckFunc PartitionCount(ui32 count);
104104
TCheckFunc PartitionKeys(TVector<TString> lastShardKeys);
105+
// Checks that the first cell of each partition's end-of-range key prefix equals the corresponding expected boundary.
106+
// Similar to PartitionKeys check, but does not require you to pass split boundaries in a serialized form.
107+
template <typename T>
108+
TCheckFunc SplitBoundaries(TVector<T>&& expectedBoundaries);
105109
TCheckFunc FollowerCount(ui32 count);
106110
TCheckFunc CrossDataCenterFollowerCount(ui32 count);
107111
TCheckFunc AllowFollowerPromotion(bool val);
@@ -141,7 +145,7 @@ namespace NLs {
141145
TCheckFunc IndexState(NKikimrSchemeOp::EIndexState state);
142146
TCheckFunc IndexKeys(const TVector<TString>& keyNames);
143147
TCheckFunc IndexDataColumns(const TVector<TString>& dataColumnNames);
144-
148+
145149
TCheckFunc VectorIndexDescription(Ydb::Table::VectorIndexSettings_Distance dist,
146150
Ydb::Table::VectorIndexSettings_Similarity similarity,
147151
Ydb::Table::VectorIndexSettings_VectorType vectorType,

ydb/core/tx/schemeshard/ut_index_build/ut_index_build.cpp

+99
Original file line numberDiff line numberDiff line change
@@ -1006,6 +1006,105 @@ Y_UNIT_TEST_SUITE(IndexBuildTest) {
10061006
});
10071007
}
10081008

1009+
// Builds a vector k-means tree index with explicit vector settings and per-table global index settings,
// reboots the SchemeShard tablet in the middle of the build, and then verifies that the finished index
// and both of its implementation tables are described with the requested settings.
Y_UNIT_TEST(VectorIndexDescriptionIsPersisted) {
1010+
TTestBasicRuntime runtime;
1011+
TTestEnv env(runtime);
1012+
ui64 txId = 100;
1013+
// source table: "id" is the key, "embedding" is the indexed column, "covered" is the data column of the index
1014+
TestCreateTable(runtime, ++txId, "/MyRoot", R"(
1015+
Name: "vectors"
1016+
Columns { Name: "id" Type: "Uint64" }
1017+
Columns { Name: "embedding" Type: "String" }
1018+
Columns { Name: "covered" Type: "String" }
1019+
KeyColumnNames: [ "id" ]
1020+
)");
1021+
env.TestWaitNotification(runtime, txId);
1022+
// table settings: two Uint32 split points (12345 and 54321) and exactly 3 partitions (min == max == 3),
// parsed from protobuf text format and converted to the SDK type
1023+
const auto globalIndexSettings = []{
1024+
Ydb::Table::GlobalIndexSettings globalIndexSettings;
1025+
UNIT_ASSERT(google::protobuf::TextFormat::ParseFromString(R"(
1026+
partition_at_keys {
1027+
split_points {
1028+
type { tuple_type { elements { optional_type { item { type_id: UINT32 } } } } }
1029+
value { items { uint32_value: 12345 } }
1030+
}
1031+
split_points {
1032+
type { tuple_type { elements { optional_type { item { type_id: UINT32 } } } } }
1033+
value { items { uint32_value: 54321 } }
1034+
}
1035+
}
1036+
partitioning_settings {
1037+
min_partitions_count: 3
1038+
max_partitions_count: 3
1039+
}
1040+
)", &globalIndexSettings));
1041+
return NYdb::NTable::TGlobalIndexSettings::FromProto(globalIndexSettings);
1042+
}();
1043+
// vector settings: cosine distance, float vectors, dimension 1024; returned as unique_ptr
// because the build config stores the vector settings behind a unique_ptr
1044+
auto vectorIndexSettings = []{
1045+
Ydb::Table::VectorIndexSettings vectorIndexSettings;
1046+
UNIT_ASSERT(google::protobuf::TextFormat::ParseFromString(R"(
1047+
distance: DISTANCE_COSINE,
1048+
vector_type: VECTOR_TYPE_FLOAT,
1049+
vector_dimension: 1024
1050+
)", &vectorIndexSettings));
1051+
using T = NYdb::NTable::TVectorIndexSettings;
1052+
return std::make_unique<T>(T::FromProto(vectorIndexSettings));
1053+
}();
1054+
// the predicate matches TEvModifySchemeTransaction events whose first transaction is ESchemeOpCreateIndexBuild
// NOTE(review): presumably TBlockEvents holds matching events until Unblock() is called — confirm
1055+
TBlockEvents<TEvSchemeShard::TEvModifySchemeTransaction> indexCreationBlocker(runtime, [](const auto& ev) {
1056+
const auto& modifyScheme = ev->Get()->Record.GetTransaction(0);
1057+
return modifyScheme.GetOperationType() == NKikimrSchemeOp::ESchemeOpCreateIndexBuild;
1058+
});
1059+
// the same global settings are passed twice: the helper applies element 0 to the level table
// and element 1 to the posting table
1060+
const ui64 buildIndexTx = ++txId;
1061+
TestBuildIndex(runtime, buildIndexTx, TTestTxConfig::SchemeShard, "/MyRoot", "/MyRoot/vectors", TBuildIndexConfig{
1062+
"by_embedding", NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree, { "embedding" }, { "covered" },
1063+
{ globalIndexSettings, globalIndexSettings }, std::move(vectorIndexSettings)
1064+
});
1065+
// reboot the SchemeShard tablet before the blocker is released
// (presumably the index-creation event is still being held at this point)
1066+
RebootTablet(runtime, TTestTxConfig::SchemeShard, runtime.AllocateEdgeActor());
1067+
// stop blocking, release the held events and wait for the build to finish
1068+
indexCreationBlocker.Stop().Unblock();
1069+
env.TestWaitNotification(runtime, buildIndexTx);
1070+
// the build must be reported as done; the full operation is printed on failure
1071+
auto buildIndexOperation = TestGetBuildIndex(runtime, TTestTxConfig::SchemeShard, "/MyRoot", buildIndexTx);
1072+
UNIT_ASSERT_VALUES_EQUAL_C(
1073+
buildIndexOperation.GetIndexBuild().GetState(), Ydb::Table::IndexBuildState::STATE_DONE,
1074+
buildIndexOperation.DebugString()
1075+
);
1076+
// the index description must carry the requested vector settings (similarity was not set, so it is unspecified)
1077+
TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/by_embedding"), {
1078+
NLs::PathExist,
1079+
NLs::IndexState(NKikimrSchemeOp::EIndexStateReady),
1080+
NLs::IndexType(NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree),
1081+
NLs::IndexKeys({"embedding"}),
1082+
NLs::IndexDataColumns({"covered"}),
1083+
NLs::VectorIndexDescription(
1084+
Ydb::Table::VectorIndexSettings::DISTANCE_COSINE,
1085+
Ydb::Table::VectorIndexSettings::SIMILARITY_UNSPECIFIED,
1086+
Ydb::Table::VectorIndexSettings::VECTOR_TYPE_FLOAT,
1087+
1024
1088+
)
1089+
});
1090+
// both implementation tables (level and posting) must have 3 partitions split at 12345 and 54321
1091+
using namespace NKikimr::NTableIndex::NTableVectorKmeansTreeIndex;
1092+
TestDescribeResult(DescribePrivatePath(runtime, JoinFsPaths("/MyRoot/vectors/by_embedding", LevelTable), true, true), {
1093+
NLs::IsTable,
1094+
NLs::PartitionCount(3),
1095+
NLs::MinPartitionsCountEqual(3),
1096+
NLs::MaxPartitionsCountEqual(3),
1097+
NLs::SplitBoundaries<ui32>({12345, 54321})
1098+
});
1099+
TestDescribeResult(DescribePrivatePath(runtime, JoinFsPaths("/MyRoot/vectors/by_embedding", PostingTable), true, true), {
1100+
NLs::IsTable,
1101+
NLs::PartitionCount(3),
1102+
NLs::MinPartitionsCountEqual(3),
1103+
NLs::MaxPartitionsCountEqual(3),
1104+
NLs::SplitBoundaries<ui32>({12345, 54321})
1105+
});
1106+
}
1107+
10091108
Y_UNIT_TEST(DropIndex) {
10101109
TTestBasicRuntime runtime;
10111110
TTestEnv env(runtime);

0 commit comments

Comments
 (0)