Skip to content

Commit 6082c72

Browse files
committed
vector index test
1 parent 5ed1099 commit 6082c72

File tree

5 files changed

+182
-9
lines changed

5 files changed

+182
-9
lines changed

ydb/core/tx/schemeshard/ut_helpers/helpers.cpp

+17-5
Original file line numberDiff line numberDiff line change
@@ -1699,11 +1699,23 @@ namespace NSchemeShardUT_Private {
16991699
} break;
17001700
case NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree: {
17011701
auto& settings = *index.mutable_global_vector_kmeans_tree_index();
1702-
settings = Ydb::Table::GlobalVectorKMeansTreeIndex();
1703-
// some random valid settings
1704-
settings.mutable_vector_settings()->set_vector_type(Ydb::Table::VectorIndexSettings::VECTOR_TYPE_FLOAT);
1705-
settings.mutable_vector_settings()->set_vector_dimension(42);
1706-
settings.mutable_vector_settings()->set_distance(Ydb::Table::VectorIndexSettings::DISTANCE_COSINE);
1702+
1703+
auto& vectorIndexSettings = *settings.mutable_vector_settings();
1704+
if (cfg.VectorIndexSettings) {
1705+
cfg.VectorIndexSettings->SerializeTo(vectorIndexSettings);
1706+
} else {
1707+
// no explicit vector settings in the test config: fall back to arbitrary but valid ones
1708+
vectorIndexSettings.set_vector_type(Ydb::Table::VectorIndexSettings::VECTOR_TYPE_FLOAT);
1709+
vectorIndexSettings.set_vector_dimension(42);
1710+
vectorIndexSettings.set_distance(Ydb::Table::VectorIndexSettings::DISTANCE_COSINE);
1711+
}
1712+
1713+
if (cfg.GlobalIndexSettings) {
1714+
cfg.GlobalIndexSettings[0].SerializeTo(*settings.mutable_level_table_settings());
1715+
if (cfg.GlobalIndexSettings.size() > 1) {
1716+
cfg.GlobalIndexSettings[1].SerializeTo(*settings.mutable_posting_table_settings());
1717+
}
1718+
}
17071719
} break;
17081720
default:
17091721
UNIT_ASSERT_C(false, "Unknown index type: " << static_cast<ui32>(cfg.IndexType));

ydb/core/tx/schemeshard/ut_helpers/helpers.h

+3
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464

6565
namespace NYdb::NTable {
6666
struct TGlobalIndexSettings;
67+
struct TVectorIndexSettings;
6768
}
6869

6970
namespace NSchemeShardUT_Private {
@@ -371,6 +372,8 @@ namespace NSchemeShardUT_Private {
371372
TVector<TString> IndexColumns;
372373
TVector<TString> DataColumns;
373374
TVector<NYdb::NTable::TGlobalIndexSettings> GlobalIndexSettings = {};
375+
// Implementation note: held by std::unique_ptr rather than std::optional so that TVectorIndexSettings can stay forward-declared in this header.
376+
std::unique_ptr<NYdb::NTable::TVectorIndexSettings> VectorIndexSettings = {};
374377
};
375378

376379
std::unique_ptr<TEvIndexBuilder::TEvCreateRequest> CreateBuildColumnRequest(ui64 id, const TString& dbName, const TString& src, const TString& columnName, const Ydb::TypedValue& literal);

ydb/core/tx/schemeshard/ut_helpers/ls_checks.cpp

+55-3
Original file line numberDiff line numberDiff line change
@@ -853,8 +853,8 @@ TCheckFunc IndexDataColumns(const TVector<TString>& dataColumnNames) {
853853
};
854854
}
855855

856-
TCheckFunc VectorIndexDescription(Ydb::Table::VectorIndexSettings_Distance dist,
857-
Ydb::Table::VectorIndexSettings_Similarity similarity,
856+
TCheckFunc VectorIndexDescription(Ydb::Table::VectorIndexSettings_Distance dist,
857+
Ydb::Table::VectorIndexSettings_Similarity similarity,
858858
Ydb::Table::VectorIndexSettings_VectorType vectorType,
859859
ui32 vectorDimension
860860
) {
@@ -1311,11 +1311,63 @@ TCheckFunc PartitionKeys(TVector<TString> lastShardKeys) {
13111311
const auto& pathDescr = record.GetPathDescription();
13121312
UNIT_ASSERT_VALUES_EQUAL(lastShardKeys.size(), pathDescr.TablePartitionsSize());
13131313
for (size_t i = 0; i < lastShardKeys.size(); ++i) {
1314-
UNIT_ASSERT_STRING_CONTAINS(pathDescr.GetTablePartitions(i).GetEndOfRangeKeyPrefix(), lastShardKeys[i]);
1314+
const auto& partition = pathDescr.GetTablePartitions(i);
1315+
UNIT_ASSERT_STRING_CONTAINS_C(
1316+
partition.GetEndOfRangeKeyPrefix(), lastShardKeys[i],
1317+
"partition index: " << i << '\n'
1318+
<< "actual key prefix: " << partition.GetEndOfRangeKeyPrefix().Quote() << '\n'
1319+
<< "expected key prefix: " << lastShardKeys[i].Quote() << '\n'
1320+
);
13151321
}
13161322
};
13171323
}
13181324

1325+
namespace {
1326+
1327+
// Converts values of type T to and from the serialized cell vector string representation.
1328+
template <typename T>
1329+
struct TSplitBoundarySerializer {
1330+
static TString Serialize(T splitBoundary) {
1331+
const auto cell = TCell::Make(splitBoundary);
1332+
TSerializedCellVec cellVec(TArrayRef<const TCell>(&cell, 1));
1333+
return cellVec.ReleaseBuffer();
1334+
}
1335+
1336+
static TVector<T> Deserialize(const TString& serializedCells) {
1337+
TSerializedCellVec cells(serializedCells);
1338+
TVector<T> values;
1339+
for (const auto& cell : cells.GetCells()) {
1340+
if (cell.IsNull()) {
1341+
// a null cell is treated as the end of the key: stop and return only the cells before it
1342+
break;
1343+
}
1344+
values.emplace_back(cell.AsValue<T>());
1345+
}
1346+
return values;
1347+
}
1348+
};
1349+
1350+
}
1351+
1352+
template <typename T>
1353+
TCheckFunc SplitBoundaries(TVector<T>&& expectedBoundaries) {
1354+
return [expectedBoundaries = std::move(expectedBoundaries)] (const NKikimrScheme::TEvDescribeSchemeResult& record) {
1355+
const auto& pathDescr = record.GetPathDescription();
1356+
UNIT_ASSERT_VALUES_EQUAL(pathDescr.TablePartitionsSize(), expectedBoundaries.size() + 1);
1357+
for (size_t i = 0; i < expectedBoundaries.size(); ++i) {
1358+
const auto& partition = pathDescr.GetTablePartitions(i);
1359+
const auto actualBoundary = TSplitBoundarySerializer<T>::Deserialize(partition.GetEndOfRangeKeyPrefix()).at(0);
1360+
UNIT_ASSERT_VALUES_EQUAL_C(
1361+
actualBoundary, expectedBoundaries[i],
1362+
"partition index: " << i << '\n'
1363+
<< "actual key prefix: " << partition.GetEndOfRangeKeyPrefix().Quote() << '\n'
1364+
);
1365+
}
1366+
};
1367+
}
1368+
1369+
template TCheckFunc SplitBoundaries<ui32>(TVector<ui32>&&);
1370+
13191371
TCheckFunc ServerlessComputeResourcesMode(NKikimrSubDomains::EServerlessComputeResourcesMode serverlessComputeResourcesMode) {
13201372
return [=] (const NKikimrScheme::TEvDescribeSchemeResult& record) {
13211373
UNIT_ASSERT_C(IsGoodDomainStatus(record.GetStatus()), "Unexpected status: " << record.GetStatus());

ydb/core/tx/schemeshard/ut_helpers/ls_checks.h

+5-1
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,10 @@ namespace NLs {
102102
void CheckBoundaries(const NKikimrScheme::TEvDescribeSchemeResult& record);
103103
TCheckFunc PartitionCount(ui32 count);
104104
TCheckFunc PartitionKeys(TVector<TString> lastShardKeys);
105+
// Checks that the first cell of each partition's end-of-range key prefix (for every partition but the last) equals the corresponding expected boundary.
106+
// Similar to PartitionKeys check, but does not require you to pass split boundaries in a serialized form.
107+
template <typename T>
108+
TCheckFunc SplitBoundaries(TVector<T>&& expectedBoundaries);
105109
TCheckFunc FollowerCount(ui32 count);
106110
TCheckFunc CrossDataCenterFollowerCount(ui32 count);
107111
TCheckFunc AllowFollowerPromotion(bool val);
@@ -141,7 +145,7 @@ namespace NLs {
141145
TCheckFunc IndexState(NKikimrSchemeOp::EIndexState state);
142146
TCheckFunc IndexKeys(const TVector<TString>& keyNames);
143147
TCheckFunc IndexDataColumns(const TVector<TString>& dataColumnNames);
144-
148+
145149
TCheckFunc VectorIndexDescription(Ydb::Table::VectorIndexSettings_Distance dist,
146150
Ydb::Table::VectorIndexSettings_Similarity similarity,
147151
Ydb::Table::VectorIndexSettings_VectorType vectorType,

ydb/core/tx/schemeshard/ut_index_build/ut_vector_index_build.cpp

+102
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
#include <ydb/core/base/table_index.h>
22
#include <ydb/core/tx/schemeshard/ut_helpers/helpers.h>
33
#include <ydb/core/tx/schemeshard/schemeshard_billing_helpers.h>
4+
#include <ydb/core/testlib/actors/block_events.h>
45
#include <ydb/core/testlib/tablet_helpers.h>
56

67
#include <ydb/core/tx/datashard/datashard.h>
78
#include <ydb/core/metering/metering.h>
89

10+
#include <ydb/public/sdk/cpp/client/ydb_table/table.h>
11+
912
using namespace NKikimr;
1013
using namespace NSchemeShard;
1114
using namespace NSchemeShardUT_Private;
@@ -225,4 +228,103 @@ Y_UNIT_TEST_SUITE (VectorIndexBuildTest) {
225228

226229
UNIT_ASSERT_VALUES_EQUAL(billRecords.size(), 0);
227230
}
231+
232+
Y_UNIT_TEST(VectorIndexDescriptionIsPersisted) {
233+
TTestBasicRuntime runtime;
234+
TTestEnv env(runtime);
235+
ui64 txId = 100;
236+
237+
TestCreateTable(runtime, ++txId, "/MyRoot", R"(
238+
Name: "vectors"
239+
Columns { Name: "id" Type: "Uint64" }
240+
Columns { Name: "embedding" Type: "String" }
241+
Columns { Name: "covered" Type: "String" }
242+
KeyColumnNames: [ "id" ]
243+
)");
244+
env.TestWaitNotification(runtime, txId);
245+
246+
const auto globalIndexSettings = []{
247+
Ydb::Table::GlobalIndexSettings globalIndexSettings;
248+
UNIT_ASSERT(google::protobuf::TextFormat::ParseFromString(R"(
249+
partition_at_keys {
250+
split_points {
251+
type { tuple_type { elements { optional_type { item { type_id: UINT32 } } } } }
252+
value { items { uint32_value: 12345 } }
253+
}
254+
split_points {
255+
type { tuple_type { elements { optional_type { item { type_id: UINT32 } } } } }
256+
value { items { uint32_value: 54321 } }
257+
}
258+
}
259+
partitioning_settings {
260+
min_partitions_count: 3
261+
max_partitions_count: 3
262+
}
263+
)", &globalIndexSettings));
264+
return NYdb::NTable::TGlobalIndexSettings::FromProto(globalIndexSettings);
265+
}();
266+
267+
auto vectorIndexSettings = []{
268+
Ydb::Table::VectorIndexSettings vectorIndexSettings;
269+
UNIT_ASSERT(google::protobuf::TextFormat::ParseFromString(R"(
270+
distance: DISTANCE_COSINE,
271+
vector_type: VECTOR_TYPE_FLOAT,
272+
vector_dimension: 1024
273+
)", &vectorIndexSettings));
274+
using T = NYdb::NTable::TVectorIndexSettings;
275+
return std::make_unique<T>(T::FromProto(vectorIndexSettings));
276+
}();
277+
278+
TBlockEvents<TEvSchemeShard::TEvModifySchemeTransaction> indexCreationBlocker(runtime, [](const auto& ev) {
279+
const auto& modifyScheme = ev->Get()->Record.GetTransaction(0);
280+
return modifyScheme.GetOperationType() == NKikimrSchemeOp::ESchemeOpCreateIndexBuild;
281+
});
282+
283+
const ui64 buildIndexTx = ++txId;
284+
TestBuildIndex(runtime, buildIndexTx, TTestTxConfig::SchemeShard, "/MyRoot", "/MyRoot/vectors", TBuildIndexConfig{
285+
"by_embedding", NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree, { "embedding" }, { "covered" },
286+
{ globalIndexSettings, globalIndexSettings }, std::move(vectorIndexSettings)
287+
});
288+
289+
RebootTablet(runtime, TTestTxConfig::SchemeShard, runtime.AllocateEdgeActor());
290+
291+
indexCreationBlocker.Stop().Unblock();
292+
env.TestWaitNotification(runtime, buildIndexTx);
293+
294+
auto buildIndexOperation = TestGetBuildIndex(runtime, TTestTxConfig::SchemeShard, "/MyRoot", buildIndexTx);
295+
UNIT_ASSERT_VALUES_EQUAL_C(
296+
buildIndexOperation.GetIndexBuild().GetState(), Ydb::Table::IndexBuildState::STATE_DONE,
297+
buildIndexOperation.DebugString()
298+
);
299+
300+
TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/vectors/by_embedding"), {
301+
NLs::PathExist,
302+
NLs::IndexState(NKikimrSchemeOp::EIndexStateReady),
303+
NLs::IndexType(NKikimrSchemeOp::EIndexTypeGlobalVectorKmeansTree),
304+
NLs::IndexKeys({"embedding"}),
305+
NLs::IndexDataColumns({"covered"}),
306+
NLs::VectorIndexDescription(
307+
Ydb::Table::VectorIndexSettings::DISTANCE_COSINE,
308+
Ydb::Table::VectorIndexSettings::SIMILARITY_UNSPECIFIED,
309+
Ydb::Table::VectorIndexSettings::VECTOR_TYPE_FLOAT,
310+
1024
311+
)
312+
});
313+
314+
using namespace NKikimr::NTableIndex::NTableVectorKmeansTreeIndex;
315+
TestDescribeResult(DescribePrivatePath(runtime, JoinFsPaths("/MyRoot/vectors/by_embedding", LevelTable), true, true), {
316+
NLs::IsTable,
317+
NLs::PartitionCount(3),
318+
NLs::MinPartitionsCountEqual(3),
319+
NLs::MaxPartitionsCountEqual(3),
320+
NLs::SplitBoundaries<ui32>({12345, 54321})
321+
});
322+
TestDescribeResult(DescribePrivatePath(runtime, JoinFsPaths("/MyRoot/vectors/by_embedding", PostingTable), true, true), {
323+
NLs::IsTable,
324+
NLs::PartitionCount(3),
325+
NLs::MinPartitionsCountEqual(3),
326+
NLs::MaxPartitionsCountEqual(3),
327+
NLs::SplitBoundaries<ui32>({12345, 54321})
328+
});
329+
}
228330
}

0 commit comments

Comments
 (0)