Skip to content

Commit 8c7a652

Browse files
authored
Refactoring index in schemeshard (#6879)
1 parent 315c7f3 commit 8c7a652

12 files changed

+266
-145
lines changed

ydb/core/base/table_index.cpp

+84 -91
Original file line number | Diff line number | Diff line change
@@ -1,142 +1,136 @@
11
#include "table_index.h"
22

3-
TVector<TString>::const_iterator IsUniq(const TVector<TString>& names) {
4-
THashSet<TString> tmp;
3+
#include <ydb/core/base/table_vector_index.h>
4+
5+
namespace NKikimr::NTableIndex {
6+
namespace {
57

6-
for (auto it = names.begin(); it != names.end(); ++it) {
7-
bool inserted = tmp.insert(*it).second;
8-
if (!inserted) {
9-
return it;
8+
const TString* IsUnique(const TVector<TString>& names, THashSet<TString>& tmp) {
9+
tmp.clear();
10+
for (const auto& name : names) {
11+
if (!tmp.emplace(name).second) {
12+
return &name;
1013
}
1114
}
15+
return nullptr;
16+
}
1217

13-
return names.end();
18+
const TString* IsContains(const TVector<TString>& names, const THashSet<TString>& columns, bool contains = false) {
19+
for (const auto& name : names) {
20+
if (columns.contains(name) == contains) {
21+
return &name;
22+
}
23+
}
24+
return nullptr;
1425
}
1526

16-
bool Contains(const TVector<TString>& names, TString str) {
27+
bool Contains(const TVector<TString>& names, std::string_view str) {
1728
return std::find(names.begin(), names.end(), str) != names.end();
1829
}
1930

20-
namespace NKikimr {
21-
namespace NTableIndex {
31+
}
2232

23-
TTableColumns CalcTableImplDescription(const NKikimrSchemeOp::EIndexType indexType, const TTableColumns& table, const TIndexColumns& index) {
33+
TTableColumns CalcTableImplDescription(NKikimrSchemeOp::EIndexType type, const TTableColumns& table, const TIndexColumns& index) {
2434
TTableColumns result;
2535

26-
if (indexType == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree) {
27-
result.Keys.push_back(NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn);
28-
result.Columns.insert(NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn);
29-
} else {
30-
for (const auto& ik: index.KeyColumns) {
36+
const bool isSecondaryIndex = type != NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree;
37+
if (isSecondaryIndex) {
38+
for (const auto& ik : index.KeyColumns) {
3139
result.Keys.push_back(ik);
32-
result.Columns.insert(ik);
40+
result.Columns.emplace(ik);
3341
}
42+
} else {
43+
result.Keys.push_back(NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn);
44+
result.Columns.insert(NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn);
3445
}
3546

36-
for (const auto& tk: table.Keys) {
37-
if (!result.Columns.contains(tk)) {
47+
for (const auto& tk : table.Keys) {
48+
if (result.Columns.emplace(tk).second) {
3849
result.Keys.push_back(tk);
39-
result.Columns.insert(tk);
4050
}
4151
}
4252

43-
for (const auto& dk: index.DataColumns) {
44-
result.Columns.insert(dk);
53+
for (const auto& dk : index.DataColumns) {
54+
result.Columns.emplace(dk);
4555
}
4656

4757
return result;
4858
}
4959

50-
bool IsCompatibleIndex(const NKikimrSchemeOp::EIndexType indexType, const TTableColumns& table, const TIndexColumns& index, TString& explain) {
51-
const bool isVectorIndex = indexType == NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree;
60+
bool IsCompatibleIndex(NKikimrSchemeOp::EIndexType indexType, const TTableColumns& table, const TIndexColumns& index, TString& explain) {
61+
if (const auto* broken = IsContains(table.Keys, table.Columns)) {
62+
explain = TStringBuilder()
63+
<< "all table key columns should be in table columns, table key column "
64+
<< *broken << " is missed";
65+
return false;
66+
}
5267

53-
{
54-
auto brokenAt = IsUniq(table.Keys);
55-
if (brokenAt != table.Keys.end()) {
56-
explain = TStringBuilder()
57-
<< "all table keys should be uniq, for example " << *brokenAt;
58-
return false;
59-
}
68+
if (const auto* broken = IsContains(index.KeyColumns, table.Columns)) {
69+
explain = TStringBuilder()
70+
<< "all index key columns should be in table columns, index key column "
71+
<< *broken << " is missed";
72+
return false;
6073
}
6174

62-
{
63-
auto brokenAt = IsUniq(index.KeyColumns);
64-
if (brokenAt != index.KeyColumns.end()) {
65-
explain = TStringBuilder()
66-
<< "all index keys should be uniq, for example " << *brokenAt;
67-
return false;
68-
}
75+
if (const auto* broken = IsContains(index.DataColumns, table.Columns)) {
76+
explain = TStringBuilder()
77+
<< "all index data columns should be in table columns, index data column "
78+
<< *broken << " is missed";
79+
return false;
6980
}
7081

71-
{
72-
auto brokenAt = IsUniq(index.DataColumns);
73-
if (brokenAt != index.DataColumns.end()) {
74-
explain = TStringBuilder()
75-
<< "all data columns should be uniq, for example " << *brokenAt;
76-
return false;
77-
}
82+
THashSet<TString> tmp;
83+
84+
if (const auto* broken = IsUnique(table.Keys, tmp)) {
85+
explain = TStringBuilder()
86+
<< "all table key columns should be unique, for example " << *broken;
87+
return false;
7888
}
7989

80-
if (isVectorIndex) {
90+
if (const auto* broken = IsUnique(index.KeyColumns, tmp)) {
91+
explain = TStringBuilder()
92+
<< "all index key columns should be unique, for example " << *broken;
93+
return false;
94+
}
95+
96+
if (const auto* broken = IsUnique(index.DataColumns, tmp)) {
97+
explain = TStringBuilder()
98+
<< "all index data columns should be unique, for example " << *broken;
99+
return false;
100+
}
101+
102+
const bool isSecondaryIndex = indexType != NKikimrSchemeOp::EIndexType::EIndexTypeGlobalVectorKmeansTree;
103+
104+
if (isSecondaryIndex) {
105+
if (index.KeyColumns == table.Keys) {
106+
explain = "table and index keys are the same";
107+
return false;
108+
}
109+
} else {
81110
if (index.KeyColumns.size() != 1) {
82-
explain = "Only single key column is supported for vector index";
111+
explain = "only single key column is supported for vector index";
83112
return false;
84113
}
85114

86115
if (Contains(index.KeyColumns, NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn)) {
87-
explain = TStringBuilder() << "Key column should not have a reserved name: " << NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn;
116+
explain = TStringBuilder() << "index key column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn;
88117
return false;
89118
}
90-
91119
if (Contains(index.DataColumns, NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn)) {
92-
explain = TStringBuilder() << "Data column should not have a reserved name: " << NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn;
120+
explain = TStringBuilder() << "index data column shouldn't have a reserved name: " << NTableVectorKmeansTreeIndex::PostingTable_ParentIdColumn;
93121
return false;
94122
}
95123
}
96-
97-
THashSet<TString> indexKeys;
98-
99-
for (const auto& tableKeyName: table.Keys) {
100-
indexKeys.insert(tableKeyName);
101-
if (!table.Columns.contains(tableKeyName)) {
102-
explain = TStringBuilder()
103-
<< "all table keys should be in table columns too"
104-
<< ", table key " << tableKeyName << " is missed";
105-
return false;
106-
}
124+
tmp.clear();
125+
tmp.insert(table.Keys.begin(), table.Keys.end());
126+
if (isSecondaryIndex) {
127+
tmp.insert(index.KeyColumns.begin(), index.KeyColumns.end());
107128
}
108-
109-
for (const auto& indexKeyName: index.KeyColumns) {
110-
if (!isVectorIndex)
111-
indexKeys.insert(indexKeyName);
112-
if (!table.Columns.contains(indexKeyName)) {
113-
explain = TStringBuilder()
114-
<< "all index keys should be in table columns"
115-
<< ", index key " << indexKeyName << " is missed";
116-
return false;
117-
}
118-
}
119-
120-
if (index.KeyColumns == table.Keys && !isVectorIndex) {
129+
if (const auto* broken = IsContains(index.DataColumns, tmp, true)) {
121130
explain = TStringBuilder()
122-
<< "table and index keys are the same";
131+
<< "the same column can't be used as key and data column for one index, for example " << *broken;
123132
return false;
124133
}
125-
126-
for (const auto& dataName: index.DataColumns) {
127-
if (indexKeys.contains(dataName)) {
128-
explain = TStringBuilder()
129-
<< "The same column can't be used as key column and data column for one index";
130-
return false;
131-
}
132-
if (!table.Columns.contains(dataName)) {
133-
explain = TStringBuilder()
134-
<< "all index data columns should be in table columns"
135-
<< ", data columns " << dataName << " is missed";
136-
return false;
137-
}
138-
}
139-
140134
return true;
141135
}
142136

@@ -145,4 +139,3 @@ bool IsImplTable(std::string_view tableName) {
145139
}
146140

147141
}
148-
}

ydb/core/base/table_index.h

+4 -7
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,14 @@
11
#pragma once
22

3-
#include "table_vector_index.h"
43
#include <ydb/core/protos/flat_scheme_op.pb.h>
4+
#include <ydb/core/base/table_vector_index.h>
55

66
#include <util/generic/hash_set.h>
77
#include <util/generic/vector.h>
88
#include <util/generic/string.h>
99
#include <util/string/builder.h>
1010

11-
namespace NKikimr {
12-
namespace NTableIndex {
11+
namespace NKikimr::NTableIndex {
1312

1413
struct TTableColumns {
1514
THashSet<TString> Columns;
@@ -24,10 +23,8 @@ struct TIndexColumns {
2423
inline constexpr const char* ImplTable = "indexImplTable";
2524
inline constexpr std::string_view ImplTables[] = {ImplTable, NTableVectorKmeansTreeIndex::LevelTable, NTableVectorKmeansTreeIndex::PostingTable};
2625

27-
bool IsCompatibleIndex(const NKikimrSchemeOp::EIndexType indexType, const TTableColumns& table, const TIndexColumns& index, TString& explain);
28-
TTableColumns CalcTableImplDescription(const NKikimrSchemeOp::EIndexType indexType, const TTableColumns& table, const TIndexColumns& index);
29-
26+
bool IsCompatibleIndex(NKikimrSchemeOp::EIndexType type, const TTableColumns& table, const TIndexColumns& index, TString& explain);
27+
TTableColumns CalcTableImplDescription(NKikimrSchemeOp::EIndexType type, const TTableColumns& table, const TIndexColumns& index);
3028
bool IsImplTable(std::string_view tableName);
3129

3230
}
33-
}

0 commit comments

Comments
 (0)