Skip to content

Commit 46d3cf0

Browse files
layout construction optimization (#3956)
1 parent e60cc87 commit 46d3cf0

File tree

4 files changed

+91
-77
lines changed

4 files changed

+91
-77
lines changed

ydb/core/tx/schemeshard/olap/layout/layout.cpp

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "layout.h"
22
#include <ydb/core/tx/schemeshard/schemeshard_impl.h>
3+
#include <ydb/library/actors/core/log.h>
34

45
namespace NKikimr::NSchemeShard {
56

@@ -14,12 +15,37 @@ std::vector<ui64> TColumnTablesLayout::ShardIdxToTabletId(const std::vector<TSha
1415
}
1516

1617
TColumnTablesLayout TColumnTablesLayout::BuildTrivial(const std::vector<ui64>& tabletIds) {
17-
TTableIdsGroup emptyGroup;
18-
TShardIdsGroup shardIdsGroup;
19-
for (const auto& tabletId : tabletIds) {
20-
shardIdsGroup.AddId(tabletId);
18+
std::set<ui64> ids(tabletIds.begin(), tabletIds.end());
19+
return TColumnTablesLayout({ TTablesGroup(&Default<TTableIdsGroup>(), std::move(ids)) });
20+
}
21+
22+
TColumnTablesLayout::TColumnTablesLayout(std::vector<TTablesGroup>&& groups)
23+
: Groups(std::move(groups))
24+
{
25+
AFL_VERIFY(std::is_sorted(Groups.begin(), Groups.end()));
26+
}
27+
28+
bool TColumnTablesLayout::TTablesGroup::TryMerge(const TTablesGroup& item) {
29+
if (GetTableIds() == item.GetTableIds()) {
30+
for (auto&& i : item.ShardIds) {
31+
AFL_VERIFY(ShardIds.emplace(i).second);
32+
}
33+
return true;
34+
} else {
35+
return false;
2136
}
22-
return TColumnTablesLayout({ TTablesGroup(std::move(emptyGroup), std::move(shardIdsGroup)) });
37+
}
38+
39+
const TColumnTablesLayout::TTableIdsGroup& TColumnTablesLayout::TTablesGroup::GetTableIds() const {
40+
AFL_VERIFY(TableIds);
41+
return *TableIds;
42+
}
43+
44+
TColumnTablesLayout::TTablesGroup::TTablesGroup(const TTableIdsGroup* tableIds, std::set<ui64>&& shardIds)
45+
: TableIds(tableIds)
46+
, ShardIds(std::move(shardIds))
47+
{
48+
AFL_VERIFY(TableIds);
2349
}
2450

2551
}

ydb/core/tx/schemeshard/olap/layout/layout.h

Lines changed: 39 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,24 @@
44

55
#include <ydb/library/accessor/accessor.h>
66

7+
#include <util/digest/numeric.h>
78
#include <util/system/types.h>
89

910
#include <set>
1011

1112
namespace NKikimr::NSchemeShard {
1213

13-
template <class TSetElement>
14+
template <class TSetElement, class THashCalcer>
1415
class TLayoutIdSet {
1516
private:
17+
ui64 Hash = 0;
1618
std::set<TSetElement> Elements;
1719
public:
20+
TLayoutIdSet() = default;
21+
TLayoutIdSet(const TSetElement elem) {
22+
AddId(elem);
23+
}
24+
1825
typename std::set<TSetElement>::const_iterator begin() const {
1926
return Elements.begin();
2027
}
@@ -27,18 +34,6 @@ class TLayoutIdSet {
2734
return Elements.size();
2835
}
2936

30-
explicit operator ui64() const {
31-
return Hash();
32-
}
33-
34-
ui64 Hash() const {
35-
ui64 result = 0;
36-
for (auto&& i : Elements) {
37-
result = CombineHashes(result, std::hash<TSetElement>()(i));
38-
}
39-
return result;
40-
}
41-
4237
std::vector<TSetElement> GetIdsVector() const {
4338
return std::vector<TSetElement>(Elements.begin(), Elements.end());
4439
}
@@ -65,11 +60,19 @@ class TLayoutIdSet {
6560
}
6661

6762
bool AddId(const TSetElement& id) {
68-
return Elements.emplace(id).second;
63+
bool result = Elements.emplace(id).second;
64+
if (result) {
65+
Hash ^= THashCalcer::GetHash(id);
66+
}
67+
return result;
6968
}
7069

7170
bool RemoveId(const TSetElement& id) {
72-
return Elements.erase(id);
71+
auto result = Elements.erase(id);
72+
if (result) {
73+
Hash ^= THashCalcer::GetHash(id);
74+
}
75+
return result;
7376
}
7477

7578
bool operator<(const TLayoutIdSet& item) const {
@@ -79,72 +82,52 @@ class TLayoutIdSet {
7982
if (Elements.size() > item.Elements.size()) {
8083
return false;
8184
}
82-
auto itSelf = Elements.begin();
83-
auto itItem = item.Elements.begin();
84-
while (itSelf != Elements.end() && itItem != item.Elements.end()) {
85-
if (*itSelf < *itItem) {
86-
return true;
87-
} else if (*itSelf > *itItem) {
88-
return false;
89-
}
90-
++itSelf;
91-
++itItem;
92-
}
93-
if (itSelf != Elements.end() && itItem == item.Elements.end()) {
94-
return false;
95-
}
96-
if (itSelf == Elements.end() && itItem != item.Elements.end()) {
97-
return true;
98-
}
99-
return false;
85+
return Hash < item.Hash;
10086
}
10187
bool operator==(const TLayoutIdSet& item) const {
10288
if (Elements.size() != item.Elements.size()) {
10389
return false;
10490
}
105-
auto itSelf = Elements.begin();
106-
auto itItem = item.Elements.begin();
107-
while (itSelf != Elements.end() && itItem != item.Elements.end()) {
108-
if (*itSelf != *itItem) {
109-
return false;
110-
}
111-
++itSelf;
112-
++itItem;
113-
}
114-
return true;
91+
return Hash == item.Hash;
11592
}
11693
};
11794

11895
class TSchemeShard;
11996

12097
class TColumnTablesLayout {
98+
private:
99+
class TPathIdHashCalcer {
100+
public:
101+
template <class T>
102+
static ui64 GetHash(const T& data) {
103+
return data.Hash();
104+
}
105+
};
106+
121107
public:
122-
using TShardIdsGroup = TLayoutIdSet<ui64>;
123-
using TTableIdsGroup = TLayoutIdSet<TPathId>;
108+
using TTableIdsGroup = TLayoutIdSet<TPathId, TPathIdHashCalcer>;
124109

125110
class TTablesGroup {
126111
private:
127-
YDB_READONLY_DEF(TTableIdsGroup, TableIds);
128-
YDB_READONLY_DEF(TShardIdsGroup, ShardIds);
112+
const TTableIdsGroup* TableIds = nullptr;
113+
YDB_READONLY_DEF(std::set<ui64>, ShardIds);
129114
public:
130-
TTablesGroup(const TTableIdsGroup& tableIds, TShardIdsGroup&& shardIds)
131-
: TableIds(tableIds)
132-
, ShardIds(std::move(shardIds)) {
115+
TTablesGroup() = default;
116+
TTablesGroup(const TTableIdsGroup* tableIds, std::set<ui64>&& shardIds);
133117

134-
}
118+
const TTableIdsGroup& GetTableIds() const;
119+
120+
bool TryMerge(const TTablesGroup& item);
135121

136122
bool operator<(const TTablesGroup& item) const {
137-
return TableIds < item.TableIds;
123+
return GetTableIds() < item.GetTableIds();
138124
}
139125
};
140126

141127
private:
142128
YDB_READONLY_DEF(std::vector<TTablesGroup>, Groups);
143129
public:
144-
TColumnTablesLayout(std::vector<TTablesGroup>&& groups)
145-
: Groups(std::move(groups)) {
146-
std::sort(Groups.begin(), Groups.end());
147-
}
130+
TColumnTablesLayout(std::vector<TTablesGroup>&& groups);
148131

149132
static std::vector<ui64> ShardIdxToTabletId(const std::vector<TShardIdx>& shards, const TSchemeShard& ss);
150133

ydb/core/tx/schemeshard/olap/manager/manager.cpp

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -82,25 +82,30 @@ size_t TTablesStorage::Drop(const TPathId& id) {
8282
}
8383
}
8484

85-
NKikimr::NSchemeShard::TColumnTablesLayout TTablesStorage::GetTablesLayout(const std::vector<ui64>& tabletIds) const {
86-
THashMap<ui64, TColumnTablesLayout::TTableIdsGroup> tablesByShard;
85+
TColumnTablesLayout TTablesStorage::GetTablesLayout(const std::vector<ui64>& tabletIds) const {
86+
std::vector<TColumnTablesLayout::TTablesGroup> groups;
87+
groups.reserve(tabletIds.size());
8788
for (auto&& i : tabletIds) {
8889
auto it = TablesByShard.find(i);
8990
if (it == TablesByShard.end()) {
90-
tablesByShard.emplace(i, TColumnTablesLayout::TTableIdsGroup());
91+
groups.emplace_back(&Default<TColumnTablesLayout::TTableIdsGroup>(), std::set<ui64>({i}));
9192
} else {
92-
tablesByShard.emplace(i, it->second);
93+
groups.emplace_back(&it->second, std::set<ui64>({i}));
9394
}
9495
}
95-
THashMap<TColumnTablesLayout::TTableIdsGroup, TColumnTablesLayout::TShardIdsGroup> shardsByTables;
96-
for (auto&& i : tablesByShard) {
97-
Y_ABORT_UNLESS(shardsByTables[i.second].AddId(i.first));
98-
}
99-
std::vector<TColumnTablesLayout::TTablesGroup> groups;
100-
groups.reserve(shardsByTables.size());
101-
for (auto&& i : shardsByTables) {
102-
groups.emplace_back(TColumnTablesLayout::TTablesGroup(i.first, std::move(i.second)));
96+
std::sort(groups.begin(), groups.end());
97+
ui32 delta = 0;
98+
for (ui32 i = 0; i + delta + 1 < groups.size();) {
99+
if (delta) {
100+
groups[i + 1] = std::move(groups[i + delta + 1]);
101+
}
102+
if (groups[i].TryMerge(groups[i + 1])) {
103+
++delta;
104+
} else {
105+
++i;
106+
}
103107
}
108+
groups.resize(groups.size() - delta);
104109
return TColumnTablesLayout(std::move(groups));
105110
}
106111

ydb/core/tx/schemeshard/olap/store/store.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,13 @@ TConclusion<TOlapStoreInfo::TLayoutInfo> TOlapStoreInfo::ILayoutPolicy::Layout(c
1313

1414
TConclusion<TOlapStoreInfo::TLayoutInfo> TOlapStoreInfo::TIdentityGroupsLayout::DoLayout(const TColumnTablesLayout& currentLayout, const ui32 shardsCount) const {
1515
for (auto&& i : currentLayout.GetGroups()) {
16-
if (i.GetTableIds().Size() == 0 && i.GetShardIds().Size() >= shardsCount) {
17-
return TOlapStoreInfo::TLayoutInfo(i.GetShardIds().GetIdsVector(shardsCount), true);
16+
if (i.GetTableIds().Size() == 0 && i.GetShardIds().size() >= shardsCount) {
17+
return TOlapStoreInfo::TLayoutInfo(std::vector<ui64>(i.GetShardIds().begin(), std::next(i.GetShardIds().begin(), shardsCount)), true);
1818
}
19-
if (i.GetShardIds().Size() != shardsCount) {
19+
if (i.GetShardIds().size() != shardsCount) {
2020
continue;
2121
}
22-
return TOlapStoreInfo::TLayoutInfo(i.GetShardIds().GetIdsVector(), false);
22+
return TOlapStoreInfo::TLayoutInfo(std::vector<ui64>(i.GetShardIds().begin(), i.GetShardIds().end()), false);
2323
}
2424
return TConclusionStatus::Fail("cannot find appropriate group for " + ::ToString(shardsCount) + " shards");
2525
}

0 commit comments

Comments
 (0)