Skip to content

Commit a40991a

Browse files
authored
Start tablets in object domain KIKIMR-20271 (#705)
* Start tablets in object domain KIKIMR-20271
1 parent d4ad98f commit a40991a

39 files changed

+636
-49
lines changed

ydb/core/base/hive.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ namespace NKikimr {
4747
EvRequestTabletOwners,
4848
EvReassignOnDecommitGroup,
4949
EvUpdateTabletsObject,
50+
EvUpdateDomain,
5051

5152
// replies
5253
EvBootTabletReply = EvBootTablet + 512,
@@ -80,6 +81,7 @@ namespace NKikimr {
8081
EvInvalidateStoragePoolsReply,
8182
EvReassignOnDecommitGroupReply,
8283
EvUpdateTabletsObjectReply,
84+
EvUpdateDomainReply,
8385

8486
EvEnd
8587
};
@@ -862,6 +864,10 @@ namespace NKikimr {
862864
Record.SetStatus(status);
863865
}
864866
};
867+
868+
// Hive event with id EvUpdateDomain whose payload is the NKikimrHive::TEvUpdateDomain protobuf record.
struct TEvUpdateDomain : TEventPB<TEvUpdateDomain, NKikimrHive::TEvUpdateDomain, EvUpdateDomain> {};
869+
870+
// Hive event with id EvUpdateDomainReply whose payload is the NKikimrHive::TEvUpdateDomainReply protobuf record.
struct TEvUpdateDomainReply : TEventPB<TEvUpdateDomainReply, NKikimrHive::TEvUpdateDomainReply, EvUpdateDomainReply> {};
865871
};
866872

867873
IActor* CreateDefaultHive(const TActorId &tablet, TTabletStorageInfo *info);

ydb/core/base/subdomain.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include <ydb/core/protos/subdomains.pb.h>
44

5+
#include <util/generic/maybe.h>
56
#include <util/system/types.h>
67
#include <utility>
78

@@ -29,6 +30,8 @@ struct TSubDomainKey : public std::pair<ui64, ui64> {
2930
};
3031

3132
static const TSubDomainKey InvalidSubDomainKey = TSubDomainKey();
33+
34+
// Optional serverless compute resources mode: a TMaybe over the protobuf enum,
// instantiated with the NMaybe::TPolicyUndefinedFail policy.
using TMaybeServerlessComputeResourcesMode = TMaybe<NKikimrSubDomains::EServerlessComputeResourcesMode, NMaybe::TPolicyUndefinedFail>;
3235
}
3336

3437
template <>

ydb/core/mind/hive/domain_info.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#include "domain_info.h"
2+
3+
namespace NKikimr {
4+
namespace NHive {
5+
6+
// Translates the domain's serverless compute resources mode into a node
// selection policy.
//
// Only SERVERLESS_COMPUTE_RESOURCES_MODE_DEDICATED yields PreferObjectDomain;
// an unset mode, the SHARED mode and any other value all yield Default.
ENodeSelectionPolicy TDomainInfo::GetNodeSelectionPolicy() const {
    if (!ServerlessComputeResourcesMode) {
        return ENodeSelectionPolicy::Default;
    }

    const bool isDedicated =
        *ServerlessComputeResourcesMode == NKikimrSubDomains::SERVERLESS_COMPUTE_RESOURCES_MODE_DEDICATED;
    return isDedicated
        ? ENodeSelectionPolicy::PreferObjectDomain
        : ENodeSelectionPolicy::Default;
}
20+
21+
} // NHive
22+
} // NKikimr

ydb/core/mind/hive/domain_info.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,18 @@
55
namespace NKikimr {
66
namespace NHive {
77

8+
// Node selection policy derived from a domain's serverless compute resources mode
// (see TDomainInfo::GetNodeSelectionPolicy).
enum class ENodeSelectionPolicy : ui32 {
9+
// TNodeFilter::GetEffectiveAllowedDomains() returns the configured AllowedDomains.
Default,
10+
// TNodeFilter::GetEffectiveAllowedDomains() returns only the object domain.
PreferObjectDomain,
11+
};
12+
813
// Per-domain record stored in THive::Domains, keyed by TSubDomainKey.
struct TDomainInfo {
914
TString Path;
1015
TTabletId HiveId = 0;
16+
// Left empty when TEvUpdateDomain reports SERVERLESS_COMPUTE_RESOURCES_MODE_UNSPECIFIED.
TMaybeServerlessComputeResourcesMode ServerlessComputeResourcesMode;
17+
18+
// Maps ServerlessComputeResourcesMode to the policy used when picking allowed domains.
ENodeSelectionPolicy GetNodeSelectionPolicy() const;
1119
};
1220

13-
}
14-
}
21+
} // NHive
22+
} // NKikimr

ydb/core/mind/hive/follower_group.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,10 @@ struct TFollowerGroup {
1717
bool RequireDifferentNodes = false; // do not run followers on same nodes as another followers of the same leader
1818
bool FollowerCountPerDataCenter = false; // PER_AZ KIKIMR-10443
1919

20-
TFollowerGroup() = default;
20+
// Constructs the group, forwarding `hive` to the NodeFilter member's constructor.
explicit TFollowerGroup(const THive& hive)
21+
: NodeFilter(hive)
22+
{}
23+
2124
TFollowerGroup(const TFollowerGroup&) = delete;
2225
TFollowerGroup(TFollowerGroup&&) = delete;
2326
TFollowerGroup& operator =(const TFollowerGroup&) = delete;

ydb/core/mind/hive/hive.cpp

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1+
#include "domain_info.h"
12
#include "hive.h"
3+
#include "hive_impl.h"
4+
#include "leader_tablet_info.h"
25

36
#include <ydb/core/util/tuples.h>
47

@@ -76,5 +79,25 @@ NMetrics::EResource GetDominantResourceType(const TResourceNormalizedValues& nor
7679
}
7780
return dominant;
7881
}
82+
83+
// Binds the filter to a Hive instance; the reference is kept in the Hive member
// and used by GetEffectiveAllowedDomains() to look up the object domain.
TNodeFilter::TNodeFilter(const THive& hive)
84+
: Hive(hive)
85+
{}
86+
87+
// Returns the domains this filter effectively allows, as a non-owning view.
//
// The object domain is looked up in Hive:
//   * unknown domain, or policy Default  -> the configured AllowedDomains;
//   * policy PreferObjectDomain          -> a one-element view of ObjectDomain.
//
// The returned TArrayRef refers to this filter's own members (AllowedDomains
// or ObjectDomain).
TArrayRef<const TSubDomainKey> TNodeFilter::GetEffectiveAllowedDomains() const {
    const auto* objectDomainInfo = Hive.FindDomain(ObjectDomain);

    if (objectDomainInfo) {
        // No `default:` label on purpose, so the compiler can still warn about
        // enumerators added later that are not handled here.
        switch (objectDomainInfo->GetNodeSelectionPolicy()) {
            case ENodeSelectionPolicy::Default:
                break;
            case ENodeSelectionPolicy::PreferObjectDomain:
                return {&ObjectDomain, 1};
        }
    }

    // Single fallback return: previously control could flow off the end of this
    // non-void function (undefined behaviour) if the policy held a value
    // outside the listed enumerators.
    return {AllowedDomains.begin(), AllowedDomains.end()};
}
80-
}
101+
102+
} // NHive
103+
} // NKikimr

ydb/core/mind/hive/hive.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,13 @@ struct TNodeFilter {
275275
TVector<TSubDomainKey> AllowedDomains;
276276
TVector<TNodeId> AllowedNodes;
277277
TVector<TDataCenterId> AllowedDataCenters;
278+
TSubDomainKey ObjectDomain;
279+
280+
const THive& Hive;
281+
282+
explicit TNodeFilter(const THive& hive);
283+
284+
TArrayRef<const TSubDomainKey> GetEffectiveAllowedDomains() const;
278285
};
279286

280287
} // NHive

ydb/core/mind/hive/hive_domains.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,18 @@ void THive::Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) {
6262
}
6363
}
6464

65+
// Handles TEvHive::TEvUpdateDomain.
//
// Updates the in-memory TDomainInfo for the domain key carried by the event
// (the entry is created if absent): the serverless compute resources mode is
// stored, or cleared when the event reports
// SERVERLESS_COMPUTE_RESOURCES_MODE_UNSPECIFIED. Then executes the
// update-domain transaction, handing the event over to it.
void THive::Handle(TEvHive::TEvUpdateDomain::TPtr& ev) {
    const auto& record = ev->Get()->Record;
    BLOG_D("Handle TEvHive::TEvUpdateDomain(" << record.ShortDebugString() << ")");

    const TSubDomainKey subdomainKey(record.GetDomainKey());
    TDomainInfo& domainInfo = Domains[subdomainKey];

    const auto mode = record.GetServerlessComputeResourcesMode();
    if (mode == NKikimrSubDomains::SERVERLESS_COMPUTE_RESOURCES_MODE_UNSPECIFIED) {
        domainInfo.ServerlessComputeResourcesMode.Clear();
    } else {
        domainInfo.ServerlessComputeResourcesMode = mode;
    }

    // `record` must not be used past this point: the event is moved into the transaction.
    Execute(CreateUpdateDomain(subdomainKey, std::move(ev)));
}
76+
6577
TString THive::GetDomainName(TSubDomainKey domain) {
6678
auto itDomain = Domains.find(domain);
6779
if (itDomain != Domains.end()) {

ydb/core/mind/hive/hive_impl.cpp

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,18 @@
1010
#include <library/cpp/time_provider/time_provider.h>
1111
#include <util/generic/array_ref.h>
1212

13-
template <>
14-
inline IOutputStream& operator <<(IOutputStream& out, const TArrayRef<const NKikimrHive::TDataCentersGroup*>& vec) {
13+
Y_DECLARE_OUT_SPEC(inline, TArrayRef<const NKikimrHive::TDataCentersGroup*>, out, vec) {
1514
out << '[';
1615
for (auto it = vec.begin(); it != vec.end(); ++it) {
1716
if (it != vec.begin())
1817
out << ';';
1918
out << (*it)->ShortDebugString();
2019
}
2120
out << ']';
22-
return out;
21+
}
22+
23+
// Stream output for a view of sub-domain keys: writes '[', the elements of
// `vec` joined via JoinSeq with ',', then ']'.
Y_DECLARE_OUT_SPEC(inline, TArrayRef<const NKikimr::TSubDomainKey>, out, vec) {
24+
out << '[' << JoinSeq(',', vec) << ']';
2325
}
2426

2527
namespace NKikimr {
@@ -1268,7 +1270,8 @@ THive::TBestNodeResult THive::FindBestNode(const TTabletInfo& tablet) {
12681270
<< " to run the tablet " << tablet.ToString()
12691271
<< " node domains " << nodeInfo.ServicedDomains
12701272
<< " tablet object domain " << tablet.GetLeader().ObjectDomain
1271-
<< " tablet allowed domains " << tablet.GetNodeFilter().AllowedDomains);
1273+
<< " tablet allowed domains " << tablet.GetNodeFilter().AllowedDomains
1274+
<< " tablet effective allowed domains " << tablet.GetNodeFilter().GetEffectiveAllowedDomains());
12721275
}
12731276
}
12741277
if (!selectedNodes.empty()) {
@@ -1284,6 +1287,7 @@ THive::TBestNodeResult THive::FindBestNode(const TTabletInfo& tablet) {
12841287
TNodeInfo* selectedNode = nullptr;
12851288
if (!selectedNodes.empty()) {
12861289
selectedNodes = SelectMaxPriorityNodes(std::move(selectedNodes), tablet);
1290+
BLOG_TRACE("[FBN] Tablet " << tablet.ToString() << " selected max priority nodes count " << selectedNodes.size());
12871291

12881292
switch (GetNodeSelectStrategy()) {
12891293
case NKikimrConfig::THiveConfig::HIVE_NODE_SELECT_STRATEGY_WEIGHTED_RANDOM:
@@ -1336,7 +1340,7 @@ THive::TBestNodeResult THive::FindBestNode(const TTabletInfo& tablet) {
13361340
}
13371341
nodesLeft -= debugState.NodesWithSomeoneFromOurFamily;
13381342
if (debugState.NodesWithoutDomain == nodesLeft) {
1339-
tablet.BootState = TStringBuilder() << "Can't find domain " << tablet.GetNodeFilter().AllowedDomains;
1343+
tablet.BootState = TStringBuilder() << "Can't find domain " << tablet.GetNodeFilter().GetEffectiveAllowedDomains();
13401344
return TBestNodeResult(true);
13411345
}
13421346
nodesLeft -= debugState.NodesWithoutDomain;
@@ -1486,6 +1490,14 @@ TDomainInfo* THive::FindDomain(TSubDomainKey key) {
14861490
return &it->second;
14871491
}
14881492

1493+
// Const lookup of a domain record by key.
// Returns a pointer to the entry in Domains, or nullptr when the key is absent.
const TDomainInfo* THive::FindDomain(TSubDomainKey key) const {
    const auto found = Domains.find(key);
    return found != Domains.end() ? &found->second : nullptr;
}
1500+
14891501
void THive::DeleteTablet(TTabletId tabletId) {
14901502
auto it = Tablets.find(tabletId);
14911503
if (it != Tablets.end()) {
@@ -2813,6 +2825,7 @@ void THive::ProcessEvent(std::unique_ptr<IEventHandle> event) {
28132825
hFunc(TEvHive::TEvUpdateTabletsObject, Handle);
28142826
hFunc(TEvPrivate::TEvRefreshStorageInfo, Handle);
28152827
hFunc(TEvPrivate::TEvLogTabletMoves, Handle);
2828+
hFunc(TEvHive::TEvUpdateDomain, Handle);
28162829
}
28172830
}
28182831

@@ -2910,6 +2923,7 @@ STFUNC(THive::StateWork) {
29102923
fFunc(TEvHive::TEvUpdateTabletsObject::EventType, EnqueueIncomingEvent);
29112924
fFunc(TEvPrivate::TEvRefreshStorageInfo::EventType, EnqueueIncomingEvent);
29122925
fFunc(TEvPrivate::TEvLogTabletMoves::EventType, EnqueueIncomingEvent);
2926+
fFunc(TEvHive::TEvUpdateDomain::EventType, EnqueueIncomingEvent);
29132927
hFunc(TEvPrivate::TEvProcessIncomingEvent, Handle);
29142928
default:
29152929
if (!HandleDefaultEvents(ev, SelfId())) {

ydb/core/mind/hive/hive_impl.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,6 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
279279
ITransaction* CreateDisconnectNode(THolder<TEvInterconnect::TEvNodeDisconnected> event);
280280
ITransaction* CreateProcessPendingOperations();
281281
ITransaction* CreateProcessBootQueue();
282-
ITransaction* CreateUpdateDomain(TSubDomainKey subdomainKey);
283282
ITransaction* CreateSeizeTablets(TEvHive::TEvSeizeTablets::TPtr event);
284283
ITransaction* CreateSeizeTabletsReply(TEvHive::TEvSeizeTabletsReply::TPtr event);
285284
ITransaction* CreateReleaseTablets(TEvHive::TEvReleaseTablets::TPtr event);
@@ -290,6 +289,7 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
290289
ITransaction* CreateTabletOwnersReply(TEvHive::TEvTabletOwnersReply::TPtr event);
291290
ITransaction* CreateRequestTabletOwners(TEvHive::TEvRequestTabletOwners::TPtr event);
292291
ITransaction* CreateUpdateTabletsObject(TEvHive::TEvUpdateTabletsObject::TPtr event);
292+
ITransaction* CreateUpdateDomain(TSubDomainKey subdomainKey, TEvHive::TEvUpdateDomain::TPtr event = {});
293293

294294
public:
295295
TDomainsView DomainsView;
@@ -549,6 +549,7 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
549549
void Handle(TEvPrivate::TEvRefreshStorageInfo::TPtr& ev);
550550
void Handle(TEvPrivate::TEvLogTabletMoves::TPtr& ev);
551551
void Handle(TEvPrivate::TEvProcessIncomingEvent::TPtr& ev);
552+
void Handle(TEvHive::TEvUpdateDomain::TPtr& ev);
552553

553554
protected:
554555
void RestartPipeTx(ui64 tabletId);
@@ -618,6 +619,7 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
618619
TStoragePoolInfo& GetStoragePool(const TString& name);
619620
TStoragePoolInfo* FindStoragePool(const TString& name);
620621
TDomainInfo* FindDomain(TSubDomainKey key);
622+
const TDomainInfo* FindDomain(TSubDomainKey key) const;
621623
const TNodeLocation& GetNodeLocation(TNodeId nodeId) const;
622624
void DeleteTablet(TTabletId tabletId);
623625
void DeleteNode(TNodeId nodeId);

ydb/core/mind/hive/hive_schema.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,9 +267,10 @@ struct Schema : NIceDb::Schema {
267267
struct Path : Column<3, NScheme::NTypeIds::Utf8> {};
268268
struct Primary : Column<4, NScheme::NTypeIds::Bool> {};
269269
struct HiveId : Column<5, NScheme::NTypeIds::Uint64> {};
270+
struct ServerlessComputeResourcesMode : Column<6, NScheme::NTypeIds::Uint32> { using Type = NKikimrSubDomains::EServerlessComputeResourcesMode; };
270271

271272
using TKey = TableKey<SchemeshardId, PathId>;
272-
using TColumns = TableColumns<SchemeshardId, PathId, Path, Primary, HiveId>;
273+
using TColumns = TableColumns<SchemeshardId, PathId, Path, Primary, HiveId, ServerlessComputeResourcesMode>;
273274
};
274275

275276
struct BlockedOwner : Table<18> {

0 commit comments

Comments
 (0)