Skip to content

Commit 9432d7d

Browse files
authored
Add CMS request priorities KIKIMR-9024 (#1620)
* Add CMS request priorities KIKIMR-9024
1 parent a08366f commit 9432d7d

20 files changed

+442
-55
lines changed

ydb/core/cms/api_adapters.cpp

+3-1
Original file line number | Diff line number | Diff line change
@@ -378,6 +378,7 @@ class TCreateMaintenanceTask: public TPermissionResponseProcessor<
378378
cmsRequest.SetAvailabilityMode(ConvertAvailabilityMode(opts.availability_mode()));
379379
cmsRequest.SetPartialPermissionAllowed(true);
380380
cmsRequest.SetSchedule(true);
381+
cmsRequest.SetPriority(opts.priority());
381382

382383
for (const auto& group : request.action_groups()) {
383384
Y_ABORT_UNLESS(group.actions().size() == 1);
@@ -559,7 +560,8 @@ class TGetMaintenanceTask: public TAdapterActor<
559560
opts.set_task_uid(taskUid);
560561
opts.set_description(request.GetReason());
561562
opts.set_availability_mode(ConvertAvailabilityMode(request.GetAvailabilityMode()));
562-
563+
opts.set_priority(request.GetPriority());
564+
563565
// pending actions
564566
for (const auto& action : request.GetActions()) {
565567
ConvertAction(action, *result.add_action_group_states()->add_action_states());

ydb/core/cms/cluster_info.cpp

+41-24
Original file line number | Diff line number | Diff line change
@@ -60,12 +60,12 @@ bool TLockableItem::IsLocked(TErrorInfo &error, TDuration defaultRetryTime,
6060
return true;
6161
}
6262

63-
if (!ScheduledLocks.empty() && ScheduledLocks.begin()->Order < DeactivatedLocksOrder) {
63+
if (!ScheduledLocks.empty() && ScheduledLocks.begin()->Priority < DeactivatedLocksPriority) {
6464
error.Code = TStatus::DISALLOW_TEMP;
65-
error.Reason = Sprintf("%s has scheduled action %s owned by %s (order %" PRIu64 " vs %" PRIu64 ")",
65+
error.Reason = Sprintf("%s has scheduled action %s owned by %s (priority %" PRIi32 " vs %" PRIi32 ")",
6666
PrettyItemName().data(), ScheduledLocks.begin()->RequestId.data(),
67-
ScheduledLocks.begin()->Owner.data(), ScheduledLocks.begin()->Order,
68-
DeactivatedLocksOrder);
67+
ScheduledLocks.begin()->Owner.data(), ScheduledLocks.begin()->Priority,
68+
DeactivatedLocksPriority);
6969
error.Deadline = now + defaultRetryTime;
7070
return true;
7171
}
@@ -113,12 +113,12 @@ void TLockableItem::RollbackLocks(ui64 point)
113113

114114
void TLockableItem::ReactivateScheduledLocks()
115115
{
116-
DeactivatedLocksOrder = Max<ui64>();
116+
DeactivatedLocksPriority = Max<i32>();
117117
}
118118

119-
void TLockableItem::DeactivateScheduledLocks(ui64 order)
119+
void TLockableItem::DeactivateScheduledLocks(i32 priority)
120120
{
121-
DeactivatedLocksOrder = order;
121+
DeactivatedLocksPriority = priority;
122122
}
123123

124124
void TLockableItem::RemoveScheduledLocks(const TString &requestId)
@@ -650,21 +650,30 @@ void TClusterInfo::ApplyActionWithoutLog(const NKikimrCms::TAction &action)
650650
case TAction::REBOOT_HOST:
651651
if (auto nodes = NodePtrs(action.GetHost(), MakeServices(action))) {
652652
for (const auto node : nodes) {
653-
for (auto &nodeGroup: node->NodeGroups)
654-
nodeGroup->LockNode(node->NodeId);
653+
for (auto &nodeGroup: node->NodeGroups) {
654+
if (!nodeGroup->IsNodeLocked(node->NodeId)) {
655+
nodeGroup->LockNode(node->NodeId);
656+
}
657+
}
655658
}
656659
}
657660
break;
658661
case TAction::REPLACE_DEVICES:
659662
for (const auto &device : action.GetDevices()) {
660663
if (HasPDisk(device)) {
661664
auto pdisk = &PDiskRef(device);
662-
for (auto &nodeGroup: NodeRef(pdisk->NodeId).NodeGroups)
663-
nodeGroup->LockNode(pdisk->NodeId);
665+
for (auto &nodeGroup: NodeRef(pdisk->NodeId).NodeGroups) {
666+
if (!nodeGroup->IsNodeLocked(pdisk->NodeId)) {
667+
nodeGroup->LockNode(pdisk->NodeId);
668+
}
669+
}
664670
} else if (HasVDisk(device)) {
665671
auto vdisk = &VDiskRef(device);
666-
for (auto &nodeGroup: NodeRef(vdisk->NodeId).NodeGroups)
667-
nodeGroup->LockNode(vdisk->NodeId);
672+
for (auto &nodeGroup: NodeRef(vdisk->NodeId).NodeGroups) {
673+
if (!nodeGroup->IsNodeLocked(vdisk->NodeId)) {
674+
nodeGroup->LockNode(vdisk->NodeId);
675+
}
676+
}
668677
}
669678
}
670679
break;
@@ -756,7 +765,7 @@ ui64 TClusterInfo::AddLocks(const TPermissionInfo &permission, const TActorConte
756765
|| permission.Action.GetType() == TAction::REBOOT_HOST
757766
|| permission.Action.GetType() == TAction::REPLACE_DEVICES)) {
758767
item->State = RESTART;
759-
lock = true;;
768+
lock = true;
760769
}
761770

762771
if (lock) {
@@ -854,7 +863,7 @@ ui64 TClusterInfo::ScheduleActions(const TRequestInfo &request, const TActorCont
854863
auto items = FindLockedItems(action, ctx);
855864

856865
for (auto item : items)
857-
item->ScheduleLock({action, request.Owner, request.RequestId, request.Order});
866+
item->ScheduleLock({action, request.Owner, request.RequestId, request.Priority});
858867

859868
locks += items.size();
860869
}
@@ -868,10 +877,10 @@ void TClusterInfo::UnscheduleActions(const TString &requestId)
868877
entry.second->RemoveScheduledLocks(requestId);
869878
}
870879

871-
void TClusterInfo::DeactivateScheduledLocks(ui64 order)
880+
void TClusterInfo::DeactivateScheduledLocks(i32 priority)
872881
{
873882
for (auto &entry : LockableItems)
874-
entry.second->DeactivateScheduledLocks(order);
883+
entry.second->DeactivateScheduledLocks(priority);
875884
}
876885

877886
void TClusterInfo::ReactivateScheduledLocks()
@@ -1020,22 +1029,30 @@ void TOperationLogManager::ApplyAction(const NKikimrCms::TAction &action,
10201029
case NKikimrCms::TAction::REBOOT_HOST:
10211030
if (auto nodes = clusterState->NodePtrs(action.GetHost(), MakeServices(action))) {
10221031
for (const auto node : nodes) {
1023-
for (auto &nodeGroup: node->NodeGroups)
1024-
AddNodeLockOperation(node->NodeId, nodeGroup);
1032+
for (auto &nodeGroup: node->NodeGroups) {
1033+
if (!nodeGroup->IsNodeLocked(node->NodeId)) {
1034+
AddNodeLockOperation(node->NodeId, nodeGroup);
1035+
}
1036+
}
10251037
}
10261038
}
10271039
break;
10281040
case NKikimrCms::TAction::REPLACE_DEVICES:
10291041
for (const auto &device : action.GetDevices()) {
10301042
if (clusterState->HasPDisk(device)) {
10311043
auto pdisk = &clusterState->PDisk(device);
1032-
for (auto &nodeGroup: clusterState->NodeRef(pdisk->NodeId).NodeGroups)
1033-
AddNodeLockOperation(pdisk->NodeId, nodeGroup);
1034-
1044+
for (auto &nodeGroup: clusterState->NodeRef(pdisk->NodeId).NodeGroups) {
1045+
if (!nodeGroup->IsNodeLocked(pdisk->NodeId)) {
1046+
AddNodeLockOperation(pdisk->NodeId, nodeGroup);
1047+
}
1048+
}
10351049
} else if (clusterState->HasVDisk(device)) {
10361050
auto vdisk = &clusterState->VDisk(device);
1037-
for (auto &nodeGroup: clusterState->NodeRef(vdisk->NodeId).NodeGroups)
1038-
AddNodeLockOperation(vdisk->NodeId, nodeGroup);
1051+
for (auto &nodeGroup: clusterState->NodeRef(vdisk->NodeId).NodeGroups) {
1052+
if (!nodeGroup->IsNodeLocked(vdisk->NodeId)) {
1053+
AddNodeLockOperation(vdisk->NodeId, nodeGroup);
1054+
}
1055+
}
10391056
}
10401057
}
10411058
break;

ydb/core/cms/cluster_info.h

+9-8
Original file line numberDiff line numberDiff line change
@@ -97,11 +97,13 @@ struct TRequestInfo {
9797
request.SetPartialPermissionAllowed(Request.GetPartialPermissionAllowed());
9898
request.SetReason(Request.GetReason());
9999
request.SetAvailabilityMode(Request.GetAvailabilityMode());
100+
request.SetPriority(Priority);
100101
}
101102

102103
TString RequestId;
103104
TString Owner;
104105
ui64 Order = 0;
106+
i32 Priority = 0;
105107
NKikimrCms::TPermissionRequest Request;
106108
};
107109

@@ -203,10 +205,10 @@ class TLockableItem : public TThrRefBase {
203205
};
204206

205207
struct TScheduledLock : TBaseLock {
206-
TScheduledLock(const NKikimrCms::TAction &action, const TString &owner, const TString &requestId, ui64 order)
208+
TScheduledLock(const NKikimrCms::TAction &action, const TString &owner, const TString &requestId, i32 priority)
207209
: TBaseLock(owner, action)
208210
, RequestId(requestId)
209-
, Order(order)
211+
, Priority(priority)
210212
{
211213
}
212214

@@ -217,7 +219,7 @@ class TLockableItem : public TThrRefBase {
217219
TScheduledLock &operator=(TScheduledLock &&other) = default;
218220

219221
TString RequestId;
220-
ui64 Order = 0;
222+
i32 Priority = 0;
221223
};
222224

223225
struct TTemporaryLock : TBaseLock {
@@ -268,7 +270,7 @@ class TLockableItem : public TThrRefBase {
268270

269271
void ScheduleLock(TScheduledLock &&lock) {
270272
auto pos = LowerBound(ScheduledLocks.begin(), ScheduledLocks.end(), lock, [](auto &l, auto &r) {
271-
return l.Order < r.Order;
273+
return l.Priority < r.Priority;
272274
});
273275
ScheduledLocks.insert(pos, lock);
274276
}
@@ -278,7 +280,7 @@ class TLockableItem : public TThrRefBase {
278280

279281
void RollbackLocks(ui64 point);
280282

281-
void DeactivateScheduledLocks(ui64 order);
283+
void DeactivateScheduledLocks(i32 priority);
282284
void ReactivateScheduledLocks();
283285
void RemoveScheduledLocks(const TString &requestId);
284286

@@ -296,7 +298,7 @@ class TLockableItem : public TThrRefBase {
296298
std::list<TExternalLock> ExternalLocks;
297299
std::list<TScheduledLock> ScheduledLocks;
298300
TVector<TTemporaryLock> TempLocks;
299-
ui64 DeactivatedLocksOrder = Max<ui64>();
301+
i32 DeactivatedLocksPriority = Max<i32>();
300302
THashSet<NKikimrCms::EMarker> Markers;
301303
};
302304

@@ -667,7 +669,6 @@ class TClusterInfo : public TThrRefBase {
667669
TOperationLogManager LogManager;
668670
TOperationLogManager ScheduledLogManager;
669671

670-
void ApplyActionToOperationLog(const NKikimrCms::TAction &action);
671672
void ApplyActionWithoutLog(const NKikimrCms::TAction &action);
672673
void ApplyNodeLimits(ui32 clusterLimit, ui32 clusterRatioLimit, ui32 tenantLimit, ui32 tenantRatioLimit);
673674

@@ -912,7 +913,7 @@ class TClusterInfo : public TThrRefBase {
912913
ui64 AddTempLocks(const NKikimrCms::TAction &action, const TActorContext *ctx);
913914
ui64 ScheduleActions(const TRequestInfo &request, const TActorContext *ctx);
914915
void UnscheduleActions(const TString &requestId);
915-
void DeactivateScheduledLocks(ui64 order);
916+
void DeactivateScheduledLocks(i32 priority);
916917
void ReactivateScheduledLocks();
917918

918919
void RollbackLocks(ui64 point);

ydb/core/cms/cluster_info_ut.cpp

+7-7
Original file line numberDiff line numberDiff line change
@@ -199,33 +199,33 @@ void AddActions(TRequestInfo &request, const NKikimrCms::TAction &action, Ts...
199199
}
200200

201201
template<typename... Ts>
202-
TRequestInfo MakeRequest(const TString &id, const TString &owner, ui64 order, Ts... actions)
202+
TRequestInfo MakeRequest(const TString &id, const TString &owner, i32 priority, Ts... actions)
203203
{
204204
TRequestInfo res;
205205
res.RequestId = id;
206206
res.Owner = owner;
207-
res.Order = order;
207+
res.Priority = priority;
208208
AddActions(res, actions...);
209209
return res;
210210
}
211211

212212
template<typename I>
213-
void CheckScheduledLocks(I pos, I end, const TString &id, const TString &owner, ui64 order)
213+
void CheckScheduledLocks(I pos, I end, const TString &id, const TString &owner, i32 priority)
214214
{
215215
UNIT_ASSERT(pos != end);
216216
UNIT_ASSERT_VALUES_EQUAL(pos->RequestId, id);
217217
UNIT_ASSERT_VALUES_EQUAL(pos->Owner, owner);
218-
UNIT_ASSERT_VALUES_EQUAL(pos->Order, order);
218+
UNIT_ASSERT_VALUES_EQUAL(pos->Priority, priority);
219219
UNIT_ASSERT(++pos == end);
220220
}
221221

222222
template<typename I, typename... Ts>
223-
void CheckScheduledLocks(I pos, I end, const TString &id, const TString &owner, ui64 order, Ts... locks)
223+
void CheckScheduledLocks(I pos, I end, const TString &id, const TString &owner, i32 priority, Ts... locks)
224224
{
225225
UNIT_ASSERT(pos != end);
226226
UNIT_ASSERT_VALUES_EQUAL(pos->RequestId, id);
227227
UNIT_ASSERT_VALUES_EQUAL(pos->Owner, owner);
228-
UNIT_ASSERT_VALUES_EQUAL(pos->Order, order);
228+
UNIT_ASSERT_VALUES_EQUAL(pos->Priority, priority);
229229
CheckScheduledLocks(++pos, end, locks...);
230230
}
231231

@@ -442,7 +442,7 @@ Y_UNIT_TEST_SUITE(TClusterInfoTest) {
442442
"request-3", "user-3", 3,
443443
"request-4", "user-4", 4);
444444

445-
cluster->DeactivateScheduledLocks(request2.Order);
445+
cluster->DeactivateScheduledLocks(request2.Priority);
446446

447447
TErrorInfo error;
448448
UNIT_ASSERT(cluster->Node(1).IsLocked(error, TDuration(), Now(), TDuration()));

0 commit comments

Comments
 (0)