Skip to content

Commit 780e345

Browse files
authored
24-3: Add disable evict vdisks option to config (#9812) (#10339)
1 parent d81aef4 commit 780e345

File tree

5 files changed

+87
-2
lines changed

5 files changed

+87
-2
lines changed

ydb/core/cms/cms.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -563,6 +563,12 @@ bool TCms::CheckEvictVDisks(const TAction &action, TErrorInfo &error) const {
563563
return false;
564564
}
565565

566+
if (State->Config.SentinelConfig.EvictVDisksStatus.Empty()) {
567+
error.Code = TStatus::ERROR;
568+
error.Reason = "Evict vdisks is disabled in Sentinel (self heal)";
569+
return false;
570+
}
571+
566572
switch (action.GetType()) {
567573
case TAction::RESTART_SERVICES:
568574
case TAction::SHUTDOWN_HOST:

ydb/core/cms/cms_ut.cpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1951,6 +1951,46 @@ Y_UNIT_TEST_SUITE(TCmsTest) {
19511951
env.CheckDonePermission("user", permission2.GetPermissions(0).GetId());
19521952
}
19531953

1954+
// Scenario: an evict-VDisks permission request is issued and left waiting
// (DISALLOW_TEMP), then SentinelConfig.EvictVDisksStatus is switched to
// DISABLED and back to FAULTY. The test expects the BSC update for node 0 to
// follow the setting (FAULTY -> ACTIVE -> FAULTY) and the waiting request to be
// answered with TStatus::ERROR while eviction is disabled.
Y_UNIT_TEST(DisabledEvictVDisks)
1955+
{
1956+
auto opts = TTestEnvOpts(8).WithSentinel();
1957+
TCmsTestEnv env(opts);
1958+
env.SetLogPriority(NKikimrServices::CMS, NLog::PRI_DEBUG);
1959+
1960+
// Make transition faster for tests purposes
1961+
auto cmsConfig = env.GetCmsConfig();
1962+
cmsConfig.MutableSentinelConfig()->SetDefaultStateLimit(1);
1963+
env.SetCmsConfig(cmsConfig);
1964+
1965+
// Evict VDisks
1966+
auto request = env.CheckPermissionRequest(
1967+
MakePermissionRequest(TRequestOptions("user").WithEvictVDisks(),
1968+
MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(0), 600000000, "storage")
1969+
),
1970+
TStatus::DISALLOW_TEMP // ok, waiting for move VDisks
1971+
);
1972+
1973+
// Check that FAULTY BSC request is sent
1974+
env.CheckBSCUpdateRequests({ env.GetNodeId(0) }, NKikimrBlobStorage::FAULTY);
1975+
1976+
// Disable VDisks eviction
1977+
cmsConfig.MutableSentinelConfig()->SetEvictVDisksStatus(NKikimrCms::TCmsConfig::TSentinelConfig::DISABLED);
1978+
env.SetCmsConfig(cmsConfig);
1979+
1980+
// Check that ACTIVE BSC request is sent
1981+
env.CheckBSCUpdateRequests({ env.GetNodeId(0) }, NKikimrBlobStorage::ACTIVE);
1982+
1983+
// Check that CMS returns ERROR when VDisks eviction is disabled
// NOTE(review): the meaning of the `false` and `0` arguments is not visible
// here - confirm against the TCmsTestEnv::CheckRequest signature.
1984+
env.CheckRequest("user", request.GetRequestId(), false, TStatus::ERROR, 0);
1985+
1986+
// Enable VDisks eviction again
1987+
cmsConfig.MutableSentinelConfig()->SetEvictVDisksStatus(NKikimrCms::TCmsConfig::TSentinelConfig::FAULTY);
1988+
env.SetCmsConfig(cmsConfig);
1989+
1990+
// Check that FAULTY BSC request is sent again
1991+
env.CheckBSCUpdateRequests({ env.GetNodeId(0) }, NKikimrBlobStorage::FAULTY);
1992+
}
1993+
19541994
Y_UNIT_TEST(EmergencyDuringRollingRestart)
19551995
{
19561996
TCmsTestEnv env(8);

ydb/core/cms/config.h

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
#pragma once
22

33
#include "pdisk_state.h"
4+
#include "pdisk_status.h"
45

56
#include <ydb/core/protos/cms.pb.h>
67

78
#include <util/datetime/base.h>
89
#include <util/generic/hash.h>
910
#include <util/generic/map.h>
11+
#include <util/generic/maybe.h>
1012

1113
namespace NKikimr::NCms {
1214

@@ -30,6 +32,8 @@ struct TCmsSentinelConfig {
3032
ui32 RoomRatio;
3133
ui32 RackRatio;
3234

35+
TMaybeFail<EPDiskStatus> EvictVDisksStatus;
36+
3337
void Serialize(NKikimrCms::TCmsConfig::TSentinelConfig &config) const {
3438
config.SetEnable(Enable);
3539
config.SetDryRun(DryRun);
@@ -45,6 +49,7 @@ struct TCmsSentinelConfig {
4549
config.SetRackRatio(RackRatio);
4650

4751
SaveStateLimits(config);
52+
SaveEvictVDisksStatus(config);
4853
}
4954

5055
void Deserialize(const NKikimrCms::TCmsConfig::TSentinelConfig &config) {
@@ -63,6 +68,8 @@ struct TCmsSentinelConfig {
6368

6469
auto newStateLimits = LoadStateLimits(config);
6570
StateLimits.swap(newStateLimits);
71+
72+
EvictVDisksStatus = LoadEvictVDisksStatus(config);
6673
}
6774

6875
void SaveStateLimits(NKikimrCms::TCmsConfig::TSentinelConfig &config) const {
@@ -129,6 +136,31 @@ struct TCmsSentinelConfig {
129136

130137
return stateLimits;
131138
}
139+
140+
// Converts the protobuf EvictVDisksStatus setting into its in-memory form.
// UNKNOWN and FAULTY both yield EPDiskStatus::FAULTY; DISABLED yields an empty
// value (Nothing()). Any value not listed in the switch also yields FAULTY via
// the trailing return.
static TMaybeFail<EPDiskStatus> LoadEvictVDisksStatus(const NKikimrCms::TCmsConfig::TSentinelConfig &config) {
141+
// The alias names the TSentinelConfig message type; the enum values are
// reached through it as nested constants.
using EEvictVDisksStatus = NKikimrCms::TCmsConfig::TSentinelConfig;
142+
switch (config.GetEvictVDisksStatus()) {
143+
case EEvictVDisksStatus::UNKNOWN:
144+
case EEvictVDisksStatus::FAULTY:
145+
return EPDiskStatus::FAULTY;
146+
case EEvictVDisksStatus::DISABLED:
147+
return Nothing();
148+
}
149+
// Reached only when the switch matched none of the cases above.
return EPDiskStatus::FAULTY;
150+
}
151+
152+
// Writes the in-memory EvictVDisksStatus into the protobuf config.
// An empty value is stored as DISABLED and EPDiskStatus::FAULTY as FAULTY.
// For any other defined status the field in `config` is left untouched.
void SaveEvictVDisksStatus(NKikimrCms::TCmsConfig::TSentinelConfig &config) const {
153+
// The alias names the TSentinelConfig message type; the enum values are
// reached through it as nested constants.
using EEvictVDisksStatus = NKikimrCms::TCmsConfig::TSentinelConfig;
154+
155+
if (EvictVDisksStatus.Empty()) {
156+
config.SetEvictVDisksStatus(EEvictVDisksStatus::DISABLED);
157+
return;
158+
}
159+
160+
if (*EvictVDisksStatus == EPDiskStatus::FAULTY) {
161+
config.SetEvictVDisksStatus(EEvictVDisksStatus::FAULTY);
162+
}
163+
}
132164
};
133165

134166
struct TCmsLogConfig {

ydb/core/cms/sentinel.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -895,8 +895,8 @@ class TSentinel: public TActorBootstrapped<TSentinel> {
895895
continue;
896896
}
897897

898-
if (it->second.HasFaultyMarker()) {
899-
info.SetForcedStatus(EPDiskStatus::FAULTY);
898+
if (it->second.HasFaultyMarker() && Config.EvictVDisksStatus.Defined()) {
899+
info.SetForcedStatus(*Config.EvictVDisksStatus);
900900
} else {
901901
info.ResetForcedStatus();
902902
}

ydb/core/protos/cms.proto

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,12 @@ message TCmsConfig {
430430
optional uint32 Limit = 2;
431431
}
432432

433+
// Possible values of the EvictVDisksStatus config field.
enum EEvictVDisksStatus {
434+
// Zero value; the C++ config loader treats it the same as FAULTY.
UNKNOWN = 0;
435+
// Loaded as an empty status by the C++ config loader; CMS then rejects
// evict-VDisks requests with an error.
DISABLED = 1;
436+
// Loaded as EPDiskStatus::FAULTY by the C++ config loader.
FAULTY = 2;
437+
}
438+
433439
optional bool Enable = 1 [default = true];
434440
// Updater's config
435441
optional uint64 UpdateConfigInterval = 2 [default = 3600000000];
@@ -449,6 +455,7 @@ message TCmsConfig {
449455

450456
optional bool DryRun = 13;
451457
repeated TStateLimit StateLimits = 14;
458+
optional EEvictVDisksStatus EvictVDisksStatus = 15;
452459
}
453460

454461
message TLogConfig {

0 commit comments

Comments
 (0)