Skip to content

Commit 3d455e9

Browse files
authored
Merge c6c8e41 into 3848f64
2 parents 3848f64 + c6c8e41 commit 3d455e9

File tree

2 files changed

+49
-8
lines changed

2 files changed

+49
-8
lines changed

ydb/core/health_check/health_check.cpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -2090,7 +2090,7 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
20902090
++DisksColors[status];
20912091
switch (status) {
20922092
case Ydb::Monitoring::StatusFlag::BLUE: // disk is good, but not available
2093-
case Ydb::Monitoring::StatusFlag::YELLOW: // disk is initializing, not currently available
2093+
// YELLOW and ORANGE are intentionally not handled here: they mean that a disk is running out of space but is still available
20942094
case Ydb::Monitoring::StatusFlag::RED: // disk is bad, probably not available
20952095
case Ydb::Monitoring::StatusFlag::GREY: // the status is absent, the disk is not available
20962096
IncrementFor(realm);
@@ -2106,7 +2106,7 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
21062106
if (ErasureSpecies == NONE) {
21072107
if (FailedDisks > 0) {
21082108
context.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "Group failed", ETags::GroupState, {ETags::VDiskState});
2109-
} else if (DisksColors[Ydb::Monitoring::StatusFlag::YELLOW] > 0) {
2109+
} else if (DisksColors[Ydb::Monitoring::StatusFlag::YELLOW] > 0 || DisksColors[Ydb::Monitoring::StatusFlag::ORANGE] > 0) {
21102110
context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Group degraded", ETags::GroupState, {ETags::VDiskState});
21112111
}
21122112
} else if (ErasureSpecies == BLOCK_4_2) {
@@ -2120,7 +2120,7 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
21202120
} else {
21212121
context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Group degraded", ETags::GroupState, {ETags::VDiskState});
21222122
}
2123-
} else if (DisksColors[Ydb::Monitoring::StatusFlag::YELLOW] > 0) {
2123+
} else if (DisksColors[Ydb::Monitoring::StatusFlag::YELLOW] > 0 || DisksColors[Ydb::Monitoring::StatusFlag::ORANGE] > 0) {
21242124
context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Group degraded", ETags::GroupState, {ETags::VDiskState});
21252125
}
21262126
} else if (ErasureSpecies == MIRROR_3_DC) {
@@ -2134,7 +2134,7 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
21342134
} else {
21352135
context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Group degraded", ETags::GroupState, {ETags::VDiskState});
21362136
}
2137-
} else if (DisksColors[Ydb::Monitoring::StatusFlag::YELLOW] > 0) {
2137+
} else if (DisksColors[Ydb::Monitoring::StatusFlag::YELLOW] > 0 || DisksColors[Ydb::Monitoring::StatusFlag::ORANGE] > 0) {
21382138
context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Group degraded", ETags::GroupState, {ETags::VDiskState});
21392139
}
21402140
}

ydb/core/health_check/health_check_ut.cpp

+45-4
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
5858

5959
const int GROUP_START_ID = 0x80000000;
6060
const int VCARD_START_ID = 55;
61+
const int PDISK_START_ID = 42;
6162
const int DEFAULT_GROUP_GENERATION = 3;
6263

6364
const TPathId SUBDOMAIN_KEY = {7000000000, 1};
@@ -181,7 +182,8 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
181182
}
182183

183184
void AddVSlotsToSysViewResponse(NSysView::TEvSysView::TEvGetVSlotsResponse::TPtr* ev, size_t groupCount,
184-
const TVDisks& vslots, ui32 groupStartId = GROUP_START_ID) {
185+
const TVDisks& vslots, ui32 groupStartId = GROUP_START_ID,
186+
bool withPdisk = false) {
185187
auto& record = (*ev)->Get()->Record;
186188
auto entrySample = record.entries(0);
187189
record.clear_entries();
@@ -190,10 +192,14 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
190192
const auto *descriptor = NKikimrBlobStorage::EVDiskStatus_descriptor();
191193
for (size_t i = 0; i < groupCount; ++i) {
192194
auto vslotId = VCARD_START_ID;
195+
auto pdiskId = PDISK_START_ID;
193196
for (const auto& vslot : vslots) {
194197
auto* entry = record.add_entries();
195198
entry->CopyFrom(entrySample);
196199
entry->mutable_key()->set_vslotid(vslotId);
200+
if (withPdisk) {
201+
entry->mutable_key()->set_pdiskid(pdiskId);
202+
}
197203
entry->mutable_info()->set_groupid(groupId);
198204
entry->mutable_info()->set_failrealm(vslotId);
199205
if (vslot.Status) {
@@ -202,6 +208,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
202208
entry->mutable_info()->set_groupgeneration(vslot.Generation);
203209
entry->mutable_info()->set_vdisk(vslotId);
204210
++vslotId;
211+
++pdiskId;
205212
}
206213
++groupId;
207214
}
@@ -215,6 +222,22 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
215222
entry->mutable_info()->set_name(STORAGE_POOL_NAME);
216223
}
217224

225+
// Rewrites the PDisks sys view response in place so that it describes `count`
// pdisks with consecutive ids starting at PDISK_START_ID. Each generated entry is
// a copy of entry 0 of the incoming response with the pdisk id, total size and
// available size overridden.
//
// ev        - response event whose record is rewritten; entries(0) is read
//             unconditionally, so the incoming record is expected to be non-empty.
// count     - number of pdisk entries to generate.
// occupancy - used share of every generated disk; the available size is set to
//             (1 - occupancy) of the total size. The free share is clamped to
//             [0, 1] (NaN is treated as 0).
void AddPDisksToSysViewResponse(NSysView::TEvSysView::TEvGetPDisksResponse::TPtr* ev, size_t count, double occupancy) {
    auto& record = (*ev)->Get()->Record;
    // Take the template by value: the entries are cleared right below.
    auto entrySample = record.entries(0);
    record.clear_entries();

    const size_t totalSize = 3'200'000'000'000ull;

    // Converting a negative or non-finite double to an unsigned integer is
    // undefined behaviour, so clamp the free share before the conversion.
    double freeShare = 1 - occupancy;
    if (!(freeShare > 0)) { // also catches NaN
        freeShare = 0;
    } else if (freeShare > 1) {
        freeShare = 1;
    }
    // Loop-invariant: every generated pdisk reports the same sizes.
    const ui64 availableSize = static_cast<ui64>(freeShare * totalSize);

    auto pdiskId = PDISK_START_ID;
    for (size_t i = 0; i < count; ++i) {
        auto* entry = record.add_entries();
        entry->CopyFrom(entrySample);
        entry->mutable_key()->set_pdiskid(pdiskId);
        entry->mutable_info()->set_totalsize(totalSize);
        entry->mutable_info()->set_availablesize(availableSize);
        ++pdiskId;
    }
}
240+
218241
void AddGroupVSlotInControllerConfigResponseWithStaticGroup(TEvBlobStorage::TEvControllerConfigResponse::TPtr* ev,
219242
const NKikimrBlobStorage::TGroupStatus::E groupStatus, const TVDisks& vslots)
220243
{
@@ -415,7 +438,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
415438
CheckHcResult(result, groupNumber, vdiscPerGroupNumber, isMergeRecords);
416439
}
417440

418-
Ydb::Monitoring::SelfCheckResult RequestHcWithVdisks(const NKikimrBlobStorage::TGroupStatus::E groupStatus, const TVDisks& vdisks, bool forStaticGroup = false) {
441+
Ydb::Monitoring::SelfCheckResult RequestHcWithVdisks(const NKikimrBlobStorage::TGroupStatus::E groupStatus, const TVDisks& vdisks, bool forStaticGroup = false, double occupancy = 0) {
419442
TPortManager tp;
420443
ui16 port = tp.GetPort(2134);
421444
ui16 grpcPort = tp.GetPort(2135);
@@ -451,12 +474,17 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
451474
case NSysView::TEvSysView::EvGetVSlotsResponse: {
452475
auto* x = reinterpret_cast<NSysView::TEvSysView::TEvGetVSlotsResponse::TPtr*>(&ev);
453476
if (forStaticGroup) {
454-
AddVSlotsToSysViewResponse(x, 1, vdisks, 0);
477+
AddVSlotsToSysViewResponse(x, 1, vdisks, 0, true);
455478
} else {
456-
AddVSlotsToSysViewResponse(x, 1, vdisks);
479+
AddVSlotsToSysViewResponse(x, 1, vdisks, GROUP_START_ID, true);
457480
}
458481
break;
459482
}
483+
case NSysView::TEvSysView::EvGetPDisksResponse: {
484+
auto* x = reinterpret_cast<NSysView::TEvSysView::TEvGetPDisksResponse::TPtr*>(&ev);
485+
AddPDisksToSysViewResponse(x, vdisks.size(), occupancy);
486+
break;
487+
}
460488
case NSysView::TEvSysView::EvGetGroupsResponse: {
461489
auto* x = reinterpret_cast<NSysView::TEvSysView::TEvGetGroupsResponse::TPtr*>(&ev);
462490
AddGroupsToSysViewResponse(x);
@@ -669,6 +697,19 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
669697
UNIT_ASSERT_VALUES_EQUAL(result.self_check_result(), Ydb::Monitoring::SelfCheck::GOOD);
670698
}
671699

700+
// Runs the health check for a PARTIAL, non-static group built from
// TVDisks{3, READY} with pdisk occupancy 0.9 and checks the STORAGE_GROUP issues:
// one with YELLOW status and none with RED (the last argument of
// CheckHcResultHasIssuesWithStatus is presumably the expected issue count).
Y_UNIT_TEST(YellowGroupIssueOnYellowSpace) {
    const TVDisks readyVdisks{3, NKikimrBlobStorage::READY};
    const bool forStaticGroup = false;
    const double occupancy = 0.9;

    auto hcResult = RequestHcWithVdisks(
        NKikimrBlobStorage::TGroupStatus::PARTIAL,
        readyVdisks,
        forStaticGroup,
        occupancy);
    // Dump the whole result to simplify diagnosing a failure.
    Cerr << hcResult.ShortDebugString() << Endl;

    CheckHcResultHasIssuesWithStatus(hcResult, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::YELLOW, 1);
    CheckHcResultHasIssuesWithStatus(hcResult, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::RED, 0);
}
706+
707+
// Runs the health check for a PARTIAL, non-static group built from
// TVDisks{3, READY} with pdisk occupancy 0.95 and checks that one STORAGE_GROUP
// issue with RED status is reported (the last argument of
// CheckHcResultHasIssuesWithStatus is presumably the expected issue count).
Y_UNIT_TEST(RedGroupIssueOnRedSpace) {
    const TVDisks readyVdisks{3, NKikimrBlobStorage::READY};
    const bool forStaticGroup = false;
    const double occupancy = 0.95;

    auto hcResult = RequestHcWithVdisks(
        NKikimrBlobStorage::TGroupStatus::PARTIAL,
        readyVdisks,
        forStaticGroup,
        occupancy);
    // Dump the whole result to simplify diagnosing a failure.
    Cerr << hcResult.ShortDebugString() << Endl;

    CheckHcResultHasIssuesWithStatus(hcResult, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::RED, 1);
}
712+
672713
/* HC currently infers group status on its own, so it's never unknown
673714
Y_UNIT_TEST(RedGroupIssueWhenUnknownGroupStatus) {
674715
auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::UNKNOWN, {});

0 commit comments

Comments
 (0)