Skip to content

Commit 42c8024

Browse files
vporyadke and alexvru authored
Fix reporting of initial VDisk status to SysView (#8853) (#8940)
Co-authored-by: Alexander Rutkovsky <[email protected]>
1 parent 43f08d4 commit 42c8024

11 files changed

+41 -19 lines changed

ydb/core/mind/bscontroller/bsc.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,8 @@ void TBlobStorageController::Handle(TEvNodeWardenStorageConfig::TPtr ev) {
131131
auto prevStaticVSlots = std::exchange(StaticVSlots, {});
132132
StaticVDiskMap.clear();
133133

134+
const TMonotonic mono = TActivationContext::Monotonic();
135+
134136
if (StorageConfig.HasBlobStorageConfig()) {
135137
if (const auto& bsConfig = StorageConfig.GetBlobStorageConfig(); bsConfig.HasServiceSet()) {
136138
const auto& ss = bsConfig.GetServiceSet();
@@ -143,7 +145,7 @@ void TBlobStorageController::Handle(TEvNodeWardenStorageConfig::TPtr ev) {
143145
const auto& location = vslot.GetVDiskLocation();
144146
const TPDiskId pdiskId(location.GetNodeID(), location.GetPDiskID());
145147
const TVSlotId vslotId(pdiskId, location.GetVDiskSlotID());
146-
StaticVSlots.try_emplace(vslotId, vslot, prevStaticVSlots);
148+
StaticVSlots.try_emplace(vslotId, vslot, prevStaticVSlots, mono);
147149
const TVDiskID& vdiskId = VDiskIDFromVDiskID(vslot.GetVDiskID());
148150
StaticVDiskMap.emplace(vdiskId, vslotId);
149151
StaticVDiskMap.emplace(TVDiskID(vdiskId.GroupID, 0, vdiskId), vslotId);

ydb/core/mind/bscontroller/config.h

+4-1
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ namespace NKikimr {
101101

102102
// when the config cmd received
103103
const TInstant Timestamp;
104+
const TMonotonic Mono;
104105

105106
// various settings from controller
106107
const bool DonorMode;
@@ -124,7 +125,8 @@ namespace NKikimr {
124125
bool PushStaticGroupsToSelfHeal = false;
125126

126127
public:
127-
TConfigState(TBlobStorageController &controller, const THostRecordMap &hostRecords, TInstant timestamp)
128+
TConfigState(TBlobStorageController &controller, const THostRecordMap &hostRecords, TInstant timestamp,
129+
TMonotonic mono)
128130
: Self(controller)
129131
, HostConfigs(&controller.HostConfigs)
130132
, Boxes(&controller.Boxes)
@@ -142,6 +144,7 @@ namespace NKikimr {
142144
, NextStoragePoolId(&controller.NextStoragePoolId)
143145
, HostRecords(hostRecords)
144146
, Timestamp(timestamp)
147+
, Mono(mono)
145148
, DonorMode(controller.DonorMode)
146149
, DefaultMaxSlots(controller.DefaultMaxSlots)
147150
, StaticVSlots(controller.StaticVSlots)

ydb/core/mind/bscontroller/config_cmd.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ namespace NKikimr::NBsController {
180180
Response->MutableStatus()->RemoveLast();
181181
}
182182

183-
State.emplace(*Self, Self->HostRecords, TActivationContext::Now());
183+
State.emplace(*Self, Self->HostRecords, TActivationContext::Now(), TActivationContext::Monotonic());
184184
State->CheckConsistency();
185185

186186
TString m;

ydb/core/mind/bscontroller/config_fit_groups.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -607,6 +607,7 @@ namespace NKikimr {
607607
groupInfo->ID, 0, groupInfo->Generation, StoragePool.VDiskKind, failRealmIdx,
608608
failDomainIdx, vdiskIdx, TMood::Normal, groupInfo, &VSlotReadyTimestampQ,
609609
TInstant::Zero(), TDuration::Zero());
610+
vslotInfo->VDiskStatusTimestamp = State.Mono;
610611

611612
// mark as uncommitted
612613
State.UncommittedVSlots.insert(vslotId);

ydb/core/mind/bscontroller/drop_donor.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ class TBlobStorageController::TTxDropDonor
1818
TTxType GetTxType() const override { return NBlobStorageController::TXTYPE_DROP_DONOR; }
1919

2020
bool Execute(TTransactionContext &txc, const TActorContext&) override {
21-
State.emplace(*Self, Self->HostRecords, TActivationContext::Now());
21+
State.emplace(*Self, Self->HostRecords, TActivationContext::Now(), TActivationContext::Monotonic());
2222
State->CheckConsistency();
2323
for (const TVSlotId& vslotId : VSlotIds) {
2424
if (const TVSlotInfo *vslot = State->VSlots.Find(vslotId); vslot && !vslot->IsBeingDeleted()) {

ydb/core/mind/bscontroller/impl.h

+5-3
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
125125

126126
public:
127127
std::optional<NKikimrBlobStorage::EVDiskStatus> VDiskStatus;
128-
NHPTimer::STime VDiskStatusTimestamp = GetCycleCountFast();
128+
TMonotonic VDiskStatusTimestamp;
129129
bool IsReady = false;
130130
bool OnlyPhantomsRemain = false;
131131

@@ -2308,11 +2308,11 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
23082308

23092309
std::optional<NKikimrBlobStorage::TVDiskMetrics> VDiskMetrics;
23102310
std::optional<NKikimrBlobStorage::EVDiskStatus> VDiskStatus;
2311-
NHPTimer::STime VDiskStatusTimestamp = GetCycleCountFast();
2311+
TMonotonic VDiskStatusTimestamp;
23122312
TMonotonic ReadySince = TMonotonic::Max(); // when IsReady becomes true for this disk; Max() in non-READY state
23132313

23142314
TStaticVSlotInfo(const NKikimrBlobStorage::TNodeWardenServiceSet::TVDisk& vdisk,
2315-
std::map<TVSlotId, TStaticVSlotInfo>& prev)
2315+
std::map<TVSlotId, TStaticVSlotInfo>& prev, TMonotonic mono)
23162316
: VDiskId(VDiskIDFromVDiskID(vdisk.GetVDiskID()))
23172317
, VDiskKind(vdisk.GetVDiskKind())
23182318
{
@@ -2324,6 +2324,8 @@ class TBlobStorageController : public TActor<TBlobStorageController>, public TTa
23242324
VDiskStatus = item.VDiskStatus;
23252325
VDiskStatusTimestamp = item.VDiskStatusTimestamp;
23262326
ReadySince = item.ReadySince;
2327+
} else {
2328+
VDiskStatusTimestamp = mono;
23272329
}
23282330
}
23292331
};

ydb/core/mind/bscontroller/load_everything.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,7 @@ class TBlobStorageController::TTxLoadEverything : public TTransactionBase<TBlobS
352352
}
353353

354354
// VSlots
355+
const TMonotonic mono = TActivationContext::Monotonic();
355356
Self->VSlots.clear();
356357
{
357358
using T = Schema::VSlot;
@@ -374,6 +375,7 @@ class TBlobStorageController::TTxLoadEverything : public TTransactionBase<TBlobS
374375
if (x.LastSeenReady != TInstant::Zero()) {
375376
Self->NotReadyVSlotIds.insert(x.VSlotId);
376377
}
378+
x.VDiskStatusTimestamp = mono;
377379

378380
if (!slot.Next()) {
379381
return false;

ydb/core/mind/bscontroller/node_report.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ class TBlobStorageController::TTxNodeReport
2626
return true;
2727
}
2828

29-
State.emplace(*Self, Self->HostRecords, TActivationContext::Now());
29+
State.emplace(*Self, Self->HostRecords, TActivationContext::Now(), TActivationContext::Monotonic());
3030
State->CheckConsistency();
3131

3232
NIceDb::TNiceDb db(txc.DB);

ydb/core/mind/bscontroller/register_node.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ class TBlobStorageController::TTxUpdateNodeDrives
160160
bool Execute(TTransactionContext& txc, const TActorContext&) override {
161161
const TNodeId nodeId = Record.GetNodeId();
162162

163-
State.emplace(*Self, Self->HostRecords, TActivationContext::Now());
163+
State.emplace(*Self, Self->HostRecords, TActivationContext::Now(), TActivationContext::Monotonic());
164164
State->CheckConsistency();
165165

166166
auto updateIsSuccessful = true;

ydb/core/mind/bscontroller/sys_view.cpp

+19-7
Original file line numberDiff line numberDiff line change
@@ -325,8 +325,8 @@ void CopyInfo(NKikimrSysView::TPDiskInfo* info, const THolder<TBlobStorageContro
325325
}
326326

327327
void SerializeVSlotInfo(NKikimrSysView::TVSlotInfo *pb, const TVDiskID& vdiskId, const NKikimrBlobStorage::TVDiskMetrics& m,
328-
std::optional<NKikimrBlobStorage::EVDiskStatus> status, NHPTimer::STime statusTimestamp,
329-
NKikimrBlobStorage::TVDiskKind::EVDiskKind kind, bool isBeingDeleted) {
328+
std::optional<NKikimrBlobStorage::EVDiskStatus> status, NKikimrBlobStorage::TVDiskKind::EVDiskKind kind,
329+
bool isBeingDeleted) {
330330
pb->SetGroupId(vdiskId.GroupID.GetRawId());
331331
pb->SetGroupGeneration(vdiskId.GroupGeneration);
332332
pb->SetFailRealm(vdiskId.FailRealm);
@@ -338,9 +338,6 @@ void SerializeVSlotInfo(NKikimrSysView::TVSlotInfo *pb, const TVDiskID& vdiskId,
338338
if (m.HasAvailableSize()) {
339339
pb->SetAvailableSize(m.GetAvailableSize());
340340
}
341-
if (!status && CyclesToDuration(GetCycleCountFast() - statusTimestamp) > TDuration::Seconds(15)) {
342-
status = NKikimrBlobStorage::EVDiskStatus::ERROR;
343-
}
344341
if (status) {
345342
pb->SetStatusV2(NKikimrBlobStorage::EVDiskStatus_Name(*status));
346343
}
@@ -352,7 +349,7 @@ void SerializeVSlotInfo(NKikimrSysView::TVSlotInfo *pb, const TVDiskID& vdiskId,
352349

353350
void CopyInfo(NKikimrSysView::TVSlotInfo* info, const THolder<TBlobStorageController::TVSlotInfo>& vSlotInfo) {
354351
SerializeVSlotInfo(info, vSlotInfo->GetVDiskId(), vSlotInfo->Metrics, vSlotInfo->VDiskStatus,
355-
vSlotInfo->VDiskStatusTimestamp, vSlotInfo->Kind, vSlotInfo->IsBeingDeleted());
352+
vSlotInfo->Kind, vSlotInfo->IsBeingDeleted());
356353
}
357354

358355
void CopyInfo(NKikimrSysView::TGroupInfo* info, const THolder<TBlobStorageController::TGroupInfo>& groupInfo) {
@@ -428,6 +425,21 @@ void TBlobStorageController::UpdateSystemViews() {
428425
return;
429426
}
430427

428+
const TMonotonic now = TActivationContext::Monotonic();
429+
const TDuration expiration = TDuration::Seconds(15);
430+
for (auto& [key, value] : VSlots) {
431+
if (!value->VDiskStatus && value->VDiskStatusTimestamp + expiration <= now) {
432+
value->VDiskStatus = NKikimrBlobStorage::ERROR;
433+
SysViewChangedVSlots.insert(key);
434+
}
435+
}
436+
for (auto& [key, value] : StaticVSlots) {
437+
if (!value.VDiskStatus && value.VDiskStatusTimestamp + expiration <= now) {
438+
value.VDiskStatus = NKikimrBlobStorage::ERROR;
439+
SysViewChangedVSlots.insert(key);
440+
}
441+
}
442+
431443
if (!SysViewChangedPDisks.empty() || !SysViewChangedVSlots.empty() || !SysViewChangedGroups.empty() ||
432444
!SysViewChangedStoragePools.empty() || SysViewChangedSettings) {
433445
auto update = MakeHolder<TEvControllerUpdateSystemViews>();
@@ -468,7 +480,7 @@ void TBlobStorageController::UpdateSystemViews() {
468480
if (SysViewChangedVSlots.count(vslotId)) {
469481
static const NKikimrBlobStorage::TVDiskMetrics zero;
470482
SerializeVSlotInfo(&state.VSlots[vslotId], vslot.VDiskId, vslot.VDiskMetrics ? *vslot.VDiskMetrics : zero,
471-
vslot.VDiskStatus, vslot.VDiskStatusTimestamp, vslot.VDiskKind, false);
483+
vslot.VDiskStatus, vslot.VDiskKind, false);
472484
}
473485
}
474486
if (StorageConfig.HasBlobStorageConfig()) {

ydb/core/mind/bscontroller/virtual_group.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@ namespace NKikimr::NBsController {
248248
if (const TGroupInfo *group = Self->FindGroup(GroupId); !group || group->VirtualGroupSetupMachineId != MachineId) {
249249
return true; // another machine is already running
250250
}
251-
State.emplace(*Self, Self->HostRecords, TActivationContext::Now());
251+
State.emplace(*Self, Self->HostRecords, TActivationContext::Now(), TActivationContext::Monotonic());
252252
TGroupInfo *group = State->Groups.FindForUpdate(GroupId);
253253
Y_ABORT_UNLESS(group);
254254
if (!Callback(*group, *State)) {
@@ -294,7 +294,7 @@ namespace NKikimr::NBsController {
294294
if (Token.expired()) {
295295
return true; // actor is already dead
296296
}
297-
State.emplace(*Self, Self->HostRecords, TActivationContext::Now());
297+
State.emplace(*Self, Self->HostRecords, TActivationContext::Now(), TActivationContext::Monotonic());
298298
const size_t n = State->BlobDepotDeleteQueue.Unshare().erase(GroupId);
299299
Y_ABORT_UNLESS(n == 1);
300300
TString error;
@@ -897,7 +897,7 @@ namespace NKikimr::NBsController {
897897
TTxType GetTxType() const override { return NBlobStorageController::TXTYPE_DECOMMIT_GROUP; }
898898

899899
bool Execute(TTransactionContext& txc, const TActorContext&) override {
900-
State.emplace(*Self, Self->HostRecords, TActivationContext::Now());
900+
State.emplace(*Self, Self->HostRecords, TActivationContext::Now(), TActivationContext::Monotonic());
901901
Action(*State);
902902
TString error;
903903
if (State->Changed() && !Self->CommitConfigUpdates(*State, true, true, true, txc, &error)) {

0 commit comments

Comments
 (0)