Skip to content

Correct PDisk/VDisk metrics reporting #5776

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions ydb/core/blobstorage/nodewarden/node_warden_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,6 @@ void TNodeWarden::Handle(NPDisk::TEvSlayResult::TPtr ev) {
SendVDiskReport(vslotId, msg.VDiskId, NKikimrBlobStorage::TEvControllerNodeReport::WIPED);
TVDiskRecord& vdisk = vdiskIt->second;
StartLocalVDiskActor(vdisk); // restart actor after successful wiping
SendDiskMetrics(false);
}
break;

Expand Down Expand Up @@ -540,6 +539,8 @@ void TNodeWarden::Handle(TEvBlobStorage::TEvControllerUpdateDiskStatus::TPtr ev)

auto& record = ev->Get()->Record;

std::unique_ptr<TEvBlobStorage::TEvControllerUpdateDiskStatus> updateDiskStatus;

for (const NKikimrBlobStorage::TVDiskMetrics& m : record.GetVDisksMetrics()) {
Y_ABORT_UNLESS(m.HasVSlotId());
const TVSlotId vslotId(m.GetVSlotId());
Expand All @@ -554,8 +555,11 @@ void TNodeWarden::Handle(TEvBlobStorage::TEvControllerUpdateDiskStatus::TPtr ev)
VDisksWithUnreportedMetrics.PushBack(&vdisk);
}
} else {
if (!updateDiskStatus) {
updateDiskStatus.reset(new TEvBlobStorage::TEvControllerUpdateDiskStatus);
}
updateDiskStatus->Record.AddVDisksMetrics()->CopyFrom(m);
vdisk.VDiskMetrics.emplace(m);
VDisksWithUnreportedMetrics.PushBack(&vdisk);
}
}
}
Expand All @@ -571,11 +575,18 @@ void TNodeWarden::Handle(TEvBlobStorage::TEvControllerUpdateDiskStatus::TPtr ev)
PDisksWithUnreportedMetrics.PushBack(&pdisk);
}
} else {
if (!updateDiskStatus) {
updateDiskStatus.reset(new TEvBlobStorage::TEvControllerUpdateDiskStatus);
}
updateDiskStatus->Record.AddPDisksMetrics()->CopyFrom(m);
pdisk.PDiskMetrics.emplace(m);
PDisksWithUnreportedMetrics.PushBack(&pdisk);
}
}
}

if (updateDiskStatus) {
SendToController(std::move(updateDiskStatus));
}
}

void TNodeWarden::Handle(TEvBlobStorage::TEvControllerGroupMetricsExchange::TPtr ev) {
Expand Down Expand Up @@ -637,7 +648,7 @@ void TNodeWarden::Handle(TEvStatusUpdate::TPtr ev) {
auto& vdisk = it->second;
vdisk.Status = msg->Status;
vdisk.OnlyPhantomsRemain = msg->OnlyPhantomsRemain;
SendDiskMetrics(false);
VDiskStatusChanged = true;

if (msg->Status == NKikimrBlobStorage::EVDiskStatus::READY && vdisk.WhiteboardVDiskId) {
Send(WhiteboardId, new NNodeWhiteboard::TEvWhiteboard::TEvVDiskDropDonors(*vdisk.WhiteboardVDiskId,
Expand Down
7 changes: 7 additions & 0 deletions ydb/core/blobstorage/nodewarden/node_warden_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -582,6 +582,8 @@ namespace NKikimr::NStorage {
IActor *CreateGroupResolverActor(ui32 groupId);
void Handle(TEvNodeWardenQueryGroupInfo::TPtr ev);

// Deferred "send disk metrics" flag. Code that assigns a new vdisk.Status sets this to true
// instead of calling SendDiskMetrics(false) directly; where this class tests the flag, it
// calls SendDiskMetrics(false) once and then resets the flag to false.
bool VDiskStatusChanged = false;

STATEFN(StateOnline) {
switch (ev->GetTypeRewrite()) {
fFunc(TEvBlobStorage::TEvPut::EventType, HandleForwarded);
Expand Down Expand Up @@ -663,6 +665,11 @@ namespace NKikimr::NStorage {
EnqueuePendingMessage(ev);
break;
}

if (VDiskStatusChanged) {
SendDiskMetrics(false);
VDiskStatusChanged = false;
}
}
};

Expand Down
1 change: 0 additions & 1 deletion ydb/core/blobstorage/nodewarden/node_warden_pdisk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,6 @@ namespace NKikimr::NStorage {
StartLocalVDiskActor(value);
}
}
SendDiskMetrics(false);

vdisks << "}";
STLOG(PRI_NOTICE, BS_NODE, NW74, "RestartLocalPDisk has finished",
Expand Down
24 changes: 13 additions & 11 deletions ydb/core/blobstorage/nodewarden/node_warden_pipe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,22 @@ void TNodeWarden::EstablishPipe() {
STLOG(PRI_DEBUG, BS_NODE, NW21, "EstablishPipe", (AvailDomainId, AvailDomainId),
(PipeClientId, PipeClientId), (ControllerId, controllerId));

for (auto& [key, pdisk] : LocalPDisks) {
if (pdisk.PDiskMetrics) {
PDisksWithUnreportedMetrics.PushBack(&pdisk);
}
}
for (auto& [key, vdisk] : LocalVDisks) {
vdisk.ScrubCookieForController = 0; // invalidate all pending requests to BS_CONTROLLER
if (vdisk.VDiskMetrics) {
VDisksWithUnreportedMetrics.PushBack(&vdisk);
}
}

SendRegisterNode();
SendInitialGroupRequests();
SendScrubRequests();
SendDiskMetrics(true);
}

void TNodeWarden::Handle(TEvTabletPipe::TEvClientConnected::TPtr ev) {
Expand All @@ -44,17 +57,6 @@ void TNodeWarden::Handle(TEvTabletPipe::TEvClientDestroyed::TPtr ev) {
}

void TNodeWarden::OnPipeError() {
for (auto& [key, pdisk] : LocalPDisks) {
if (pdisk.PDiskMetrics) {
PDisksWithUnreportedMetrics.PushBack(&pdisk);
}
}
for (auto& [key, vdisk] : LocalVDisks) {
vdisk.ScrubCookieForController = 0; // invalidate all pending requests to BS_CONTROLLER
if (vdisk.VDiskMetrics) {
VDisksWithUnreportedMetrics.PushBack(&vdisk);
}
}
for (const auto& [cookie, callback] : ConfigInFlight) {
callback(nullptr);
}
Expand Down
5 changes: 2 additions & 3 deletions ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,7 @@ namespace NKikimr::NStorage {
vdisk.ScrubCookieForController = 0; // and from controller too
vdisk.Status = NKikimrBlobStorage::EVDiskStatus::ERROR;
vdisk.ShutdownPending = true;

SendDiskMetrics(false);
VDiskStatusChanged = true;
}

void TNodeWarden::StartLocalVDiskActor(TVDiskRecord& vdisk) {
Expand Down Expand Up @@ -239,6 +238,7 @@ namespace NKikimr::NStorage {
vdisk.Status = NKikimrBlobStorage::EVDiskStatus::INIT_PENDING;
vdisk.ReportedVDiskStatus.reset();
vdisk.ScrubCookie = scrubCookie;
VDiskStatusChanged = true;
}

void TNodeWarden::HandleGone(STATEFN_SIG) {
Expand All @@ -259,7 +259,6 @@ namespace NKikimr::NStorage {
for (const auto& vdisk : serviceSet.GetVDisks()) {
ApplyLocalVDiskInfo(vdisk);
}
SendDiskMetrics(false);
}

void TNodeWarden::ApplyLocalVDiskInfo(const NKikimrBlobStorage::TNodeWardenServiceSet::TVDisk& vdisk) {
Expand Down
17 changes: 2 additions & 15 deletions ydb/core/mind/bscontroller/register_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,8 @@ class TBlobStorageController::TTxUpdateNodeDrives
{
NKikimrBlobStorage::TEvControllerUpdateNodeDrives Record;
std::optional<TConfigState> State;
std::unique_ptr<TEvBlobStorage::TEvControllerNodeServiceSetUpdate> Result;

void UpdateDevicesInfo(TConfigState& state, TEvBlobStorage::TEvControllerNodeServiceSetUpdate* result) {
void UpdateDevicesInfo(TConfigState& state) {
auto nodeId = Record.GetNodeId();

auto createLog = [&] () {
Expand Down Expand Up @@ -83,9 +82,6 @@ class TBlobStorageController::TTxUpdateNodeDrives
if (pdiskInfo.LastSeenSerial != serial) {
auto *item = getMutableItem();
item->LastSeenSerial = serial;
if (serial) {
Self->ReadPDisk(pdiskId, *item, result, NKikimrBlobStorage::RESTART);
}
}

return true;
Expand Down Expand Up @@ -164,14 +160,12 @@ class TBlobStorageController::TTxUpdateNodeDrives
bool Execute(TTransactionContext& txc, const TActorContext&) override {
const TNodeId nodeId = Record.GetNodeId();

Result = std::make_unique<TEvBlobStorage::TEvControllerNodeServiceSetUpdate>(NKikimrProto::OK, nodeId);

State.emplace(*Self, Self->HostRecords, TActivationContext::Now());
State->CheckConsistency();

auto updateIsSuccessful = true;
try {
UpdateDevicesInfo(*State, Result.get());
UpdateDevicesInfo(*State);
State->CheckConsistency();
} catch (const TExError& e) {
updateIsSuccessful = false;
Expand All @@ -181,10 +175,6 @@ class TBlobStorageController::TTxUpdateNodeDrives
"Error during UpdateDevicesInfo after receiving TEvControllerRegisterNode", (TExError, e.what()));
}

Result->Record.SetInstanceId(Self->InstanceId);
Result->Record.SetComprehensive(false);
Result->Record.SetAvailDomain(AppData()->DomainsInfo->GetDomain()->DomainUid);

TString error;
if (!updateIsSuccessful || (State->Changed() && !Self->CommitConfigUpdates(*State, false, false, false, txc, &error))) {
State->Rollback();
Expand All @@ -200,9 +190,6 @@ class TBlobStorageController::TTxUpdateNodeDrives
State->ApplyConfigUpdates();
State.reset();
}
if (Result) {
Self->SendToWarden(Record.GetNodeId(), std::move(Result), 0);
}
}
};

Expand Down
Loading