Skip to content

Commit f6b3726

Browse files
authored
Correct PDisk/VDisk metrics reporting (#5776)
1 parent d95d5c6 commit f6b3726

File tree

6 files changed

+39 -34 lines changed

ydb/core/blobstorage/nodewarden/node_warden_impl.cpp

+15 -4
Original file line number | Diff line number | Diff line change
@@ -338,7 +338,6 @@ void TNodeWarden::Handle(NPDisk::TEvSlayResult::TPtr ev) {
338338
SendVDiskReport(vslotId, msg.VDiskId, NKikimrBlobStorage::TEvControllerNodeReport::WIPED);
339339
TVDiskRecord& vdisk = vdiskIt->second;
340340
StartLocalVDiskActor(vdisk); // restart actor after successful wiping
341-
SendDiskMetrics(false);
342341
}
343342
break;
344343

@@ -540,6 +539,8 @@ void TNodeWarden::Handle(TEvBlobStorage::TEvControllerUpdateDiskStatus::TPtr ev)
540539

541540
auto& record = ev->Get()->Record;
542541

542+
std::unique_ptr<TEvBlobStorage::TEvControllerUpdateDiskStatus> updateDiskStatus;
543+
543544
for (const NKikimrBlobStorage::TVDiskMetrics& m : record.GetVDisksMetrics()) {
544545
Y_ABORT_UNLESS(m.HasVSlotId());
545546
const TVSlotId vslotId(m.GetVSlotId());
@@ -554,8 +555,11 @@ void TNodeWarden::Handle(TEvBlobStorage::TEvControllerUpdateDiskStatus::TPtr ev)
554555
VDisksWithUnreportedMetrics.PushBack(&vdisk);
555556
}
556557
} else {
558+
if (!updateDiskStatus) {
559+
updateDiskStatus.reset(new TEvBlobStorage::TEvControllerUpdateDiskStatus);
560+
}
561+
updateDiskStatus->Record.AddVDisksMetrics()->CopyFrom(m);
557562
vdisk.VDiskMetrics.emplace(m);
558-
VDisksWithUnreportedMetrics.PushBack(&vdisk);
559563
}
560564
}
561565
}
@@ -571,11 +575,18 @@ void TNodeWarden::Handle(TEvBlobStorage::TEvControllerUpdateDiskStatus::TPtr ev)
571575
PDisksWithUnreportedMetrics.PushBack(&pdisk);
572576
}
573577
} else {
578+
if (!updateDiskStatus) {
579+
updateDiskStatus.reset(new TEvBlobStorage::TEvControllerUpdateDiskStatus);
580+
}
581+
updateDiskStatus->Record.AddPDisksMetrics()->CopyFrom(m);
574582
pdisk.PDiskMetrics.emplace(m);
575-
PDisksWithUnreportedMetrics.PushBack(&pdisk);
576583
}
577584
}
578585
}
586+
587+
if (updateDiskStatus) {
588+
SendToController(std::move(updateDiskStatus));
589+
}
579590
}
580591

581592
void TNodeWarden::Handle(TEvBlobStorage::TEvControllerGroupMetricsExchange::TPtr ev) {
@@ -637,7 +648,7 @@ void TNodeWarden::Handle(TEvStatusUpdate::TPtr ev) {
637648
auto& vdisk = it->second;
638649
vdisk.Status = msg->Status;
639650
vdisk.OnlyPhantomsRemain = msg->OnlyPhantomsRemain;
640-
SendDiskMetrics(false);
651+
VDiskStatusChanged = true;
641652

642653
if (msg->Status == NKikimrBlobStorage::EVDiskStatus::READY && vdisk.WhiteboardVDiskId) {
643654
Send(WhiteboardId, new NNodeWhiteboard::TEvWhiteboard::TEvVDiskDropDonors(*vdisk.WhiteboardVDiskId,

ydb/core/blobstorage/nodewarden/node_warden_impl.h

+7
Original file line number | Diff line number | Diff line change
@@ -582,6 +582,8 @@ namespace NKikimr::NStorage {
582582
IActor *CreateGroupResolverActor(ui32 groupId);
583583
void Handle(TEvNodeWardenQueryGroupInfo::TPtr ev);
584584

585+
bool VDiskStatusChanged = false;
586+
585587
STATEFN(StateOnline) {
586588
switch (ev->GetTypeRewrite()) {
587589
fFunc(TEvBlobStorage::TEvPut::EventType, HandleForwarded);
@@ -663,6 +665,11 @@ namespace NKikimr::NStorage {
663665
EnqueuePendingMessage(ev);
664666
break;
665667
}
668+
669+
if (VDiskStatusChanged) {
670+
SendDiskMetrics(false);
671+
VDiskStatusChanged = false;
672+
}
666673
}
667674
};
668675

ydb/core/blobstorage/nodewarden/node_warden_pdisk.cpp

-1
Original file line number | Diff line number | Diff line change
@@ -254,7 +254,6 @@ namespace NKikimr::NStorage {
254254
StartLocalVDiskActor(value);
255255
}
256256
}
257-
SendDiskMetrics(false);
258257

259258
vdisks << "}";
260259
STLOG(PRI_NOTICE, BS_NODE, NW74, "RestartLocalPDisk has finished",

ydb/core/blobstorage/nodewarden/node_warden_pipe.cpp

+13 -11
Original file line number | Diff line number | Diff line change
@@ -18,9 +18,22 @@ void TNodeWarden::EstablishPipe() {
1818
STLOG(PRI_DEBUG, BS_NODE, NW21, "EstablishPipe", (AvailDomainId, AvailDomainId),
1919
(PipeClientId, PipeClientId), (ControllerId, controllerId));
2020

21+
for (auto& [key, pdisk] : LocalPDisks) {
22+
if (pdisk.PDiskMetrics) {
23+
PDisksWithUnreportedMetrics.PushBack(&pdisk);
24+
}
25+
}
26+
for (auto& [key, vdisk] : LocalVDisks) {
27+
vdisk.ScrubCookieForController = 0; // invalidate all pending requests to BS_CONTROLLER
28+
if (vdisk.VDiskMetrics) {
29+
VDisksWithUnreportedMetrics.PushBack(&vdisk);
30+
}
31+
}
32+
2133
SendRegisterNode();
2234
SendInitialGroupRequests();
2335
SendScrubRequests();
36+
SendDiskMetrics(true);
2437
}
2538

2639
void TNodeWarden::Handle(TEvTabletPipe::TEvClientConnected::TPtr ev) {
@@ -44,17 +57,6 @@ void TNodeWarden::Handle(TEvTabletPipe::TEvClientDestroyed::TPtr ev) {
4457
}
4558

4659
void TNodeWarden::OnPipeError() {
47-
for (auto& [key, pdisk] : LocalPDisks) {
48-
if (pdisk.PDiskMetrics) {
49-
PDisksWithUnreportedMetrics.PushBack(&pdisk);
50-
}
51-
}
52-
for (auto& [key, vdisk] : LocalVDisks) {
53-
vdisk.ScrubCookieForController = 0; // invalidate all pending requests to BS_CONTROLLER
54-
if (vdisk.VDiskMetrics) {
55-
VDisksWithUnreportedMetrics.PushBack(&vdisk);
56-
}
57-
}
5860
for (const auto& [cookie, callback] : ConfigInFlight) {
5961
callback(nullptr);
6062
}

ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp

+2 -3
Original file line number | Diff line number | Diff line change
@@ -49,8 +49,7 @@ namespace NKikimr::NStorage {
4949
vdisk.ScrubCookieForController = 0; // and from controller too
5050
vdisk.Status = NKikimrBlobStorage::EVDiskStatus::ERROR;
5151
vdisk.ShutdownPending = true;
52-
53-
SendDiskMetrics(false);
52+
VDiskStatusChanged = true;
5453
}
5554

5655
void TNodeWarden::StartLocalVDiskActor(TVDiskRecord& vdisk) {
@@ -239,6 +238,7 @@ namespace NKikimr::NStorage {
239238
vdisk.Status = NKikimrBlobStorage::EVDiskStatus::INIT_PENDING;
240239
vdisk.ReportedVDiskStatus.reset();
241240
vdisk.ScrubCookie = scrubCookie;
241+
VDiskStatusChanged = true;
242242
}
243243

244244
void TNodeWarden::HandleGone(STATEFN_SIG) {
@@ -259,7 +259,6 @@ namespace NKikimr::NStorage {
259259
for (const auto& vdisk : serviceSet.GetVDisks()) {
260260
ApplyLocalVDiskInfo(vdisk);
261261
}
262-
SendDiskMetrics(false);
263262
}
264263

265264
void TNodeWarden::ApplyLocalVDiskInfo(const NKikimrBlobStorage::TNodeWardenServiceSet::TVDisk& vdisk) {

ydb/core/mind/bscontroller/register_node.cpp

+2 -15
Original file line number | Diff line number | Diff line change
@@ -10,9 +10,8 @@ class TBlobStorageController::TTxUpdateNodeDrives
1010
{
1111
NKikimrBlobStorage::TEvControllerUpdateNodeDrives Record;
1212
std::optional<TConfigState> State;
13-
std::unique_ptr<TEvBlobStorage::TEvControllerNodeServiceSetUpdate> Result;
1413

15-
void UpdateDevicesInfo(TConfigState& state, TEvBlobStorage::TEvControllerNodeServiceSetUpdate* result) {
14+
void UpdateDevicesInfo(TConfigState& state) {
1615
auto nodeId = Record.GetNodeId();
1716

1817
auto createLog = [&] () {
@@ -83,9 +82,6 @@ class TBlobStorageController::TTxUpdateNodeDrives
8382
if (pdiskInfo.LastSeenSerial != serial) {
8483
auto *item = getMutableItem();
8584
item->LastSeenSerial = serial;
86-
if (serial) {
87-
Self->ReadPDisk(pdiskId, *item, result, NKikimrBlobStorage::RESTART);
88-
}
8985
}
9086

9187
return true;
@@ -164,14 +160,12 @@ class TBlobStorageController::TTxUpdateNodeDrives
164160
bool Execute(TTransactionContext& txc, const TActorContext&) override {
165161
const TNodeId nodeId = Record.GetNodeId();
166162

167-
Result = std::make_unique<TEvBlobStorage::TEvControllerNodeServiceSetUpdate>(NKikimrProto::OK, nodeId);
168-
169163
State.emplace(*Self, Self->HostRecords, TActivationContext::Now());
170164
State->CheckConsistency();
171165

172166
auto updateIsSuccessful = true;
173167
try {
174-
UpdateDevicesInfo(*State, Result.get());
168+
UpdateDevicesInfo(*State);
175169
State->CheckConsistency();
176170
} catch (const TExError& e) {
177171
updateIsSuccessful = false;
@@ -181,10 +175,6 @@ class TBlobStorageController::TTxUpdateNodeDrives
181175
"Error during UpdateDevicesInfo after receiving TEvControllerRegisterNode", (TExError, e.what()));
182176
}
183177

184-
Result->Record.SetInstanceId(Self->InstanceId);
185-
Result->Record.SetComprehensive(false);
186-
Result->Record.SetAvailDomain(AppData()->DomainsInfo->GetDomain()->DomainUid);
187-
188178
TString error;
189179
if (!updateIsSuccessful || (State->Changed() && !Self->CommitConfigUpdates(*State, false, false, false, txc, &error))) {
190180
State->Rollback();
@@ -200,9 +190,6 @@ class TBlobStorageController::TTxUpdateNodeDrives
200190
State->ApplyConfigUpdates();
201191
State.reset();
202192
}
203-
if (Result) {
204-
Self->SendToWarden(Record.GetNodeId(), std::move(Result), 0);
205-
}
206193
}
207194
};
208195

0 commit comments

Comments
 (0)