Skip to content

Commit 5a3ebee

Browse files
va-kuznecovVlad Kuznecov
and
Vlad Kuznecov
authored
Add more introspection into PDisk's ChunkWrite and LogWrite (#7941)
Co-authored-by: Vlad Kuznecov <[email protected]>
1 parent 6567566 commit 5a3ebee

7 files changed

+42
-24
lines changed

ydb/core/blobstorage/lwtrace_probes/blobstorage_probes.h

+14-8
Original file line numberDiff line numberDiff line change
@@ -207,22 +207,22 @@ struct TEventTypeField {
207207
NAMES("pdisk", "schedStep")) \
208208
PROBE(PDiskChunkReadPieceAddToScheduler, GROUPS("PDisk", "PDiskRequest"), \
209209
TYPES(TPDiskIdField, ui32, ui64, ui64), \
210-
NAMES("pdisk", "pieceIdx", "size", "offset")) \
211-
PROBE(PDiskChunkReadPieceSendToDevice, GROUPS("PDisk", "PDiskRequest"), \
212-
TYPES(TPDiskIdField, ui64), \
213-
NAMES("pdisk", "size")) \
210+
NAMES("pdisk", "pieceIdx", "offset", "size")) \
211+
PROBE(PDiskChunkReadPiecesSendToDevice, GROUPS("PDisk", "PDiskRequest"), \
212+
TYPES(TPDiskIdField), \
213+
NAMES("pdisk")) \
214214
PROBE(PDiskChunkReadPieceComplete, GROUPS("PDisk", "PDiskRequest"), \
215-
TYPES(TPDiskIdField, ui64, ui64), \
216-
NAMES("pdisk", "size", "relativeOffset")) \
215+
TYPES(TPDiskIdField, ui64, ui64, double), \
216+
NAMES("pdisk", "size", "relativeOffset", "deviceTimeMs")) \
217217
PROBE(PDiskAddWritePieceToScheduler, GROUPS("PDisk", "PDiskRequest"), \
218218
TYPES(TPDiskIdField, ui64, double, ui64, bool, ui64, ui64), \
219219
NAMES("pdisk", "reqId", "creationTimeSec", "owner", "isFast", "priorityClass", "size")) \
220220
PROBE(PDiskChunkWritePieceSendToDevice, GROUPS("PDisk", "PDiskRequest"), \
221221
TYPES(TPDiskIdField, ui64, ui64, ui64, ui64), \
222222
NAMES("pdisk", "owner", "chunkIdx", "pieceOffset", "pieceSize")) \
223223
PROBE(PDiskLogWriteComplete, GROUPS("PDisk", "PDiskRequest"), \
224-
TYPES(TPDiskIdField, ui64, double, double, double, double, double, double), \
225-
NAMES("pdisk", "reqId", "creationTimeSec", "costMs", "responseTimeMs", "inputTimeMs", "scheduleTimeMs", "deviceTotalTimeMs")) \
224+
TYPES(TPDiskIdField, ui64, double, double, double, double, double, double, double), \
225+
NAMES("pdisk", "reqId", "creationTimeSec", "costMs", "responseTimeMs", "inputTimeMs", "scheduleTimeMs", "deviceTotalTimeMs", "deviceOnlyTimeMs")) \
226226
PROBE(PDiskChunkResponseTime, GROUPS("PDisk", "PDiskRequest"), \
227227
TYPES(TPDiskIdField, ui64, ui64, double, ui64), \
228228
NAMES("pdisk", "reqId", "priorityClass", "responseTimeMs", "sizeBytes")) \
@@ -235,6 +235,12 @@ struct TEventTypeField {
235235
PROBE(PDiskDeviceWriteDuration, GROUPS("PDisk"), \
236236
TYPES(TPDiskIdField, double, ui64), \
237237
NAMES("pdisk", "deviceTimeMs", "size")) \
238+
PROBE(PDiskDeviceGetFromDevice, GROUPS("PDisk"), \
239+
TYPES(), \
240+
NAMES()) \
241+
PROBE(PDiskDeviceGetFromWaiting, GROUPS("PDisk"), \
242+
TYPES(), \
243+
NAMES()) \
238244
PROBE(PDiskDeviceTrimDuration, GROUPS("PDisk"), \
239245
TYPES(TPDiskIdField, double, ui64), \
240246
NAMES("pdisk", "trimTimeMs", "trimOffset")) \

ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice_async.cpp

+18-12
Original file line numberDiff line numberDiff line change
@@ -334,8 +334,8 @@ class TRealBlockDevice : public IBlockDevice {
334334
class TSharedCallback : public ICallback {
335335
ui64 NextPossibleNoop = 0;
336336
ui64 EndOffset = 0;
337-
ui64 PrevEventGotAtCycle = HPNow();
338-
ui64 PrevEstimationAtCycle = HPNow();
337+
NHPTimer::STime PrevEventGotAtCycle = HPNow();
338+
NHPTimer::STime PrevEstimationAtCycle = HPNow();
339339
ui64 PrevEstimatedCostNs = 0;
340340
ui64 PrevActualCostNs = 0;
341341

@@ -366,29 +366,32 @@ class TRealBlockDevice : public IBlockDevice {
366366
void Exec(TAsyncIoOperationResult *event) {
367367
IAsyncIoOperation *op = event->Operation;
368368
// Add up the execution time of all the events
369-
ui64 totalExecutionCycles = 0;
370-
ui64 totalCostNs = 0;
371-
ui64 eventGotAtCycle = HPNow();
369+
NHPTimer::STime eventGotAtCycle = HPNow();
372370
AtomicSet(Device.Mon.LastDoneOperationTimestamp, eventGotAtCycle);
373371

374372
TCompletionAction *completionAction = static_cast<TCompletionAction*>(op->GetCookie());
375373
FillCompletionAction(completionAction, op, event->Result);
374+
completionAction->GetTime = eventGotAtCycle;
375+
LWTRACK(PDiskDeviceGetFromDevice, completionAction->Orbit);
376+
if (completionAction->FlushAction) {
377+
completionAction->FlushAction->GetTime = eventGotAtCycle;
378+
LWTRACK(PDiskDeviceGetFromDevice, completionAction->FlushAction->Orbit);
379+
}
376380

377381
Device.QuitCounter.Decrement();
378382
Device.IdleCounter.Decrement();
379383
Device.FlightControl.MarkComplete(completionAction->OperationIdx);
380384

381-
ui64 startCycle = Max((ui64)completionAction->SubmitTime, PrevEventGotAtCycle);
382-
ui64 durationCycles = (eventGotAtCycle > startCycle) ? eventGotAtCycle - startCycle : 0;
383-
totalExecutionCycles = Max(totalExecutionCycles, durationCycles);
384-
totalCostNs += completionAction->CostNs;
385+
NHPTimer::STime startCycle = Max(completionAction->SubmitTime, (i64)PrevEventGotAtCycle);
386+
NHPTimer::STime durationCycles = (eventGotAtCycle > startCycle) ? eventGotAtCycle - startCycle : 0;
387+
NHPTimer::STime totalExecutionCycles = durationCycles;
388+
NHPTimer::STime totalCostNs = completionAction->CostNs;
385389

386-
bool isSeekExpected =
387-
((ui64)completionAction->SubmitTime + Device.SeekCostNs / 25ull >= PrevEventGotAtCycle);
390+
bool isSeekExpected = (completionAction->SubmitTime + (NHPTimer::STime)Device.SeekCostNs / 25ll >= PrevEventGotAtCycle);
388391

389392
const ui64 opSize = op->GetSize();
390393
Device.DecrementMonInFlight(op->GetType(), opSize);
391-
if (opSize == 0) {
394+
if (opSize == 0) { // Special case for flush operation, which is a read operation with 0 bytes size
392395
if (op->GetType() == IAsyncIoOperation::EType::PRead) {
393396
Y_ABORT_UNLESS(WaitingNoops[completionAction->OperationIdx % MaxWaitingNoops] == nullptr);
394397
WaitingNoops[completionAction->OperationIdx % MaxWaitingNoops] = completionAction;
@@ -432,6 +435,9 @@ class TRealBlockDevice : public IBlockDevice {
432435
while (NextPossibleNoop < firstIncompleteIdx) {
433436
ui64 i = NextPossibleNoop % MaxWaitingNoops;
434437
if (WaitingNoops[i] && WaitingNoops[i]->OperationIdx == NextPossibleNoop) {
438+
LWTRACK(PDiskDeviceGetFromWaiting, WaitingNoops[i]->Orbit);
439+
double durationMs = HPMilliSecondsFloat(HPNow() - WaitingNoops[i]->GetTime);
440+
Device.Mon.DeviceFlushDuration.Increment(durationMs);
435441
Device.CompletionThread->Schedule(WaitingNoops[i]);
436442
WaitingNoops[i] = nullptr;
437443
}

ydb/core/blobstorage/pdisk/blobstorage_pdisk_completion.h

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ namespace NKikimr::NPDisk {
1212
struct TCompletionAction {
1313
ui64 OperationIdx;
1414
NHPTimer::STime SubmitTime;
15+
NHPTimer::STime GetTime;
1516
TCompletionAction *FlushAction = nullptr;
1617
ui64 CostNs = 0;
1718
NWilson::TTraceId TraceId;

ydb/core/blobstorage/pdisk/blobstorage_pdisk_completion_impl.cpp

+4-2
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,8 @@ void TCompletionLogWrite::Exec(TActorSystem *actorSystem) {
5353
HPMilliSecondsFloat(now - evLog.CreationTime),
5454
HPMilliSecondsFloat(evLog.InputTime - evLog.CreationTime),
5555
HPMilliSecondsFloat(evLog.ScheduleTime - evLog.InputTime),
56-
HPMilliSecondsFloat(now - evLog.ScheduleTime));
56+
HPMilliSecondsFloat(now - evLog.ScheduleTime),
57+
HPMilliSecondsFloat(GetTime - SubmitTime));
5758
if (evLog.Result->Results) {
5859
evLog.Result->Results.front().Orbit = std::move(evLog.Orbit);
5960
}
@@ -274,7 +275,8 @@ void TCompletionChunkReadPart::Exec(TActorSystem *actorSystem) {
274275
endBadUserOffset = 0xffffffff;
275276
}
276277

277-
LWTRACK(PDiskChunkReadPieceComplete, Read->Orbit, PDisk->PDiskId, RawReadSize, CommonBufferOffset);
278+
double deviceTimeMs = HPMilliSecondsFloat(GetTime - SubmitTime);
279+
LWTRACK(PDiskChunkReadPieceComplete, Read->Orbit, PDisk->PDiskId, RawReadSize, CommonBufferOffset, deviceTimeMs);
278280
CumulativeCompletion->PartReadComplete(actorSystem);
279281
CumulativeCompletion = nullptr;
280282

ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp

+3-2
Original file line numberDiff line numberDiff line change
@@ -2289,6 +2289,7 @@ void TPDisk::ProcessChunkReadQueue() {
22892289
bool isComplete = false;
22902290
ui8 priorityClass = read->PriorityClass;
22912291
NHPTimer::STime creationTime = read->CreationTime;
2292+
LWTRACK(PDiskChunkReadPiecesSendToDevice, read->Orbit, PDiskId);
22922293
if (!read->IsReplied) {
22932294
LOG_DEBUG_S(*ActorSystem, NKikimrServices::BS_PDISK, "PDiskId# " << (ui32)PDiskId
22942295
<< " ReqId# " << reqId
@@ -3239,7 +3240,7 @@ void TPDisk::PushRequestToForseti(TRequestBase *request) {
32393240
// Schedule small job.
32403241
auto piece = new TChunkReadPiece(read, idx * smallJobSize,
32413242
smallJobSize * Format.SectorSize, false, std::move(span));
3242-
LWTRACK(PDiskChunkReadPieceAddToScheduler, read->Orbit, PDiskId, idx, idx * smallJobSize,
3243+
LWTRACK(PDiskChunkReadPieceAddToScheduler, read->Orbit, PDiskId, idx, idx * smallJobSize * Format.SectorSize,
32433244
smallJobSize * Format.SectorSize);
32443245
piece->EstimateCost(DriveModel);
32453246
piece->SelfPointer = piece;
@@ -3251,7 +3252,7 @@ void TPDisk::PushRequestToForseti(TRequestBase *request) {
32513252
auto piece = new TChunkReadPiece(read, smallJobCount * smallJobSize,
32523253
largeJobSize * Format.SectorSize, true, std::move(span));
32533254
LWTRACK(PDiskChunkReadPieceAddToScheduler, read->Orbit, PDiskId, smallJobCount,
3254-
smallJobCount * smallJobSize, largeJobSize * Format.SectorSize);
3255+
smallJobCount * smallJobSize * Format.SectorSize, largeJobSize * Format.SectorSize);
32553256
piece->EstimateCost(DriveModel);
32563257
piece->SelfPointer = piece;
32573258
AddJobToForseti(cbs, piece, request->JobKind);

ydb/core/blobstorage/pdisk/blobstorage_pdisk_mon.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ TPDiskMon::TPDiskMon(const TIntrusivePtr<::NMonitoring::TDynamicCounters>& count
142142
HISTOGRAM_INIT(DeviceReadDuration, deviceReadDuration);
143143
HISTOGRAM_INIT(DeviceWriteDuration, deviceWriteDuration);
144144
HISTOGRAM_INIT(DeviceTrimDuration, deviceTrimDuration);
145+
HISTOGRAM_INIT(DeviceFlushDuration, deviceFlushDuration);
145146

146147
TRACKER_INIT_IF_EXTENDED(LogQueueTime, logQueueTime, Time in millisec);
147148
TRACKER_INIT_IF_EXTENDED(GetQueueSyncLog, getQueueSyncLog, Time in millisec);

ydb/core/blobstorage/pdisk/blobstorage_pdisk_mon.h

+1
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,7 @@ struct TPDiskMon {
302302
THistogram DeviceReadDuration;
303303
THistogram DeviceWriteDuration;
304304
THistogram DeviceTrimDuration;
305+
THistogram DeviceFlushDuration;
305306

306307
// <BASE_BITS, EXP_BITS, FRAME_COUNT>
307308
using TDurationTracker = NMonitoring::TPercentileTrackerLg<5, 4, 15>;

0 commit comments

Comments
 (0)