Skip to content

Commit 1253e8c

Browse files
authored
Add spilling stats per task (#7505)
1 parent 9f656ac commit 1253e8c

21 files changed

+206
-50
lines changed

ydb/core/tx/datashard/datashard_kqp.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -1020,6 +1020,10 @@ class TKqpTaskRunnerExecutionContext: public NDq::IDqTaskRunnerExecutionContext
10201020
return {};
10211021
}
10221022

1023+
// This execution context does not provide spilling counters: the override
// returns a default-constructed (empty) TIntrusivePtr, so callers must be
// prepared to receive a null pointer.
TIntrusivePtr<NDq::TSpillingTaskCounters> GetSpillingTaskCounters() const override {
1024+
return {};
1025+
}
1026+
10231027
NDq::TTxId GetTxId() const override {
10241028
return {};
10251029
}

ydb/library/yql/dq/actors/compute/dq_compute_actor_stats.cpp

+8
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,14 @@ void FillTaskRunnerStats(ui64 taskId, ui32 stageId, const TTaskRunnerStatsBase&
6262
protoTask->SetWaitInputTimeUs(taskStats.WaitInputTime.MicroSeconds());
6363
protoTask->SetWaitOutputTimeUs(taskStats.WaitOutputTime.MicroSeconds());
6464

65+
protoTask->SetSpillingComputeWriteBytes(taskStats.SpillingComputeWriteBytes);
66+
protoTask->SetSpillingChannelWriteBytes(taskStats.SpillingChannelWriteBytes);
67+
68+
protoTask->SetSpillingComputeReadTimeUs(taskStats.SpillingComputeReadTime.MicroSeconds());
69+
protoTask->SetSpillingComputeWriteTimeUs(taskStats.SpillingComputeWriteTime.MicroSeconds());
70+
protoTask->SetSpillingChannelReadTimeUs(taskStats.SpillingChannelReadTime.MicroSeconds());
71+
protoTask->SetSpillingChannelWriteTimeUs(taskStats.SpillingChannelWriteTime.MicroSeconds());
72+
6573
if (StatsLevelCollectProfile(level)) {
6674
if (taskStats.ComputeCpuTimeByRun) {
6775
auto snapshot = taskStats.ComputeCpuTimeByRun->Snapshot();

ydb/library/yql/dq/actors/compute/dq_sync_compute_actor_base.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -209,13 +209,13 @@ class TDqSyncComputeActorBase: public TDqComputeActorBase<TDerived, TComputeActo
209209
if (!limits.OutputChunkMaxSize) {
210210
limits.OutputChunkMaxSize = GetDqExecutionSettings().FlowControl.MaxOutputChunkSize;
211211
}
212-
213-
TaskRunner->Prepare(this->Task, limits, execCtx);
214-
212+
215213
if (this->Task.GetEnableSpilling()) {
216214
TaskRunner->SetSpillerFactory(std::make_shared<TDqSpillerFactory>(execCtx.GetTxId(), NActors::TActivationContext::ActorSystem(), execCtx.GetWakeupCallback(), execCtx.GetErrorCallback()));
217215
}
218216

217+
TaskRunner->Prepare(this->Task, limits, execCtx);
218+
219219
for (auto& [channelId, channel] : this->InputChannelsMap) {
220220
channel.Channel = TaskRunner->GetInputChannel(channelId);
221221
}

ydb/library/yql/dq/actors/compute/dq_task_runner_exec_ctx.cpp

+7-2
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,17 @@ TDqTaskRunnerExecutionContext::TDqTaskRunnerExecutionContext(TTxId txId, TWakeUp
1010
: TxId_(txId)
1111
, WakeUpCallback_(std::move(wakeUpCallback))
1212
, ErrorCallback_(std::move(errorCallback))
13+
, SpillingTaskCounters_(MakeIntrusive<TSpillingTaskCounters>())
1314
{
1415
}
1516

1617
IDqChannelStorage::TPtr TDqTaskRunnerExecutionContext::CreateChannelStorage(ui64 channelId, bool withSpilling) const {
1718
return CreateChannelStorage(channelId, withSpilling, NActors::TlsActivationContext->ActorSystem());
1819
}
1920

20-
IDqChannelStorage::TPtr TDqTaskRunnerExecutionContext::CreateChannelStorage(ui64 channelId, bool withSpilling, NActors::TActorSystem* actorSystem) const {
21+
IDqChannelStorage::TPtr TDqTaskRunnerExecutionContext::CreateChannelStorage(ui64 channelId, bool withSpilling, NActors::TActorSystem* actorSystem) const {
2122
if (withSpilling) {
22-
return CreateDqChannelStorage(TxId_, channelId, WakeUpCallback_, ErrorCallback_, actorSystem);
23+
return CreateDqChannelStorage(TxId_, channelId, WakeUpCallback_, ErrorCallback_, SpillingTaskCounters_, actorSystem);
2324
} else {
2425
return nullptr;
2526
}
@@ -33,6 +34,10 @@ TErrorCallback TDqTaskRunnerExecutionContext::GetErrorCallback() const {
3334
return ErrorCallback_;
3435
}
3536

37+
// Returns the per-context spilling counters object. It is created once in the
// constructor (MakeIntrusive<TSpillingTaskCounters>()) and the same pointer is
// handed to CreateDqChannelStorage() when a channel is created with spilling.
TIntrusivePtr<TSpillingTaskCounters> TDqTaskRunnerExecutionContext::GetSpillingTaskCounters() const {
38+
return SpillingTaskCounters_;
39+
}
40+
3641
TTxId TDqTaskRunnerExecutionContext::GetTxId() const {
3742
return TxId_;
3843
}

ydb/library/yql/dq/actors/compute/dq_task_runner_exec_ctx.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,17 @@ class TDqTaskRunnerExecutionContext : public TDqTaskRunnerExecutionContextBase {
1313

1414
IDqChannelStorage::TPtr CreateChannelStorage(ui64 channelId, bool withSpilling) const override;
1515
IDqChannelStorage::TPtr CreateChannelStorage(ui64 channelId, bool withSpilling, NActors::TActorSystem* actorSystem) const override;
16-
16+
1717
TWakeUpCallback GetWakeupCallback() const override;
1818
TErrorCallback GetErrorCallback() const override;
19+
TIntrusivePtr<TSpillingTaskCounters> GetSpillingTaskCounters() const override;
1920
TTxId GetTxId() const override;
2021

2122
private:
2223
const TTxId TxId_;
2324
const TWakeUpCallback WakeUpCallback_;
2425
const TErrorCallback ErrorCallback_;
26+
const TIntrusivePtr<TSpillingTaskCounters> SpillingTaskCounters_;
2527
};
2628

2729
} // namespace NDq

ydb/library/yql/dq/actors/protos/dq_stats.proto

+8
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,14 @@ message TDqTaskStats {
195195
repeated TDqOutputChannelStats OutputChannels = 153;
196196
repeated TDqAsyncInputBufferStats InputTransforms = 155;
197197

198+
uint64 SpillingComputeWriteBytes = 160;
199+
uint64 SpillingChannelWriteBytes = 161;
200+
201+
uint64 SpillingComputeReadTimeUs = 162;
202+
uint64 SpillingComputeWriteTimeUs = 163;
203+
uint64 SpillingChannelReadTimeUs = 164;
204+
uint64 SpillingChannelWriteTimeUs = 165;
205+
198206
// profile stats
199207
repeated TDqTableStats Tables = 10;
200208

ydb/library/yql/dq/actors/spilling/channel_storage.cpp

+6-3
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,11 @@ class TDqChannelStorage : public IDqChannelStorage {
3030
NThreading::TFuture<void> IsBlobWrittenFuture_;
3131
};
3232
public:
33-
TDqChannelStorage(TTxId txId, ui64 channelId, TWakeUpCallback&& wakeUpCallback, TErrorCallback&& errorCallback, TActorSystem* actorSystem)
33+
TDqChannelStorage(TTxId txId, ui64 channelId, TWakeUpCallback&& wakeUpCallback, TErrorCallback&& errorCallback,
34+
TIntrusivePtr<TSpillingTaskCounters> spillingTaskCounters, TActorSystem* actorSystem)
3435
: ActorSystem_(actorSystem)
3536
{
36-
ChannelStorageActor_ = CreateDqChannelStorageActor(txId, channelId, std::move(wakeUpCallback), std::move(errorCallback), actorSystem);
37+
ChannelStorageActor_ = CreateDqChannelStorageActor(txId, channelId, std::move(wakeUpCallback), std::move(errorCallback), spillingTaskCounters, actorSystem);
3738
ChannelStorageActorId_ = ActorSystem_->Register(ChannelStorageActor_->GetActor());
3839
}
3940

@@ -119,12 +120,14 @@ class TDqChannelStorage : public IDqChannelStorage {
119120

120121
} // anonymous namespace
121122

123+
122124
IDqChannelStorage::TPtr CreateDqChannelStorage(TTxId txId, ui64 channelId,
123125
TWakeUpCallback wakeUpCallback,
124126
TErrorCallback errorCallback,
127+
TIntrusivePtr<TSpillingTaskCounters> spillingTaskCounters,
125128
TActorSystem* actorSystem)
126129
{
127-
return new TDqChannelStorage(txId, channelId, std::move(wakeUpCallback), std::move(errorCallback), actorSystem);
130+
return new TDqChannelStorage(txId, channelId, std::move(wakeUpCallback), std::move(errorCallback), spillingTaskCounters, actorSystem);
128131
}
129132

130133
} // namespace NYql::NDq

ydb/library/yql/dq/actors/spilling/channel_storage.h

+3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#pragma once
22

3+
#include "spilling_counters.h"
4+
35
#include <ydb/library/yql/dq/common/dq_common.h>
46
#include <ydb/library/yql/dq/runtime/dq_channel_storage.h>
57
#include <ydb/library/actors/core/actor.h>
@@ -13,6 +15,7 @@ namespace NYql::NDq {
1315
IDqChannelStorage::TPtr CreateDqChannelStorage(TTxId txId, ui64 channelId,
1416
TWakeUpCallback wakeUpCallback,
1517
TErrorCallback errorCallback,
18+
TIntrusivePtr<TSpillingTaskCounters> spillingTaskCounters,
1619
NActors::TActorSystem* actorSystem);
1720

1821
} // namespace NYql::NDq

ydb/library/yql/dq/actors/spilling/channel_storage_actor.cpp

+45-9
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,26 @@ class TDqChannelStorageActor : public IDqChannelStorageActor,
4141
public NActors::TActorBootstrapped<TDqChannelStorageActor>
4242
{
4343
using TBase = TActorBootstrapped<TDqChannelStorageActor>;
44+
45+
// Bookkeeping for one in-flight blob write, stored in WritingBlobs_ by blob id.
struct TWritingBlobInfo {
46+
// Size of the blob being written (msg.Blob_.size() at the time of TEvPut);
// added to ChannelWriteBytes once the write is confirmed.
ui64 Size;
47+
// Promise taken from the TEvPut message; fulfilled when the write completes.
NThreading::TPromise<void> SavePromise;
48+
// TInstant::Now() captured when TEvPut was handled; used to compute the
// write duration on completion.
TInstant OpBegin;
49+
};
50+
51+
// Bookkeeping for one in-flight blob read, stored in LoadingBlobs_ by blob id.
struct TLoadingBlobInfo {
52+
// Promise taken from the TEvGet message; receives the blob contents when
// the read completes.
NThreading::TPromise<TBuffer> BlobPromise;
53+
// TInstant::Now() captured when TEvGet was handled; used to compute the
// read duration on completion.
TInstant OpBegin;
54+
};
4455
public:
4556

46-
TDqChannelStorageActor(TTxId txId, ui64 channelId, TWakeUpCallback&& wakeUpCallback, TErrorCallback&& errorCallback, TActorSystem* actorSystem)
57+
TDqChannelStorageActor(TTxId txId, ui64 channelId, TWakeUpCallback&& wakeUpCallback, TErrorCallback&& errorCallback,
58+
TIntrusivePtr<TSpillingTaskCounters> spillingTaskCounters, TActorSystem* actorSystem)
4759
: TxId_(txId)
4860
, ChannelId_(channelId)
4961
, WakeUpCallback_(std::move(wakeUpCallback))
5062
, ErrorCallback_(std::move(errorCallback))
63+
, SpillingTaskCounters_(spillingTaskCounters)
5164
, ActorSystem_(actorSystem)
5265
{}
5366

@@ -101,8 +114,11 @@ class TDqChannelStorageActor : public IDqChannelStorageActor,
101114
void HandleWork(TEvDqChannelSpilling::TEvGet::TPtr& ev) {
102115
auto& msg = *ev->Get();
103116
LOG_T("[TEvGet] blobId: " << msg.BlobId_);
117+
118+
auto opBegin = TInstant::Now();
104119

105-
LoadingBlobs_.emplace(msg.BlobId_, std::move(msg.Promise_));
120+
auto loadingBlobInfo = TLoadingBlobInfo{std::move(msg.Promise_), opBegin};
121+
LoadingBlobs_.emplace(msg.BlobId_, std::move(loadingBlobInfo));
106122

107123
SendInternal(SpillingActorId_, new TEvDqSpilling::TEvRead(msg.BlobId_));
108124
}
@@ -111,7 +127,10 @@ class TDqChannelStorageActor : public IDqChannelStorageActor,
111127
auto& msg = *ev->Get();
112128
LOG_T("[TEvPut] blobId: " << msg.BlobId_);
113129

114-
WritingBlobs_.emplace(msg.BlobId_, std::move(msg.Promise_));
130+
auto opBegin = TInstant::Now();
131+
132+
auto writingBlobInfo = TWritingBlobInfo{msg.Blob_.size(), std::move(msg.Promise_), opBegin};
133+
WritingBlobs_.emplace(msg.BlobId_, std::move(writingBlobInfo));
115134

116135
SendInternal(SpillingActorId_, new TEvDqSpilling::TEvWrite(msg.BlobId_, std::move(msg.Blob_)));
117136
}
@@ -126,8 +145,15 @@ class TDqChannelStorageActor : public IDqChannelStorageActor,
126145
return;
127146
}
128147

148+
auto& blobInfo = it->second;
149+
150+
if (SpillingTaskCounters_) {
151+
SpillingTaskCounters_->ChannelWriteBytes += blobInfo.Size;
152+
auto opDuration = TInstant::Now() - blobInfo.OpBegin;
153+
SpillingTaskCounters_->ChannelWriteTime += opDuration.MilliSeconds();
154+
}
129155
// Complete the future
130-
it->second.SetValue();
156+
blobInfo.SavePromise.SetValue();
131157
WritingBlobs_.erase(it);
132158

133159
WakeUpCallback_();
@@ -143,7 +169,14 @@ class TDqChannelStorageActor : public IDqChannelStorageActor,
143169
return;
144170
}
145171

146-
it->second.SetValue(std::move(msg.Blob));
172+
auto& blobInfo = it->second;
173+
174+
if (SpillingTaskCounters_) {
175+
auto opDuration = TInstant::Now() - blobInfo.OpBegin;
176+
SpillingTaskCounters_->ChannelReadTime += opDuration.MilliSeconds();
177+
}
178+
179+
blobInfo.BlobPromise.SetValue(std::move(msg.Blob));
147180
LoadingBlobs_.erase(it);
148181

149182
WakeUpCallback_();
@@ -163,15 +196,17 @@ class TDqChannelStorageActor : public IDqChannelStorageActor,
163196
private:
164197
const TTxId TxId_;
165198
const ui64 ChannelId_;
199+
166200
TWakeUpCallback WakeUpCallback_;
167201
TErrorCallback ErrorCallback_;
202+
TIntrusivePtr<TSpillingTaskCounters> SpillingTaskCounters_;
168203
TActorId SpillingActorId_;
169204

170-
// BlobId -> promise that blob is saved
171-
std::unordered_map<ui64, NThreading::TPromise<void>> WritingBlobs_;
205+
// BlobId -> blob size, promise that blob is saved, and write start time
206+
std::unordered_map<ui64, TWritingBlobInfo> WritingBlobs_;
172207

173208
// BlobId -> promise with requested blob and read start time
174-
std::unordered_map<ui64, NThreading::TPromise<TBuffer>> LoadingBlobs_;
209+
std::unordered_map<ui64, TLoadingBlobInfo> LoadingBlobs_;
175210

176211
TActorSystem* ActorSystem_;
177212
};
@@ -181,9 +216,10 @@ class TDqChannelStorageActor : public IDqChannelStorageActor,
181216
IDqChannelStorageActor* CreateDqChannelStorageActor(TTxId txId, ui64 channelId,
182217
TWakeUpCallback&& wakeUpCallback,
183218
TErrorCallback&& errorCallback,
219+
TIntrusivePtr<TSpillingTaskCounters> spillingTaskCounters,
184220
NActors::TActorSystem* actorSystem)
185221
{
186-
return new TDqChannelStorageActor(txId, channelId, std::move(wakeUpCallback), std::move(errorCallback), actorSystem);
222+
return new TDqChannelStorageActor(txId, channelId, std::move(wakeUpCallback), std::move(errorCallback), spillingTaskCounters, actorSystem);
187223
}
188224

189225
} // namespace NYql::NDq

ydb/library/yql/dq/actors/spilling/channel_storage_actor.h

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#include "spilling_counters.h"
2+
13
#include <ydb/library/yql/dq/runtime/dq_channel_storage.h>
24
#include "ydb/library/yql/dq/common/dq_common.h"
35

@@ -49,6 +51,8 @@ class IDqChannelStorageActor
4951
virtual NActors::IActor* GetActor() = 0;
5052
};
5153

52-
IDqChannelStorageActor* CreateDqChannelStorageActor(TTxId txId, ui64 channelId, TWakeUpCallback&& wakeUpCallback, TErrorCallback&& errorCallback, NActors::TActorSystem* actorSystem);
54+
55+
IDqChannelStorageActor* CreateDqChannelStorageActor(TTxId txId, ui64 channelId, TWakeUpCallback&& wakeUpCallback, TErrorCallback&& errorCallback,
56+
TIntrusivePtr<TSpillingTaskCounters> spillingTaskCounters, NActors::TActorSystem* actorSystem);
5357

5458
} // namespace NYql::NDq

ydb/library/yql/dq/actors/spilling/compute_storage.cpp

+3-2
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,11 @@ namespace NYql::NDq {
66

77
using namespace NActors;
88

9-
TDqComputeStorage::TDqComputeStorage(TTxId txId, TWakeUpCallback wakeUpCallback, TErrorCallback errorCallback, TActorSystem* actorSystem) : ActorSystem_(actorSystem) {
9+
TDqComputeStorage::TDqComputeStorage(TTxId txId, TWakeUpCallback wakeUpCallback, TErrorCallback errorCallback,
10+
TIntrusivePtr<TSpillingTaskCounters> spillingTaskCounters, TActorSystem* actorSystem) : ActorSystem_(actorSystem) {
1011
TStringStream spillerName;
1112
spillerName << "Spiller" << "_" << CreateGuidAsString();
12-
ComputeStorageActor_ = CreateDqComputeStorageActor(txId, spillerName.Str(), wakeUpCallback, errorCallback);
13+
ComputeStorageActor_ = CreateDqComputeStorageActor(txId, spillerName.Str(), wakeUpCallback, errorCallback, spillingTaskCounters);
1314
ComputeStorageActorId_ = ActorSystem_->Register(ComputeStorageActor_->GetActor());
1415
}
1516

ydb/library/yql/dq/actors/spilling/compute_storage.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ namespace NYql::NDq {
1616
class TDqComputeStorage : public NKikimr::NMiniKQL::ISpiller
1717
{
1818
public:
19-
20-
TDqComputeStorage(TTxId txId, TWakeUpCallback wakeUpCallback, TErrorCallback errorCallback, NActors::TActorSystem* actorSystem);
19+
TDqComputeStorage(TTxId txId, TWakeUpCallback wakeUpCallback, TErrorCallback errorCallback,
20+
TIntrusivePtr<TSpillingTaskCounters> spillingTaskCounters, NActors::TActorSystem* actorSystem);
2121

2222
~TDqComputeStorage();
2323

0 commit comments

Comments
 (0)