Skip to content

Commit 6c0b232

Browse files
committed
YQ-3446 add queued time into query stats (ydb-platform#6965)
1 parent c2fb3fa commit 6c0b232

File tree

7 files changed

+49
-4
lines changed

7 files changed

+49
-4
lines changed

ydb/core/kqp/opt/kqp_query_plan.cpp

+31-2
Original file line numberDiff line numberDiff line change
@@ -2243,7 +2243,7 @@ TString AddSimplifiedPlan(const TString& planText, TIntrusivePtr<NOpt::TKqpOptim
22432243
return planJson.GetStringRobust();
22442244
}
22452245

2246-
TString SerializeTxPlans(const TVector<const TString>& txPlans, TIntrusivePtr<NOpt::TKqpOptimizeContext> optCtx, const TString commonPlanInfo = "") {
2246+
TString SerializeTxPlans(const TVector<const TString>& txPlans, TIntrusivePtr<NOpt::TKqpOptimizeContext> optCtx, const TString commonPlanInfo = "", const TString& queryStats = "") {
22472247
NJsonWriter::TBuf writer;
22482248
writer.SetIndentSpaces(2);
22492249

@@ -2266,6 +2266,15 @@ TString SerializeTxPlans(const TVector<const TString>& txPlans, TIntrusivePtr<NO
22662266
writer.BeginObject();
22672267
writer.WriteKey("Node Type").WriteString("Query");
22682268
writer.WriteKey("PlanNodeType").WriteString("Query");
2269+
2270+
if (queryStats) {
2271+
NJson::TJsonValue queryStatsJson;
2272+
NJson::ReadJsonTree(queryStats, &queryStatsJson, true);
2273+
2274+
writer.WriteKey("Stats");
2275+
writer.WriteJsonValue(&queryStatsJson);
2276+
}
2277+
22692278
writer.WriteKey("Plans");
22702279
writer.BeginList();
22712280

@@ -2705,7 +2714,27 @@ TString SerializeAnalyzePlan(const NKqpProto::TKqpStatsQuery& queryStats) {
27052714
txPlans.push_back(txPlan);
27062715
}
27072716
}
2708-
return SerializeTxPlans(txPlans, TIntrusivePtr<NOpt::TKqpOptimizeContext>());
2717+
2718+
NJsonWriter::TBuf writer;
2719+
writer.BeginObject();
2720+
2721+
if (queryStats.HasCompilation()) {
2722+
const auto& compilation = queryStats.GetCompilation();
2723+
2724+
writer.WriteKey("Compilation");
2725+
writer.BeginObject();
2726+
writer.WriteKey("FromCache").WriteBool(compilation.GetFromCache());
2727+
writer.WriteKey("DurationUs").WriteLongLong(compilation.GetDurationUs());
2728+
writer.WriteKey("CpuTimeUs").WriteLongLong(compilation.GetCpuTimeUs());
2729+
writer.EndObject();
2730+
}
2731+
2732+
writer.WriteKey("ProcessCpuTimeUs").WriteLongLong(queryStats.GetWorkerCpuTimeUs());
2733+
writer.WriteKey("TotalDurationUs").WriteLongLong(queryStats.GetDurationUs());
2734+
writer.WriteKey("QueuedTimeUs").WriteLongLong(queryStats.GetQueuedTimeUs());
2735+
writer.EndObject();
2736+
2737+
return SerializeTxPlans(txPlans, TIntrusivePtr<NOpt::TKqpOptimizeContext>(), "", writer.Str());
27092738
}
27102739

27112740
TString SerializeScriptPlan(const TVector<const TString>& queryPlans) {

ydb/core/kqp/session_actor/kqp_query_state.h

+1
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ class TKqpQueryState : public TNonCopyable {
114114
bool IsDocumentApiRestricted_ = false;
115115

116116
TInstant StartTime;
117+
TInstant ContinueTime;
117118
NYql::TKikimrQueryDeadlines QueryDeadlines;
118119
TKqpQueryStats QueryStats;
119120
bool KeepSession = false;

ydb/core/kqp/session_actor/kqp_query_stats.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,7 @@ ui64 CalcRequestUnit(const TKqpQueryStats& stats) {
210210
NKqpProto::TKqpStatsQuery TKqpQueryStats::ToProto() const {
211211
NKqpProto::TKqpStatsQuery result;
212212
result.SetDurationUs(DurationUs);
213+
result.SetQueuedTimeUs(QueuedTimeUs);
213214

214215
if (Compilation) {
215216
result.MutableCompilation()->SetFromCache(Compilation->FromCache);

ydb/core/kqp/session_actor/kqp_query_stats.h

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ namespace NKikimr::NKqp {
88

99
struct TKqpQueryStats {
1010
ui64 DurationUs = 0;
11+
ui64 QueuedTimeUs = 0;
1112
std::optional<TKqpStatsCompile> Compilation;
1213

1314
ui64 WorkerCpuTimeUs = 0;

ydb/core/kqp/session_actor/kqp_session_actor.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -475,6 +475,7 @@ class TKqpSessionActor : public TActorBootstrapped<TKqpSessionActor> {
475475

476476
void Handle(NWorkload::TEvContinueRequest::TPtr& ev) {
477477
YQL_ENSURE(QueryState);
478+
QueryState->ContinueTime = TInstant::Now();
478479

479480
if (ev->Get()->Status == Ydb::StatusIds::UNSUPPORTED) {
480481
LOG_T("Failed to place request in resource pool, feature flag is disabled");
@@ -1552,6 +1553,9 @@ class TKqpSessionActor : public TActorBootstrapped<TKqpSessionActor> {
15521553

15531554
stats->DurationUs = ((TInstant::Now() - QueryState->StartTime).MicroSeconds());
15541555
stats->WorkerCpuTimeUs = (QueryState->GetCpuTime().MicroSeconds());
1556+
if (const auto continueTime = QueryState->ContinueTime) {
1557+
stats->QueuedTimeUs = (continueTime - QueryState->StartTime).MicroSeconds();
1558+
}
15551559
if (QueryState->CompileResult) {
15561560
stats->Compilation.emplace();
15571561
stats->Compilation->FromCache = (QueryState->CompileStats.FromCache);

ydb/core/kqp/workload_service/actors/pool_handlers_acors.cpp

+10-2
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,12 @@ class TPoolHandlerActorBase : public TActor<TDerived> {
5454
UpdateConfigCounters(poolConfig);
5555
}
5656

57+
// Reports how long a request has been running since it was allowed to continue.
// The elapsed time between `continueTime` and now is collected, in milliseconds,
// into RequestsLatencyMs. When `continueTime` evaluates to false (it was never
// assigned), nothing is collected.
void CollectRequestLatency(TInstant continueTime) {
    if (!continueTime) {
        return;
    }

    const auto elapsed = TInstant::Now() - continueTime;
    RequestsLatencyMs->Collect(elapsed.MilliSeconds());
}
62+
5763
void UpdateConfigCounters(const NResourcePool::TPoolSettings& poolConfig) {
5864
InFlightLimit->Set(std::max(poolConfig.ConcurrentQueryLimit, 0));
5965
QueueSizeLimit->Set(std::max(poolConfig.QueueSize, 0));
@@ -106,6 +112,7 @@ class TPoolHandlerActorBase : public TActor<TDerived> {
106112
const TActorId WorkerActorId;
107113
const TString SessionId;
108114
const TInstant StartTime = TInstant::Now();
115+
TInstant ContinueTime;
109116

110117
EState State = EState::Pending;
111118
bool Started = false; // after TEvContinueRequest success
@@ -267,6 +274,7 @@ class TPoolHandlerActorBase : public TActor<TDerived> {
267274
if (status == Ydb::StatusIds::SUCCESS) {
268275
LocalInFlight++;
269276
request->Started = true;
277+
request->ContinueTime = TInstant::Now();
270278
Counters.LocalInFly->Inc();
271279
Counters.ContinueOk->Inc();
272280
Counters.DelayedTimeMs->Collect((TInstant::Now() - request->StartTime).MilliSeconds());
@@ -387,7 +395,7 @@ class TPoolHandlerActorBase : public TActor<TDerived> {
387395

388396
if (status == Ydb::StatusIds::SUCCESS) {
389397
Counters.CleanupOk->Inc();
390-
Counters.RequestsLatencyMs->Collect((TInstant::Now() - request->StartTime).MilliSeconds());
398+
Counters.CollectRequestLatency(request->ContinueTime);
391399
LOG_D("Reply cleanup success to " << request->WorkerActorId << ", session id: " << request->SessionId << ", local in flight: " << LocalInFlight);
392400
} else {
393401
Counters.CleanupError->Inc();
@@ -401,7 +409,7 @@ class TPoolHandlerActorBase : public TActor<TDerived> {
401409
this->Send(MakeKqpProxyID(this->SelfId().NodeId()), ev.release());
402410

403411
Counters.Cancelled->Inc();
404-
Counters.RequestsLatencyMs->Collect((TInstant::Now() - request->StartTime).MilliSeconds());
412+
Counters.CollectRequestLatency(request->ContinueTime);
405413
LOG_I("Cancel request for worker " << request->WorkerActorId << ", session id: " << request->SessionId << ", local in flight: " << LocalInFlight);
406414
}
407415

ydb/core/protos/kqp_stats.proto

+1
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ message TKqpExecutionExtraStats {
7373
message TKqpStatsQuery {
7474
// Basic stats
7575
uint64 DurationUs = 1;
76+
uint64 QueuedTimeUs = 9;
7677
TKqpStatsCompile Compilation = 2;
7778

7879
reserved 3; // repeated TKqpStatsExecution Executions = 3;

0 commit comments

Comments
 (0)