Skip to content

Commit ae68e9d

Browse files
committed
YQ-3151 added issues for timeout and cancelled (ydb-platform#8718)
1 parent f328b7c commit ae68e9d

File tree

7 files changed

+58
-21
lines changed

7 files changed

+58
-21
lines changed

ydb/core/kqp/executer_actor/kqp_executer_impl.h

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -710,7 +710,10 @@ class TKqpExecuterBase : public TActorBootstrapped<TDerived> {
710710
if (statusCode == Ydb::StatusIds::INTERNAL_ERROR) {
711711
InternalError(issues);
712712
} else if (statusCode == Ydb::StatusIds::TIMEOUT) {
713-
AbortExecutionAndDie(ev->Sender, NYql::NDqProto::StatusIds::TIMEOUT, "Request timeout exceeded");
713+
if (issues.Empty()) {
714+
issues.AddIssue("Request timeout exceeded");
715+
}
716+
AbortExecutionAndDie(ev->Sender, NYql::NDqProto::StatusIds::TIMEOUT, issues);
714717
} else {
715718
RuntimeError(NYql::NDq::DqStatusToYdbStatus(msg.GetStatusCode()), issues);
716719
}
@@ -1679,16 +1682,21 @@ class TKqpExecuterBase : public TActorBootstrapped<TDerived> {
16791682
}
16801683

16811684
void AbortExecutionAndDie(TActorId abortSender, NYql::NDqProto::StatusIds::StatusCode status, const TString& message) {
1685+
AbortExecutionAndDie(abortSender, status, {NYql::TIssue(message)});
1686+
}
1687+
1688+
void AbortExecutionAndDie(TActorId abortSender, NYql::NDqProto::StatusIds::StatusCode status, const NYql::TIssues& issues) {
16821689
if (AlreadyReplied) {
16831690
return;
16841691
}
16851692

1686-
LOG_E("Abort execution: " << NYql::NDqProto::StatusIds_StatusCode_Name(status) << "," << message);
1693+
LOG_E("Abort execution: " << NYql::NDqProto::StatusIds_StatusCode_Name(status) << ", " << issues.ToOneLineString());
16871694
if (ExecuterSpan) {
16881695
ExecuterSpan.EndError(TStringBuilder() << NYql::NDqProto::StatusIds_StatusCode_Name(status));
16891696
}
16901697

16911698
static_cast<TDerived*>(this)->FillResponseStats(Ydb::StatusIds::TIMEOUT);
1699+
NYql::IssuesToMessage(issues, ResponseEv->Record.MutableResponse()->MutableIssues());
16921700

16931701
// TEvAbortExecution can come from either ComputeActor or SessionActor (== Target).
16941702
if (abortSender != Target) {
@@ -1701,7 +1709,7 @@ class TKqpExecuterBase : public TActorBootstrapped<TDerived> {
17011709
this->Send(Target, ResponseEv.release());
17021710

17031711
Request.Transactions.crop(0);
1704-
TerminateComputeActors(Ydb::StatusIds::TIMEOUT, message);
1712+
TerminateComputeActors(Ydb::StatusIds::TIMEOUT, issues);
17051713
this->PassAway();
17061714
}
17071715

ydb/core/kqp/proxy_service/kqp_proxy_service.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -153,12 +153,14 @@ class TKqpProxyService : public TActorBootstrapped<TKqpProxyService> {
153153
struct TEvOnRequestTimeout: public TEventLocal<TEvOnRequestTimeout, EEv::EvOnRequestTimeout> {
154154
ui64 RequestId;
155155
TDuration Timeout;
156+
TDuration InitialTimeout;
156157
NYql::NDqProto::StatusIds::StatusCode Status;
157158
int Round;
158159

159160
TEvOnRequestTimeout(ui64 requestId, TDuration timeout, NYql::NDqProto::StatusIds::StatusCode status, int round)
160161
: RequestId(requestId)
161162
, Timeout(timeout)
163+
, InitialTimeout(timeout)
162164
, Status(status)
163165
, Round(round)
164166
{}
@@ -1247,9 +1249,9 @@ class TKqpProxyService : public TActorBootstrapped<TKqpProxyService> {
12471249

12481250
const TKqpSessionInfo* info = LocalSessions->FindPtr(reqInfo->SessionId);
12491251
if (msg->Round == 0 && info) {
1250-
TString message = TStringBuilder()
1251-
<< "request's " << (msg->Status == NYql::NDqProto::StatusIds::TIMEOUT ? "timeout" : "cancelAfter")
1252-
<< " exceeded";
1252+
TString message = msg->Status == NYql::NDqProto::StatusIds::TIMEOUT
1253+
? (TStringBuilder() << "Request timeout " << msg->Timeout.MilliSeconds() << "ms exceeded")
1254+
: (TStringBuilder() << "Request canceled after " << msg->Timeout.MilliSeconds() << "ms");
12531255

12541256
Send(info->WorkerId, new TEvKqp::TEvAbortExecution(msg->Status, message));
12551257

@@ -1261,7 +1263,7 @@ class TKqpProxyService : public TActorBootstrapped<TKqpProxyService> {
12611263
}
12621264
} else {
12631265
TString message = TStringBuilder()
1264-
<< "Query did not complete within specified timeout, session id " << reqInfo->SessionId;
1266+
<< "Query did not complete within specified timeout " << msg->InitialTimeout.MilliSeconds() << "ms, session id " << reqInfo->SessionId;
12651267
ReplyProcessError(NYql::NDq::DqStatusToYdbStatus(msg->Status), message, requestId);
12661268
}
12671269
}

ydb/core/kqp/run_script_actor/kqp_run_script_actor.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <ydb/library/ydb_issue/issue_helpers.h>
44
#include <ydb/core/kqp/common/events/events.h>
55
#include <ydb/core/kqp/common/kqp.h>
6+
#include <ydb/core/kqp/common/kqp_timeouts.h>
67
#include <ydb/core/kqp/executer_actor/kqp_executer.h>
78
#include <ydb/core/kqp/proxy_service/kqp_script_executions.h>
89
#include <ydb/core/kqp/proxy_service/proto/result_set_meta.pb.h>
@@ -216,6 +217,12 @@ class TRunScriptActor : public NActors::TActorBootstrapped<TRunScriptActor> {
216217
WaitFinalizationRequest = true;
217218
RunState = IsExecuting() ? ERunState::Finishing : RunState;
218219

220+
if (RunState == ERunState::Cancelling) {
221+
NYql::TIssue cancelIssue("Request was canceled by user");
222+
cancelIssue.SetCode(NYql::DEFAULT_ERROR, NYql::TSeverityIds::S_INFO);
223+
Issues.AddIssue(std::move(cancelIssue));
224+
}
225+
219226
auto scriptFinalizeRequest = std::make_unique<TEvScriptFinalizeRequest>(
220227
GetFinalizationStatusFromRunState(), ExecutionId, Database, Status, GetExecStatusFromStatusCode(Status),
221228
Issues, std::move(QueryStats), std::move(QueryPlan), std::move(QueryAst), LeaseGeneration
@@ -424,6 +431,13 @@ class TRunScriptActor : public NActors::TActorBootstrapped<TRunScriptActor> {
424431
const auto& issueMessage = record.GetResponse().GetQueryIssues();
425432
NYql::IssuesFromMessage(issueMessage, Issues);
426433

434+
if (record.GetYdbStatus() == Ydb::StatusIds::TIMEOUT) {
435+
const TDuration timeout = GetQueryTimeout(NKikimrKqp::QUERY_TYPE_SQL_GENERIC_SCRIPT, Request.GetRequest().GetTimeoutMs(), {}, QueryServiceConfig);
436+
NYql::TIssue timeoutIssue(TStringBuilder() << "Current request timeout is " << timeout.MilliSeconds() << "ms");
437+
timeoutIssue.SetCode(NYql::DEFAULT_ERROR, NYql::TSeverityIds::S_INFO);
438+
Issues.AddIssue(std::move(timeoutIssue));
439+
}
440+
427441
if (record.GetResponse().HasQueryPlan()) {
428442
QueryPlan = record.GetResponse().GetQueryPlan();
429443
}

ydb/core/kqp/session_actor/kqp_query_state.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ std::unique_ptr<TEvTxProxySchemeCache::TEvNavigateKeySet> TKqpQueryState::BuildN
113113

114114

115115
bool TKqpQueryState::SaveAndCheckCompileResult(TEvKqp::TEvCompileResponse* ev) {
116+
CompilationRunning = false;
116117
CompileResult = ev->CompileResult;
117118
YQL_ENSURE(CompileResult);
118119
MaxReadType = CompileResult->MaxReadType;

ydb/core/kqp/session_actor/kqp_query_state.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ class TKqpQueryState : public TNonCopyable {
102102
bool KeepSession = false;
103103
TIntrusiveConstPtr<NACLib::TUserToken> UserToken;
104104
NActors::TMonotonic StartedAt;
105+
bool CompilationRunning = false;
105106

106107
THashMap<NKikimr::TTableId, ui64> TableVersions;
107108

ydb/core/kqp/session_actor/kqp_session_actor.cpp

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,7 @@ class TKqpSessionActor : public TActorBootstrapped<TKqpSessionActor> {
447447

448448
void CompileQuery() {
449449
YQL_ENSURE(QueryState);
450+
QueryState->CompilationRunning = true;
450451
auto ev = QueryState->BuildCompileRequest(CompilationCookie);
451452
LOG_D("Sending CompileQuery request");
452453

@@ -1353,16 +1354,22 @@ class TKqpSessionActor : public TActorBootstrapped<TKqpSessionActor> {
13531354
TString logMsg = TStringBuilder() << "got TEvAbortExecution in " << CurrentStateFuncName();
13541355
LOG_I(logMsg << ", status: " << NYql::NDqProto::StatusIds_StatusCode_Name(msg.GetStatusCode()) << " send to: " << ExecuterId);
13551356

1356-
TString reason = TStringBuilder() << "Request timeout exceeded, cancelling after "
1357-
<< (AppData()->MonotonicTimeProvider->Now() - QueryState->StartedAt).MilliSeconds()
1358-
<< " milliseconds.";
1357+
auto issues = ev->Get()->GetIssues();
1358+
TStringBuilder reason = TStringBuilder() << "Cancelling after " << (AppData()->MonotonicTimeProvider->Now() - QueryState->StartedAt).MilliSeconds() << "ms";
1359+
if (QueryState->CompilationRunning) {
1360+
reason << " during compilation";
1361+
} else if (ExecuterId) {
1362+
reason << " during execution";
1363+
} else {
1364+
reason << " in " << CurrentStateFuncName();
1365+
}
1366+
issues.AddIssue(reason);
13591367

13601368
if (ExecuterId) {
1361-
auto abortEv = MakeHolder<TEvKqp::TEvAbortExecution>(msg.GetStatusCode(), reason);
1369+
auto abortEv = MakeHolder<TEvKqp::TEvAbortExecution>(msg.GetStatusCode(), issues);
13621370
Send(ExecuterId, abortEv.Release(), IEventHandle::FlagTrackDelivery);
13631371
} else {
1364-
const auto& issues = ev->Get()->GetIssues();
1365-
ReplyQueryError(NYql::NDq::DqStatusToYdbStatus(msg.GetStatusCode()), logMsg, MessageFromIssues(issues));
1372+
ReplyQueryError(NYql::NDq::DqStatusToYdbStatus(msg.GetStatusCode()), "", MessageFromIssues(issues));
13661373
}
13671374
}
13681375

@@ -2025,9 +2032,7 @@ class TKqpSessionActor : public TActorBootstrapped<TKqpSessionActor> {
20252032

20262033
void Handle(TEvKqp::TEvCancelQueryRequest::TPtr& ev) {
20272034
{
2028-
auto abort = MakeHolder<NYql::NDq::TEvDq::TEvAbortExecution>();
2029-
abort->Record.SetStatusCode(NYql::NDqProto::StatusIds::CANCELLED);
2030-
abort->Record.AddIssues()->set_message("Canceled");
2035+
auto abort = MakeHolder<NYql::NDq::TEvDq::TEvAbortExecution>(NYql::NDqProto::StatusIds::CANCELLED, "Request was canceled");
20312036
Send(SelfId(), abort.Release());
20322037
}
20332038

ydb/library/yql/dq/actors/compute/dq_compute_actor_impl.h

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ class TDqComputeActorBase : public NActors::TActorBootstrapped<TDerived>
108108
public:
109109
void Bootstrap() {
110110
try {
111+
StartTime = TInstant::Now();
111112
{
112113
TStringBuilder prefixBuilder;
113114
prefixBuilder << "SelfId: " << this->SelfId() << ", TxId: " << TxId << ", task: " << Task.GetId() << ". ";
@@ -1099,9 +1100,13 @@ class TDqComputeActorBase : public NActors::TActorBootstrapped<TDerived>
10991100
auto tag = (EEvWakeupTag) ev->Get()->Tag;
11001101
switch (tag) {
11011102
case EEvWakeupTag::TimeoutTag: {
1102-
auto abortEv = MakeHolder<TEvDq::TEvAbortExecution>(NYql::NDqProto::StatusIds::TIMEOUT, TStringBuilder()
1103-
<< "Timeout event from compute actor " << this->SelfId()
1104-
<< ", TxId: " << TxId << ", task: " << Task.GetId());
1103+
TStringBuilder reason = TStringBuilder() << "Task execution timeout ";
1104+
if (RuntimeSettings.Timeout) {
1105+
reason << RuntimeSettings.Timeout->MilliSeconds() << "ms ";
1106+
}
1107+
reason << "exceeded, terminating after " << (TInstant::Now() - StartTime).MilliSeconds() << "ms";
1108+
1109+
auto abortEv = MakeHolder<TEvDq::TEvAbortExecution>(NYql::NDqProto::StatusIds::TIMEOUT, reason);
11051110

11061111
if (ComputeActorSpan) {
11071112
ComputeActorSpan.EndError(
@@ -1113,8 +1118,8 @@ class TDqComputeActorBase : public NActors::TActorBootstrapped<TDerived>
11131118

11141119
this->Send(ExecuterId, abortEv.Release());
11151120

1116-
TerminateSources("timeout exceeded", false);
1117-
Terminate(false, "timeout exceeded");
1121+
TerminateSources(reason, false);
1122+
Terminate(false, reason);
11181123
break;
11191124
}
11201125
case EEvWakeupTag::PeriodicStatsTag: {
@@ -2065,6 +2070,7 @@ class TDqComputeActorBase : public NActors::TActorBootstrapped<TDerived>
20652070
NWilson::TSpan ComputeActorSpan;
20662071
TDuration SourceCpuTime;
20672072
private:
2073+
TInstant StartTime;
20682074
bool Running = true;
20692075
TInstant LastSendStatsTime;
20702076
bool PassExceptions = false;

0 commit comments

Comments
 (0)