@@ -667,7 +667,7 @@ class TKqpExecuterBase : public TActorBootstrapped<TDerived> {
667
667
if (statusCode == Ydb::StatusIds::INTERNAL_ERROR) {
668
668
InternalError (issues);
669
669
} else if (statusCode == Ydb::StatusIds::TIMEOUT) {
670
- AbortExecutionAndDie (ev->Sender , NYql::NDqProto::StatusIds::TIMEOUT, " Request timeout exceeded " );
670
+ TimeoutError (ev->Sender );
671
671
} else {
672
672
RuntimeError (NYql::NDq::DqStatusToYdbStatus (msg.GetStatusCode ()), issues);
673
673
}
@@ -1624,14 +1624,14 @@ class TKqpExecuterBase : public TActorBootstrapped<TDerived> {
1624
1624
protected:
1625
1625
// Asks the compute actors of this executer's tasks to abort.
//
// Iterates over every task returned by TasksGraph.GetTasks() and, for each task that
// qualifies, sends a TEvAbortExecution built from `code` (converted with
// NYql::NDq::YdbStatusToDqStatus) and `issues` to task.ComputeActorId.
//
// code   - YDB status code to report to the compute actors.
// issues - issues attached to the abort event and printed in the log line.
void TerminateComputeActors (Ydb::StatusIds::StatusCode code, const NYql::TIssues& issues) {
1626
1626
for (const auto & task : this ->TasksGraph .GetTasks ()) {
1627
// Old condition (removed by this diff): abort every task that has a compute actor id.
- if (task.ComputeActorId ) {
1627
// New condition (added by this diff): additionally skip tasks whose Meta.Completed flag is set.
+ if (task.ComputeActorId && !task. Meta . Completed ) {
1628
1628
LOG_I (" aborting compute actor execution, message: " << issues.ToOneLineString ()
1629
1629
<< " , compute actor: " << task.ComputeActorId << " , task: " << task.Id );
1630
1630
1631
1631
// The event is allocated through MakeHolder and released into Send().
auto ev = MakeHolder<TEvKqp::TEvAbortExecution>(NYql::NDq::YdbStatusToDqStatus (code), issues);
1632
1632
this ->Send (task.ComputeActorId , ev.Release ());
1633
1633
} else {
// Nothing is sent for this task; it is only logged. The message text is updated by the
// diff to cover both reasons for skipping (no compute actor id, or already completed).
1634
- LOG_I (" task: " << task.Id << " , does not have Compute ActorId yet" );
1634
+ LOG_I (" task: " << task.Id << " , does not have the CA id yet or is already complete " );
1635
1635
}
1636
1636
}
1637
1637
}
@@ -1649,7 +1649,6 @@ class TKqpExecuterBase : public TActorBootstrapped<TDerived> {
1649
1649
1650
1650
void InternalError (const NYql::TIssues& issues) {
1651
1651
LOG_E (issues.ToOneLineString ());
1652
- TerminateComputeActors (Ydb::StatusIds::INTERNAL_ERROR, issues);
1653
1652
auto issue = NYql::YqlIssue ({}, NYql::TIssuesIds::UNEXPECTED, " Internal error while executing transaction." );
1654
1653
for (const NYql::TIssue& i : issues) {
1655
1654
issue.AddSubIssue (MakeIntrusive<NYql::TIssue>(i));
@@ -1663,15 +1662,13 @@ class TKqpExecuterBase : public TActorBootstrapped<TDerived> {
1663
1662
1664
1663
// Reports an UNAVAILABLE failure for the current request.
//
// Logs `message`, wraps it as a sub-issue of a KIKIMR_TEMPORARILY_UNAVAILABLE issue and
// hands the result to ReplyErrorAndDie with Ydb::StatusIds::UNAVAILABLE.
//
// message - human-readable reason; used both in the log line and as the sub-issue text.
void ReplyUnavailable (const TString& message) {
1665
1664
LOG_E (" UNAVAILABLE: " << message);
1666
// This diff removes the direct TerminateComputeActors call from this method.
// NOTE(review): presumably termination now happens inside ReplyErrorAndDie - confirm.
- TerminateComputeActors (Ydb::StatusIds::UNAVAILABLE, message);
1667
1665
auto issue = NYql::YqlIssue ({}, NYql::TIssuesIds::KIKIMR_TEMPORARILY_UNAVAILABLE);
1668
1666
issue.AddSubIssue (new NYql::TIssue (message));
1669
1667
ReplyErrorAndDie (Ydb::StatusIds::UNAVAILABLE, issue);
1670
1668
}
1671
1669
1672
1670
// Reports a runtime failure with an arbitrary YDB status code.
//
// Logs the status name together with the one-line form of `issues`, then forwards both
// to ReplyErrorAndDie.
//
// code   - YDB status code to reply with.
// issues - issues describing the failure.
void RuntimeError (Ydb::StatusIds::StatusCode code, const NYql::TIssues& issues) {
1673
1671
LOG_E (Ydb::StatusIds_StatusCode_Name (code) << " : " << issues.ToOneLineString ());
1674
// This diff removes the direct TerminateComputeActors call from this method.
// NOTE(review): presumably termination now happens inside ReplyErrorAndDie - confirm.
- TerminateComputeActors (code, issues);
1675
1672
ReplyErrorAndDie (code, issues);
1676
1673
}
1677
1674
@@ -1687,11 +1684,19 @@ class TKqpExecuterBase : public TActorBootstrapped<TDerived> {
1687
1684
ReplyErrorAndDie (status, &issues);
1688
1685
}
1689
1686
1690
- void AbortExecutionAndDie (TActorId abortSender, NYql::NDqProto::StatusIds::StatusCode status, const TString& message ) {
1687
+ void TimeoutError (TActorId abortSender) {
1691
1688
if (AlreadyReplied) {
1689
+ LOG_E (" Timeout when we already replied - not good" << Endl << TBackTrace ().PrintToString () << Endl);
1692
1690
return ;
1693
1691
}
1694
1692
1693
+ const auto status = NYql::NDqProto::StatusIds::TIMEOUT;
1694
+ const TString message = " Request timeout exceeded" ;
1695
+
1696
+ TerminateComputeActors (Ydb::StatusIds::TIMEOUT, message);
1697
+
1698
+ AlreadyReplied = true ;
1699
+
1695
1700
LOG_E (" Abort execution: " << NYql::NDqProto::StatusIds_StatusCode_Name (status) << " ," << message);
1696
1701
if (ExecuterSpan) {
1697
1702
ExecuterSpan.EndError (TStringBuilder () << NYql::NDqProto::StatusIds_StatusCode_Name (status));
@@ -1701,17 +1706,14 @@ class TKqpExecuterBase : public TActorBootstrapped<TDerived> {
1701
1706
1702
1707
// TEvAbortExecution can come from either ComputeActor or SessionActor (== Target).
1703
1708
if (abortSender != Target) {
1704
- auto abortEv = MakeHolder<TEvKqp::TEvAbortExecution>(status, " Request timeout exceeded " );
1709
+ auto abortEv = MakeHolder<TEvKqp::TEvAbortExecution>(status, message );
1705
1710
this ->Send (Target, abortEv.Release ());
1706
1711
}
1707
1712
1708
- AlreadyReplied = true ;
1709
1713
LOG_E (" Sending timeout response to: " << Target);
1710
- this ->Send (Target, ResponseEv.release ());
1711
1714
1712
1715
Request.Transactions .crop (0 );
1713
- TerminateComputeActors (Ydb::StatusIds::TIMEOUT, message);
1714
- this ->PassAway ();
1716
+ this ->Shutdown ();
1715
1717
}
1716
1718
1717
1719
void FillResponseStats (Ydb::StatusIds::StatusCode status) {
@@ -1746,17 +1748,11 @@ class TKqpExecuterBase : public TActorBootstrapped<TDerived> {
1746
1748
google::protobuf::RepeatedPtrField<Ydb::Issue::IssueMessage>* issues)
1747
1749
{
1748
1750
if (AlreadyReplied) {
1751
+ LOG_E (" Error when we already replied - not good" << Endl << TBackTrace ().PrintToString () << Endl);
1749
1752
return ;
1750
1753
}
1751
1754
1752
- if (Planner) {
1753
- for (auto computeActor : Planner->GetPendingComputeActors ()) {
1754
- LOG_D (" terminate compute actor " << computeActor.first );
1755
-
1756
- auto ev = MakeHolder<TEvKqp::TEvAbortExecution>(NYql::NDq::YdbStatusToDqStatus (status), " Terminate execution" );
1757
- this ->Send (computeActor.first , ev.Release ());
1758
- }
1759
- }
1755
+ TerminateComputeActors (status, " Terminate execution" );
1760
1756
1761
1757
AlreadyReplied = true ;
1762
1758
auto & response = *ResponseEv->Record .MutableResponse ();
@@ -1782,8 +1778,7 @@ class TKqpExecuterBase : public TActorBootstrapped<TDerived> {
1782
1778
ExecuterStateSpan.EndError (response.DebugString ());
1783
1779
1784
1780
Request.Transactions .crop (0 );
1785
- this ->Send (Target, ResponseEv.release ());
1786
- this ->PassAway ();
1781
+ this ->Shutdown ();
1787
1782
}
1788
1783
1789
1784
protected:
@@ -1851,7 +1846,16 @@ class TKqpExecuterBase : public TActorBootstrapped<TDerived> {
1851
1846
}
1852
1847
1853
1848
protected:
1849
+ // Shutdown() is deliberately a separate method from `PassAway()`, so that it is not confused with the
1850
+ // expectation other actors have that `PassAway()` kills the actor immediately.
// Virtual hook: this base implementation only forwards to PassAway().
1851
+ virtual void Shutdown () {
1852
+ PassAway ();
1853
+ }
1854
+
1854
1855
void PassAway () override {
1856
+ YQL_ENSURE (AlreadyReplied && ResponseEv);
1857
+ this ->Send (Target, ResponseEv.release ());
1858
+
1855
1859
for (auto channelPair: ResultChannelProxies) {
1856
1860
LOG_D (" terminate result channel " << channelPair.first << " proxy at " << channelPair.second ->SelfId ());
1857
1861
@@ -1872,12 +1876,11 @@ class TKqpExecuterBase : public TActorBootstrapped<TDerived> {
1872
1876
1873
1877
if (KqpTableResolverId) {
1874
1878
this ->Send (KqpTableResolverId, new TEvents::TEvPoison);
1875
- this ->Send (this ->SelfId (), new TEvents::TEvPoison);
1876
- LOG_T (" Terminate, become ZombieState" );
1877
- this ->Become (&TKqpExecuterBase::ZombieState);
1878
- } else {
1879
- IActor::PassAway ();
1880
1879
}
1880
+
1881
+ this ->Send (this ->SelfId (), new TEvents::TEvPoison);
1882
+ LOG_T (" Terminate, become ZombieState" );
1883
+ this ->Become (&TKqpExecuterBase::ZombieState);
1881
1884
}
1882
1885
1883
1886
STATEFN (ZombieState) {
0 commit comments