Skip to content

Commit ce5e83b

Browse files
authored
Merge 68ae248 into f837701
2 parents f837701 + 68ae248 commit ce5e83b

File tree

575 files changed

+2265
-1857
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

575 files changed

+2265
-1857
lines changed

.gitignore

+3
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,9 @@ __pycache__/
2626
*.pb.h
2727
*.pb.cc
2828

29+
# Other generated
30+
*.fbs.h
31+
2932
# MacOS specific
3033
.DS_Store
3134

ydb/core/kqp/compile_service/kqp_compile_actor.cpp

+4-4
Original file line number | Diff line number | Diff line change
@@ -449,9 +449,9 @@ class TKqpCompileActor : public TActorBootstrapped<TKqpCompileActor> {
449449
}
450450

451451
void FillCompileResult(std::unique_ptr<NKikimrKqp::TPreparedQuery> preparingQuery, NKikimrKqp::EQueryType queryType,
452-
bool allowCache) {
452+
bool allowCache, bool success) {
453453
auto preparedQueryHolder = std::make_shared<TPreparedQueryHolder>(
454-
preparingQuery.release(), AppData()->FunctionRegistry);
454+
preparingQuery.release(), AppData()->FunctionRegistry, !success);
455455
preparedQueryHolder->MutableLlvmSettings().Fill(Config, queryType);
456456
KqpCompileResult->PreparedQuery = preparedQueryHolder;
457457
KqpCompileResult->AllowCache = CanCacheQuery(KqpCompileResult->PreparedQuery->GetPhysicalQuery()) && allowCache;
@@ -500,7 +500,7 @@ class TKqpCompileActor : public TActorBootstrapped<TKqpCompileActor> {
500500

501501
if (status == Ydb::StatusIds::SUCCESS) {
502502
YQL_ENSURE(kqpResult.PreparingQuery);
503-
FillCompileResult(std::move(kqpResult.PreparingQuery), queryType, kqpResult.AllowCache);
503+
FillCompileResult(std::move(kqpResult.PreparingQuery), queryType, kqpResult.AllowCache, true);
504504

505505
auto now = TInstant::Now();
506506
auto duration = now - StartTime;
@@ -511,7 +511,7 @@ class TKqpCompileActor : public TActorBootstrapped<TKqpCompileActor> {
511511
<< ", duration: " << duration);
512512
} else {
513513
if (kqpResult.PreparingQuery) {
514-
FillCompileResult(std::move(kqpResult.PreparingQuery), queryType, kqpResult.AllowCache);
514+
FillCompileResult(std::move(kqpResult.PreparingQuery), queryType, kqpResult.AllowCache, false);
515515
}
516516

517517
LOG_ERROR_S(ctx, NKikimrServices::KQP_COMPILE_ACTOR, "Compilation failed"

ydb/core/kqp/executer_actor/kqp_data_executer.cpp

+74-43
Original file line number | Diff line number | Diff line change
@@ -206,50 +206,16 @@ class TKqpDataExecuter : public TKqpExecuterBase<TKqpDataExecuter, EExecType::Da
206206
);
207207
}
208208

209-
bool LogStatsByLongTasks() const {
210-
return Stats->CollectStatsByLongTasks && HasOlapTable;
211-
}
212-
213-
void FillResponseStats(Ydb::StatusIds::StatusCode status) {
214-
auto& response = *ResponseEv->Record.MutableResponse();
215-
216-
response.SetStatus(status);
217-
218-
if (Stats) {
219-
ReportEventElapsedTime();
220-
221-
Stats->FinishTs = TInstant::Now();
222-
Stats->Finish();
223-
224-
if (LogStatsByLongTasks() || CollectFullStats(Request.StatsMode)) {
225-
for (ui32 txId = 0; txId < Request.Transactions.size(); ++txId) {
226-
const auto& tx = Request.Transactions[txId].Body;
227-
auto planWithStats = AddExecStatsToTxPlan(tx->GetPlan(), response.GetResult().GetStats());
228-
response.MutableResult()->MutableStats()->AddTxPlansWithStats(planWithStats);
229-
}
230-
}
231-
232-
if (LogStatsByLongTasks()) {
233-
const auto& txPlansWithStats = response.GetResult().GetStats().GetTxPlansWithStats();
234-
if (!txPlansWithStats.empty()) {
235-
LOG_N("Full stats: " << txPlansWithStats);
236-
}
237-
}
238-
239-
Stats.reset();
240-
}
241-
}
242-
243209
void Finalize() {
210+
YQL_ENSURE(!AlreadyReplied);
211+
244212
if (LocksBroken) {
245213
return ReplyErrorAndDie(
246214
Ydb::StatusIds::ABORTED,
247215
YqlIssue(TPosition(), TIssuesIds::KIKIMR_LOCKS_INVALIDATED, "Transaction locks invalidated. Unknown table."));
248216
}
249217

250-
auto& response = *ResponseEv->Record.MutableResponse();
251-
252-
FillResponseStats(Ydb::StatusIds::SUCCESS);
218+
ResponseEv->Record.MutableResponse()->SetStatus(Ydb::StatusIds::SUCCESS);
253219
Counters->TxProxyMon->ReportStatusOK->Inc();
254220

255221
auto addLocks = [this](const auto& data) {
@@ -289,7 +255,7 @@ class TKqpDataExecuter : public TKqpExecuterBase<TKqpDataExecuter, EExecType::Da
289255
if (LockHandle) {
290256
ResponseEv->LockHandle = std::move(LockHandle);
291257
}
292-
BuildLocks(*response.MutableResult()->MutableLocks(), Locks);
258+
BuildLocks(*ResponseEv->Record.MutableResponse()->MutableResult()->MutableLocks(), Locks);
293259
}
294260

295261
auto resultSize = ResponseEv->GetByteSize();
@@ -315,9 +281,7 @@ class TKqpDataExecuter : public TKqpExecuterBase<TKqpDataExecuter, EExecType::Da
315281

316282
ExecuterSpan.EndOk();
317283

318-
Request.Transactions.crop(0);
319-
LOG_D("Sending response to: " << Target << ", results: " << ResponseEv->ResultsSize());
320-
Send(Target, ResponseEv.release());
284+
AlreadyReplied = true;
321285
PassAway();
322286
}
323287

@@ -357,6 +321,8 @@ class TKqpDataExecuter : public TKqpExecuterBase<TKqpDataExecuter, EExecType::Da
357321
return "WaitSnapshotState";
358322
} else if (func == &TThis::WaitResolveState) {
359323
return "WaitResolveState";
324+
} else if (func == &TThis::WaitShutdownState) {
325+
return "WaitShutdownState";
360326
} else {
361327
return TBase::CurrentStateFuncName();
362328
}
@@ -586,7 +552,7 @@ class TKqpDataExecuter : public TKqpExecuterBase<TKqpDataExecuter, EExecType::Da
586552
if (ev->Get()->Record.GetState() == NDqProto::COMPUTE_STATE_FAILURE) {
587553
CancelProposal(0);
588554
}
589-
HandleComputeStats(ev);
555+
HandleComputeState(ev);
590556
}
591557

592558
void HandlePrepare(TEvPipeCache::TEvDeliveryProblem::TPtr& ev) {
@@ -1067,7 +1033,7 @@ class TKqpDataExecuter : public TKqpExecuterBase<TKqpDataExecuter, EExecType::Da
10671033
hFunc(TEvInterconnect::TEvNodeDisconnected, HandleDisconnected);
10681034
hFunc(TEvKqpNode::TEvStartKqpTasksResponse, HandleStartKqpTasksResponse);
10691035
hFunc(TEvTxProxy::TEvProposeTransactionStatus, HandleExecute);
1070-
hFunc(TEvDqCompute::TEvState, HandleComputeStats);
1036+
hFunc(TEvDqCompute::TEvState, HandleComputeState);
10711037
hFunc(NYql::NDq::TEvDqCompute::TEvChannelData, HandleChannelData);
10721038
hFunc(TEvKqpExecuter::TEvStreamDataAck, HandleStreamAck);
10731039
hFunc(TEvKqp::TEvAbortExecution, HandleExecute);
@@ -2685,6 +2651,23 @@ class TKqpDataExecuter : public TKqpExecuterBase<TKqpDataExecuter, EExecType::Da
26852651
}
26862652
}
26872653

2654+
void Shutdown() override {
2655+
if (Planner) {
2656+
if (Planner->GetPendingComputeTasks().empty() && Planner->GetPendingComputeActors().empty()) {
2657+
LOG_I("Shutdown immediately - nothing to wait");
2658+
PassAway();
2659+
} else {
2660+
this->Become(&TThis::WaitShutdownState);
2661+
LOG_I("Waiting for shutdown of " << Planner->GetPendingComputeTasks().size() << " tasks and "
2662+
<< Planner->GetPendingComputeActors().size() << " compute actors");
2663+
// TODO(ilezhankin): the CA awaiting timeout should be configurable.
2664+
TActivationContext::Schedule(TDuration::Seconds(10), new IEventHandle(SelfId(), SelfId(), new TEvents::TEvPoison));
2665+
}
2666+
} else {
2667+
PassAway();
2668+
}
2669+
}
2670+
26882671
void PassAway() override {
26892672
auto totalTime = TInstant::Now() - StartTime;
26902673
Counters->Counters->DataTxTotalTimeHistogram->Collect(totalTime.MilliSeconds());
@@ -2702,6 +2685,54 @@ class TKqpDataExecuter : public TKqpExecuterBase<TKqpDataExecuter, EExecType::Da
27022685
TBase::PassAway();
27032686
}
27042687

2688+
STATEFN(WaitShutdownState) {
2689+
switch(ev->GetTypeRewrite()) {
2690+
hFunc(TEvDqCompute::TEvState, HandleShutdown);
2691+
hFunc(TEvInterconnect::TEvNodeDisconnected, HandleShutdown);
2692+
hFunc(TEvents::TEvPoison, HandleShutdown);
2693+
default:
2694+
LOG_E("Unexpected event: " << ev->GetTypeName()); // ignore all other events
2695+
}
2696+
}
2697+
2698+
void HandleShutdown(TEvDqCompute::TEvState::TPtr& ev) {
2699+
HandleComputeStats(ev);
2700+
2701+
if (Planner->GetPendingComputeTasks().empty() && Planner->GetPendingComputeActors().empty()) {
2702+
PassAway();
2703+
}
2704+
}
2705+
2706+
void HandleShutdown(TEvInterconnect::TEvNodeDisconnected::TPtr& ev) {
2707+
const auto nodeId = ev->Get()->NodeId;
2708+
LOG_N("Node has disconnected while shutdown: " << nodeId);
2709+
2710+
YQL_ENSURE(Planner);
2711+
2712+
for (const auto& task : TasksGraph.GetTasks()) {
2713+
if (task.Meta.NodeId == nodeId && !task.Meta.Completed) {
2714+
if (task.ComputeActorId) {
2715+
Planner->CompletedCA(task.Id, task.ComputeActorId);
2716+
} else {
2717+
Planner->TaskNotStarted(task.Id);
2718+
}
2719+
}
2720+
}
2721+
2722+
if (Planner->GetPendingComputeTasks().empty() && Planner->GetPendingComputeActors().empty()) {
2723+
PassAway();
2724+
}
2725+
}
2726+
2727+
void HandleShutdown(TEvents::TEvPoison::TPtr& ev) {
2728+
// Self-poison means timeout - don't wait anymore.
2729+
LOG_I("Timed out on waiting for Compute Actors to finish - forcing shutdown");
2730+
2731+
if (ev->Sender == SelfId()) {
2732+
PassAway();
2733+
}
2734+
}
2735+
27052736
private:
27062737
void ReplyTxStateUnknown(ui64 shardId) {
27072738
auto message = TStringBuilder() << "Tx state unknown for shard " << shardId << ", txid " << TxId;

0 commit comments

Comments
 (0)