@@ -37,6 +37,11 @@ std::unique_ptr<TEvKqp::TEvAbortExecution> CheckTaskSize(ui64 TxId, const TIntru
37
37
return nullptr ;
38
38
}
39
39
40
+ std::unique_ptr<IEventHandle> MakeActorStartFailureError (const TActorId& executerId, const TString& reason) {
41
+ auto ev = std::make_unique<TEvKqp::TEvAbortExecution>(NYql::NDqProto::StatusIds::OVERLOADED, reason);
42
+ return std::make_unique<IEventHandle>(executerId, executerId, ev.release ());
43
+ }
44
+
40
45
void BuildInitialTaskResources (const TKqpTasksGraph& graph, ui64 taskId, TTaskResourceEstimation& ret) {
41
46
const auto & task = graph.GetTask (taskId);
42
47
const auto & stageInfo = graph.GetStageInfo (task.StageId );
@@ -337,12 +342,12 @@ const IKqpGateway::TKqpSnapshot& TKqpPlanner::GetSnapshot() const {
337
342
338
343
// optimizeProtoForLocalExecution - if we want to execute the compute actor locally and don't want to serialize and then deserialize the proto message,
339
344
// instead we just pass a pointer to the proto message and afterwards swap/copy it
340
- void TKqpPlanner::ExecuteDataComputeTask (ui64 taskId, ui32 computeTasksSize) {
345
+ TString TKqpPlanner::ExecuteDataComputeTask (ui64 taskId, ui32 computeTasksSize) {
341
346
auto & task = TasksGraph.GetTask (taskId);
342
347
NYql::NDqProto::TDqTask* taskDesc = ArenaSerializeTaskToProto (TasksGraph, task, true );
343
348
NYql::NDq::TComputeRuntimeSettings settings;
344
349
345
- task. ComputeActorId = CaFactory_->CreateKqpComputeActor ({
350
+ auto startResult = CaFactory_->CreateKqpComputeActor ({
346
351
.ExecuterId = ExecuterId,
347
352
.TxId = TxId,
348
353
.Task = taskDesc,
@@ -360,10 +365,19 @@ void TKqpPlanner::ExecuteDataComputeTask(ui64 taskId, ui32 computeTasksSize) {
360
365
.RlPath = Nothing ()
361
366
});
362
367
368
+ if (const auto * rmResult = std::get_if<NRm::TKqpRMAllocateResult>(&startResult)) {
369
+ return rmResult->GetFailReason ();
370
+ }
371
+
372
+ TActorId* actorId = std::get_if<TActorId>(&startResult);
373
+ Y_ABORT_UNLESS (actorId);
374
+ task.ComputeActorId = *actorId;
375
+
363
376
LOG_D (" Executing task: " << taskId << " on compute actor: " << task.ComputeActorId );
364
377
365
378
auto result = PendingComputeActors.emplace (task.ComputeActorId , TProgressStat ());
366
379
YQL_ENSURE (result.second );
380
+ return TString ();
367
381
}
368
382
369
383
ui32 TKqpPlanner::GetnScanTasks () {
@@ -401,7 +415,10 @@ std::unique_ptr<IEventHandle> TKqpPlanner::PlanExecution() {
401
415
// on datashard tx.
402
416
if (LocalComputeTasks) {
403
417
for (ui64 taskId : ComputeTasks) {
404
- ExecuteDataComputeTask (taskId, ComputeTasks.size ());
418
+ auto result = ExecuteDataComputeTask (taskId, ComputeTasks.size ());
419
+ if (!result.empty ()) {
420
+ return MakeActorStartFailureError (ExecuterId, result);
421
+ }
405
422
}
406
423
ComputeTasks.clear ();
407
424
}
@@ -411,7 +428,10 @@ std::unique_ptr<IEventHandle> TKqpPlanner::PlanExecution() {
411
428
// to execute this task locally so we can avoid useless overhead for remote task launching.
412
429
for (auto & [shardId, tasks]: TasksPerNode) {
413
430
for (ui64 taskId: tasks) {
414
- ExecuteDataComputeTask (taskId, tasks.size ());
431
+ auto result = ExecuteDataComputeTask (taskId, tasks.size ());
432
+ if (!result.empty ()) {
433
+ return MakeActorStartFailureError (ExecuterId, result);
434
+ }
415
435
}
416
436
}
417
437
@@ -437,7 +457,11 @@ std::unique_ptr<IEventHandle> TKqpPlanner::PlanExecution() {
437
457
if (tasksOnNodeIt != TasksPerNode.end ()) {
438
458
auto & tasks = tasksOnNodeIt->second ;
439
459
for (ui64 taskId: tasks) {
440
- ExecuteDataComputeTask (taskId, tasks.size ());
460
+ auto result = ExecuteDataComputeTask (taskId, tasks.size ());
461
+ if (!result.empty ()) {
462
+ return MakeActorStartFailureError (ExecuterId, result);
463
+ }
464
+
441
465
PendingComputeTasks.erase (taskId);
442
466
}
443
467
}
0 commit comments