Skip to content

Commit ab680dc

Browse files
kwachows authored and jlawryno committed
accel/ivpu: Fix locking order in ivpu_job_submit
Fix deadlock in job submission and abort handling.

When a thread aborts currently executing jobs due to a fault, it first locks the global lock protecting submitted_jobs (#1). After the last job is destroyed, it proceeds to release the related context and locks file_priv (#2).

Meanwhile, in the job submission thread, the file_priv lock (#2) is taken first, and then the submitted_jobs lock (#1) is obtained when a job is added to the submitted jobs list.

       CPU0                                  CPU1
       ----                                  ----
  (for example due to a fault)          (job submissions keep coming)

  lock(&vdev->submitted_jobs_lock) #1
  ivpu_jobs_abort_all()
  job_destroy()
                                        lock(&file_priv->lock)           #2
                                        lock(&vdev->submitted_jobs_lock) #1
  file_priv_release()
  lock(&vdev->context_list_lock)
  lock(&file_priv->lock)           #2

This order of locking causes a deadlock. To resolve this issue, change the order of locking in ivpu_job_submit().

Signed-off-by: Karol Wachowski <[email protected]>
Signed-off-by: Maciej Falkowski <[email protected]>
Reviewed-by: Jacek Lawrynowicz <[email protected]>
Signed-off-by: Jacek Lawrynowicz <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent e524436 commit ab680dc

File tree

1 file changed

+7
-10
lines changed

1 file changed

+7
-10
lines changed

drivers/accel/ivpu/ivpu_job.c

Lines changed: 7 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -597,6 +597,7 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority, u32 cmdq_id)
597597
if (ret < 0)
598598
return ret;
599599

600+
mutex_lock(&vdev->submitted_jobs_lock);
600601
mutex_lock(&file_priv->lock);
601602

602603
if (cmdq_id == 0)
@@ -606,27 +607,25 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority, u32 cmdq_id)
606607
if (!cmdq) {
607608
ivpu_warn_ratelimited(vdev, "Failed to get job queue, ctx %d\n", file_priv->ctx.id);
608609
ret = -EINVAL;
609-
goto err_unlock_file_priv;
610+
goto err_unlock;
610611
}
611612

612613
ret = ivpu_cmdq_register(file_priv, cmdq);
613614
if (ret) {
614615
ivpu_err(vdev, "Failed to register command queue: %d\n", ret);
615-
goto err_unlock_file_priv;
616+
goto err_unlock;
616617
}
617618

618619
job->cmdq_id = cmdq->id;
619620

620-
mutex_lock(&vdev->submitted_jobs_lock);
621-
622621
is_first_job = xa_empty(&vdev->submitted_jobs_xa);
623622
ret = xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, file_priv->job_limit,
624623
&file_priv->job_id_next, GFP_KERNEL);
625624
if (ret < 0) {
626625
ivpu_dbg(vdev, JOB, "Too many active jobs in ctx %d\n",
627626
file_priv->ctx.id);
628627
ret = -EBUSY;
629-
goto err_unlock_submitted_jobs;
628+
goto err_unlock;
630629
}
631630

632631
ret = ivpu_cmdq_push_job(cmdq, job);
@@ -649,22 +648,20 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority, u32 cmdq_id)
649648
job->job_id, file_priv->ctx.id, job->engine_idx, cmdq->priority,
650649
job->cmd_buf_vpu_addr, cmdq->jobq->header.tail);
651650

652-
mutex_unlock(&vdev->submitted_jobs_lock);
653651
mutex_unlock(&file_priv->lock);
654652

655653
if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)) {
656-
mutex_lock(&vdev->submitted_jobs_lock);
657654
ivpu_job_signal_and_destroy(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS);
658-
mutex_unlock(&vdev->submitted_jobs_lock);
659655
}
660656

657+
mutex_unlock(&vdev->submitted_jobs_lock);
658+
661659
return 0;
662660

663661
err_erase_xa:
664662
xa_erase(&vdev->submitted_jobs_xa, job->job_id);
665-
err_unlock_submitted_jobs:
663+
err_unlock:
666664
mutex_unlock(&vdev->submitted_jobs_lock);
667-
err_unlock_file_priv:
668665
mutex_unlock(&file_priv->lock);
669666
ivpu_rpm_put(vdev);
670667
return ret;

0 commit comments

Comments
 (0)