Commit 3389baa

RDMA/mlx5: Reduce locking in implicit_mr_get_data()
Now that the child MRs are stored in an xarray we can rely on the SRCU lock to protect the xa_load and use xa_cmpxchg on the slow allocation path to resolve races with concurrent page faults.

This reduces the scope of the umem_mutex critical section for implicit MRs to only cover mlx5_ib_update_xlt, and avoids taking a lock at all if the child MR is already in the xarray. This makes it consistent with the normal ODP MR critical section for umem_lock, and with the locking approach used for destroying an unused implicit child MR.

MLX5_IB_UPD_XLT_ATOMIC is no longer needed in implicit_get_child_mr() since it is no longer called with any locks held.

Link: https://lore.kernel.org/r/[email protected]
Reviewed-by: Artemy Kovalyov <[email protected]>
Signed-off-by: Jason Gunthorpe <[email protected]>
Parent: 423f52d
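The pattern the message describes, a lock-free lookup with a compare-and-swap publish on the slow path, can be sketched outside the kernel. Below is a minimal C11 userspace analogue, not code from this commit: the atomic slot array stands in for the xarray, the acquire load plays the role of the SRCU-protected xa_load(), and the compare-and-swap plays the role of xa_cmpxchg(). Names such as get_child and NCHILD are made up for illustration.

/*
 * Minimal userspace analogue of the xa_load / xa_cmpxchg pattern:
 * readers do a plain atomic load (the fast path); a missing entry is
 * allocated and published with a single compare-and-swap, and the
 * loser of a race frees its copy and adopts the winner's, much as
 * implicit_get_child_mr() now does.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

#define NCHILD 64

struct child { int idx; };

/* Stand-in for imr->implicit_children; zero-initialized to NULL. */
static _Atomic(struct child *) children[NCHILD];

static struct child *get_child(int idx)
{
	struct child *cur, *mr;

	/* Fast path: lock-free lookup, like xa_load() under SRCU. */
	cur = atomic_load_explicit(&children[idx], memory_order_acquire);
	if (cur)
		return cur;

	/* Slow path: allocate, then publish with one CAS. */
	mr = calloc(1, sizeof(*mr));
	if (!mr)
		return NULL;
	mr->idx = idx;

	cur = NULL;
	if (atomic_compare_exchange_strong_explicit(&children[idx], &cur, mr,
						    memory_order_acq_rel,
						    memory_order_acquire))
		return mr;

	/* Another thread beat us to creating the child, use theirs. */
	free(mr);
	return cur;
}

int main(void)
{
	struct child *a = get_child(3);
	struct child *b = get_child(3);

	printf("same child: %s\n", a == b ? "yes" : "no");
	return 0;
}

The design point the commit relies on is visible in the failure branch: the losing thread never published its allocation, so it can free it immediately and adopt the winner's entry without holding any lock or deferring reclamation.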


drivers/infiniband/hw/mlx5/odp.c

Lines changed: 26 additions & 12 deletions
@@ -381,8 +381,7 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
 				 MLX5_IMR_MTT_ENTRIES,
 				 PAGE_SHIFT,
 				 MLX5_IB_UPD_XLT_ZAP |
-				 MLX5_IB_UPD_XLT_ENABLE |
-				 MLX5_IB_UPD_XLT_ATOMIC);
+				 MLX5_IB_UPD_XLT_ENABLE);
 	if (err) {
 		ret = ERR_PTR(err);
 		goto out_release;
@@ -392,9 +391,16 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
 	 * Once the store to either xarray completes any error unwind has to
 	 * use synchronize_srcu(). Avoid this with xa_reserve()
 	 */
-	err = xa_err(xa_store(&imr->implicit_children, idx, mr, GFP_KERNEL));
-	if (err) {
-		ret = ERR_PTR(err);
+	ret = xa_cmpxchg(&imr->implicit_children, idx, NULL, mr, GFP_KERNEL);
+	if (unlikely(ret)) {
+		if (xa_is_err(ret)) {
+			ret = ERR_PTR(xa_err(ret));
+			goto out_release;
+		}
+		/*
+		 * Another thread beat us to creating the child mr, use
+		 * theirs.
+		 */
 		goto out_release;
 	}
 
@@ -424,7 +430,8 @@ static struct mlx5_ib_mr *implicit_mr_get_data(struct mlx5_ib_mr *imr,
 	struct mlx5_ib_mr *result = NULL;
 	int ret;
 
-	mutex_lock(&odp_imr->umem_mutex);
+	lockdep_assert_held(&imr->dev->odp_srcu);
+
 	for (idx = idx; idx <= end_idx; idx++) {
 		struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx);
 
@@ -450,20 +457,27 @@ static struct mlx5_ib_mr *implicit_mr_get_data(struct mlx5_ib_mr *imr,
 	 */
 out:
 	if (likely(!inv_len))
-		goto out_unlock;
+		return result;
 
+	/*
+	 * Notice this is not strictly ordered right, the KSM is updated after
+	 * the implicit_leaves is updated, so a parallel page fault could see
+	 * a MR that is not yet visible in the KSM. This is similar to a
+	 * parallel page fault seeing a MR that is being concurrently removed
+	 * from the KSM. Both of these improbable situations are resolved
+	 * safely by resuming the HW and then taking another page fault. The
+	 * next pagefault handler will see the new information.
+	 */
+	mutex_lock(&odp_imr->umem_mutex);
 	ret = mlx5_ib_update_xlt(imr, inv_start_idx, inv_len, 0,
 				 MLX5_IB_UPD_XLT_INDIRECT |
 				 MLX5_IB_UPD_XLT_ATOMIC);
+	mutex_unlock(&odp_imr->umem_mutex);
 	if (ret) {
 		mlx5_ib_err(to_mdev(imr->ibmr.pd->device),
 			    "Failed to update PAS\n");
-		result = ERR_PTR(ret);
-		goto out_unlock;
+		return ERR_PTR(ret);
 	}
-
-out_unlock:
-	mutex_unlock(&odp_imr->umem_mutex);
 	return result;
 }
 