@@ -419,68 +419,6 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
 	return ret;
 }
 
-static struct mlx5_ib_mr *implicit_mr_get_data(struct mlx5_ib_mr *imr,
-                                               u64 io_virt, size_t bcnt)
-{
-	struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem);
-	unsigned long end_idx = (io_virt + bcnt - 1) >> MLX5_IMR_MTT_SHIFT;
-	unsigned long idx = io_virt >> MLX5_IMR_MTT_SHIFT;
-	unsigned long inv_start_idx = end_idx + 1;
-	unsigned long inv_len = 0;
-	struct mlx5_ib_mr *result = NULL;
-	int ret;
-
-	lockdep_assert_held(&imr->dev->odp_srcu);
-
-	for (idx = idx; idx <= end_idx; idx++) {
-		struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx);
-
-		if (unlikely(!mtt)) {
-			mtt = implicit_get_child_mr(imr, idx);
-			if (IS_ERR(mtt)) {
-				result = mtt;
-				goto out;
-			}
-			inv_start_idx = min(inv_start_idx, idx);
-			inv_len = idx - inv_start_idx + 1;
-		}
-
-		/* Return first odp if region not covered by single one */
-		if (likely(!result))
-			result = mtt;
-	}
-
-	/*
-	 * Any time the implicit_children are changed we must perform an
-	 * update of the xlt before exiting to ensure the HW and the
-	 * implicit_children remains synchronized.
-	 */
-out:
-	if (likely(!inv_len))
-		return result;
-
-	/*
-	 * Notice this is not strictly ordered right, the KSM is updated after
-	 * the implicit_leaves is updated, so a parallel page fault could see
-	 * a MR that is not yet visible in the KSM. This is similar to a
-	 * parallel page fault seeing a MR that is being concurrently removed
-	 * from the KSM. Both of these improbable situations are resolved
-	 * safely by resuming the HW and then taking another page fault. The
-	 * next pagefault handler will see the new information.
-	 */
-	mutex_lock(&odp_imr->umem_mutex);
-	ret = mlx5_ib_update_xlt(imr, inv_start_idx, inv_len, 0,
-	                         MLX5_IB_UPD_XLT_INDIRECT |
-	                         MLX5_IB_UPD_XLT_ATOMIC);
-	mutex_unlock(&odp_imr->umem_mutex);
-	if (ret) {
-		mlx5_ib_err(to_mdev(imr->ibmr.pd->device),
-		            "Failed to update PAS\n");
-		return ERR_PTR(ret);
-	}
-	return result;
-}
-
 struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
                                              struct ib_udata *udata,
                                              int access_flags)
@@ -647,6 +585,84 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
 	return ret;
 }
 
+static int pagefault_implicit_mr(struct mlx5_ib_mr *imr,
+                                 struct ib_umem_odp *odp_imr, u64 user_va,
+                                 size_t bcnt, u32 *bytes_mapped, u32 flags)
+{
+	unsigned long end_idx = (user_va + bcnt - 1) >> MLX5_IMR_MTT_SHIFT;
+	unsigned long upd_start_idx = end_idx + 1;
+	unsigned long upd_len = 0;
+	unsigned long npages = 0;
+	int err;
+	int ret;
+
+	if (unlikely(user_va >= mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE ||
+	             mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE - user_va < bcnt))
+		return -EFAULT;
+
+	/* Fault each child mr that intersects with our interval. */
+	while (bcnt) {
+		unsigned long idx = user_va >> MLX5_IMR_MTT_SHIFT;
+		struct ib_umem_odp *umem_odp;
+		struct mlx5_ib_mr *mtt;
+		u64 len;
+
+		mtt = xa_load(&imr->implicit_children, idx);
+		if (unlikely(!mtt)) {
+			mtt = implicit_get_child_mr(imr, idx);
+			if (IS_ERR(mtt)) {
+				ret = PTR_ERR(mtt);
+				goto out;
+			}
+			upd_start_idx = min(upd_start_idx, idx);
+			upd_len = idx - upd_start_idx + 1;
+		}
+
+		umem_odp = to_ib_umem_odp(mtt->umem);
+		len = min_t(u64, user_va + bcnt, ib_umem_end(umem_odp)) -
+		      user_va;
+
+		ret = pagefault_real_mr(mtt, umem_odp, user_va, len,
+		                        bytes_mapped, flags);
+		if (ret < 0)
+			goto out;
+		user_va += len;
+		bcnt -= len;
+		npages += ret;
+	}
+
+	ret = npages;
+
+	/*
+	 * Any time the implicit_children are changed we must perform an
+	 * update of the xlt before exiting to ensure the HW and the
+	 * implicit_children remains synchronized.
+	 */
+out:
+	if (likely(!upd_len))
+		return ret;
+
+	/*
+	 * Notice this is not strictly ordered right, the KSM is updated after
+	 * the implicit_children is updated, so a parallel page fault could
+	 * see a MR that is not yet visible in the KSM. This is similar to a
+	 * parallel page fault seeing a MR that is being concurrently removed
+	 * from the KSM. Both of these improbable situations are resolved
+	 * safely by resuming the HW and then taking another page fault. The
+	 * next pagefault handler will see the new information.
+	 */
+	mutex_lock(&odp_imr->umem_mutex);
+	err = mlx5_ib_update_xlt(imr, upd_start_idx, upd_len, 0,
+	                         MLX5_IB_UPD_XLT_INDIRECT |
+	                         MLX5_IB_UPD_XLT_ATOMIC);
+	mutex_unlock(&odp_imr->umem_mutex);
+	if (err) {
+		mlx5_ib_err(imr->dev, "Failed to update PAS\n");
+		return err;
+	}
+	return ret;
+}
+
 /*
  * Returns:
  *  -EFAULT: The io_virt->bcnt is not within the MR, it covers pages that are
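The while (bcnt) loop in pagefault_implicit_mr() above splits the faulting interval [user_va, user_va + bcnt) across fixed-size child MRs: each iteration derives the child index from user_va >> MLX5_IMR_MTT_SHIFT and clamps the faulted length to the end of that child's umem. The following standalone C sketch shows only that splitting arithmetic, not driver code; CHILD_SHIFT and the 2 MiB child size are illustrative stand-ins for MLX5_IMR_MTT_SHIFT / MLX5_IMR_MTT_SIZE, and the child end is recomputed from the index here, whereas the driver reads it from ib_umem_end() on the child's umem.

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for MLX5_IMR_MTT_SHIFT / MLX5_IMR_MTT_SIZE. */
#define CHILD_SHIFT 21
#define CHILD_SIZE  (1ULL << CHILD_SHIFT)

int main(void)
{
        uint64_t user_va = 3 * CHILD_SIZE / 2;  /* fault starts mid-child */
        uint64_t bcnt = 2 * CHILD_SIZE;         /* and spans three children */

        while (bcnt) {
                uint64_t idx = user_va >> CHILD_SHIFT;  /* child MR index */
                uint64_t child_end = (idx + 1) * CHILD_SIZE;
                uint64_t fault_end = user_va + bcnt < child_end ?
                                     user_va + bcnt : child_end;
                uint64_t len = fault_end - user_va;     /* bytes in this child */

                printf("child %llu: va 0x%llx len 0x%llx\n",
                       (unsigned long long)idx,
                       (unsigned long long)user_va,
                       (unsigned long long)len);
                user_va += len;
                bcnt -= len;
        }
        return 0;
}

With the values above, the fault touches children 1, 2 and 3, and only the first and last iterations map a partial child; the driver loop does the same, but faults each slice through pagefault_real_mr() on the corresponding child MR.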
@@ -660,8 +676,6 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
 			u32 *bytes_mapped, u32 flags)
 {
 	struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
-	struct mlx5_ib_mr *mtt;
-	int npages = 0;
 
 	if (!odp->is_implicit_odp) {
 		if (unlikely(io_virt < ib_umem_start(odp) ||
@@ -670,48 +684,8 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
 		return pagefault_real_mr(mr, odp, io_virt, bcnt, bytes_mapped,
 		                         flags);
 	}
-
-	if (unlikely(io_virt >= mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE ||
-	             mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE - io_virt < bcnt))
-		return -EFAULT;
-
-	mtt = implicit_mr_get_data(mr, io_virt, bcnt);
-	if (IS_ERR(mtt))
-		return PTR_ERR(mtt);
-
-	/* Fault each child mr that intersects with our interval. */
-	while (bcnt) {
-		struct ib_umem_odp *umem_odp = to_ib_umem_odp(mtt->umem);
-		u64 end = min_t(u64, io_virt + bcnt, ib_umem_end(umem_odp));
-		u64 len = end - io_virt;
-		int ret;
-
-		ret = pagefault_real_mr(mtt, umem_odp, io_virt, len,
-		                        bytes_mapped, flags);
-		if (ret < 0)
-			return ret;
-		io_virt += len;
-		bcnt -= len;
-		npages += ret;
-
-		if (unlikely(bcnt)) {
-			mtt = xa_load(&mr->implicit_children,
-			              io_virt >> MLX5_IMR_MTT_SHIFT);
-
-			/*
-			 * implicit_mr_get_data sets up all the leaves, this
-			 * means they got invalidated before we got to them.
-			 */
-			if (!mtt) {
-				mlx5_ib_dbg(
-					mr->dev,
-					"next implicit leaf removed at 0x%llx.\n",
-					io_virt);
-				return -EAGAIN;
-			}
-		}
-	}
-	return npages;
+	return pagefault_implicit_mr(mr, odp, io_virt, bcnt, bytes_mapped,
+	                             flags);
 }
 
 struct pf_frame {
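One detail carried over from the bounds check removed from pagefault_mr() into pagefault_implicit_mr() is its overflow-safe form: it tests "limit - io_virt < bcnt" rather than "io_virt + bcnt > limit", so a huge bcnt cannot wrap the 64-bit addition and slip past the check. Below is a minimal standalone sketch of that pattern; the names va, bcnt and limit are illustrative only, not driver identifiers.

#include <stdbool.h>
#include <stdint.h>

/*
 * Returns true when [va, va + bcnt) does not fit below limit.
 * For non-zero bcnt this matches the naive "va + bcnt > limit",
 * but it cannot be fooled by va + bcnt wrapping around UINT64_MAX,
 * e.g. range_out_of_bounds(0xfffffffffffff000, 0x2000, limit) is
 * still reported as out of bounds even though the naive sum wraps.
 */
static bool range_out_of_bounds(uint64_t va, uint64_t bcnt, uint64_t limit)
{
        return va >= limit || limit - va < bcnt;
}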