
Commit 6f57428

Authored by Dragos Tatulea (dtatulea), committed by Saeed Mahameed
net/mlx5e: RX, Enable skb page recycling through the page_pool
Start using the page_pool skb recycling api to recycle all pages back to
the page pool and stop using atomic page reference counting.

The mlx5e driver used to manage in-flight pages using page refcounting:
for each fragment there were 2 atomic write operations happening (one
for building the skb and one on skb release).

The page_pool api introduced a method to track page fragments more
optimally:
* The page's pp_fragment_count is set to a large bias on page alloc
  (1 x atomic write operation).
* The driver tracks the actual page fragments in a non atomic variable.
* When the skb is recycled, pp_fragment_count is decremented
  (atomic write operation).
* When page is released in the driver, the unused number of fragments
  (relative to the bias) is deducted from pp_fragment_count
  (atomic write operation).
* Last page defragmentation will only be an atomic read.

So in total there are `number of fragments + 1` atomic write ops. As
opposed to previously: `2 * frags` atomic write ops.

Pages are wrapped in a mlx5e_frag_page structure which also contains the
number of fragments. This makes it easy to count the fragments in the
driver.

This change brings performance improvements for the case when the old rx
page_cache had low recycling rates due to head of queue blocking. For an
iperf3 TCP test with a single stream, on a single core (iperf and receive
queue running on the same core), the following improvements can be noticed:

* Striding rq:
  - before (net-next baseline): bitrate = 30.1 Gbits/sec
  - after                     : bitrate = 31.4 Gbits/sec (diff: 4.14 %)

* Legacy rq:
  - before (net-next baseline): bitrate = 30.2 Gbits/sec
  - after                     : bitrate = 33.0 Gbits/sec (diff: 8.48 %)

There are 2 temporary performance degradations introduced:

1) TCP streams that had a good recycling rate with the old page_cache
   have a degradation for both striding and linear rq. This is due to
   very low page pool cache recycling: the pages are released during skb
   recycle which will release pages to the page pool ring for safety.
   The following patches in this series will tackle this problem by
   deferring the page release in the driver to increase the chance of
   having pages recycled to the cache.

2) XDP performance is now lower (4-5 %) due to the higher number of
   atomic operations used for fragment management. But this opens the
   door for supporting multiple packets per page in XDP, which will
   bring a big gain.

Otherwise, performance is similar to baseline.

Signed-off-by: Dragos Tatulea <[email protected]>
Reviewed-by: Tariq Toukan <[email protected]>
Signed-off-by: Saeed Mahameed <[email protected]>
1 parent 4a5c5e2 commit 6f57428
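
The accounting described in the log maps onto the generic page_pool fragment helpers. Below is a minimal sketch of a driver-side alloc/release pair implementing that scheme; the helper names and the bias value are illustrative assumptions (the RX-path helpers are not part of the hunks shown in this excerpt), while page_pool_fragment_page(), page_pool_defrag_page() and page_pool_put_defragged_page() are the real page_pool API.

#include <net/page_pool.h>

/* Assumed bias: any value comfortably larger than the maximum number of
 * fragments a single page can be split into.
 */
#define MLX5E_PAGECNT_BIAS_MAX	(PAGE_SIZE / 64)

static int mlx5e_page_alloc_fragmented(struct mlx5e_rq *rq,
					struct mlx5e_frag_page *frag_page)
{
	struct page *page = page_pool_dev_alloc_pages(rq->page_pool);

	if (unlikely(!page))
		return -ENOMEM;

	/* Atomic write #1: pre-charge the page with a large fragment bias. */
	page_pool_fragment_page(page, MLX5E_PAGECNT_BIAS_MAX);

	*frag_page = (struct mlx5e_frag_page) {
		.page	= page,
		.frags	= 0,	/* consumed fragments, tracked non-atomically */
	};
	return 0;
}

static void mlx5e_page_release_fragmented(struct mlx5e_rq *rq,
					  struct mlx5e_frag_page *frag_page,
					  bool recycle)
{
	u16 drain_count = MLX5E_PAGECNT_BIAS_MAX - frag_page->frags;
	struct page *page = frag_page->page;

	/* Atomic write #2: give back the unused part of the bias. The frags
	 * handed out to skbs are dropped one by one by skb recycling.
	 */
	if (page_pool_defrag_page(page, drain_count) == 0)
		page_pool_put_defragged_page(rq->page_pool, page, -1, recycle);
}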

File tree

5 files changed: +121 -88 lines

drivers/net/ethernet/mellanox/mlx5/core/en.h

Lines changed: 9 additions & 3 deletions
@@ -600,16 +600,22 @@ struct mlx5e_icosq {
 	struct work_struct recover_work;
 } ____cacheline_aligned_in_smp;
 
+struct mlx5e_frag_page {
+	struct page *page;
+	u16 frags;
+};
+
 struct mlx5e_wqe_frag_info {
 	union {
-		struct page **pagep;
+		struct mlx5e_frag_page *frag_page;
 		struct xdp_buff **xskp;
 	};
 	u32 offset;
 	bool last_in_page;
 };
 
 union mlx5e_alloc_units {
+	DECLARE_FLEX_ARRAY(struct mlx5e_frag_page, frag_pages);
 	DECLARE_FLEX_ARRAY(struct page *, pages);
 	DECLARE_FLEX_ARRAY(struct xdp_buff *, xsk_buffs);
 };
@@ -666,15 +672,15 @@ struct mlx5e_rq_frags_info {
 struct mlx5e_dma_info {
 	dma_addr_t addr;
 	union {
-		struct page **pagep;
+		struct mlx5e_frag_page *frag_page;
 		struct page *page;
 	};
 };
 
 struct mlx5e_shampo_hd {
 	u32 mkey;
 	struct mlx5e_dma_info *info;
-	struct page **pages;
+	struct mlx5e_frag_page *pages;
 	u16 curr_page_index;
 	u16 hd_per_wq;
 	u16 hd_per_wqe;

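The new mlx5e_frag_page wrapper above is what RX code uses to count consumers of a page without atomics. The RX completion helpers are not in this excerpt; the snippet below is a hedged sketch (name and exact signature assumed) of how a consumer could bump the non-atomic counter when attaching a page fragment to an skb.

static void mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb,
			       struct mlx5e_frag_page *frag_page,
			       u32 frag_offset, u32 len, unsigned int truesize)
{
	dma_addr_t addr = page_pool_get_dma_addr(frag_page->page);

	dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len,
				rq->buff.map_dir);

	frag_page->frags++;	/* non-atomic: one more reference on this page */
	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, frag_page->page,
			frag_offset, len, truesize);
}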
drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h

Lines changed: 1 addition & 2 deletions
@@ -65,7 +65,6 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget);
 int mlx5e_poll_ico_cq(struct mlx5e_cq *cq);
 
 /* RX */
-void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle);
 INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq));
 INDIRECT_CALLABLE_DECLARE(bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq));
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
@@ -488,7 +487,7 @@ static inline bool mlx5e_icosq_can_post_wqe(struct mlx5e_icosq *sq, u16 wqe_size
 
 static inline struct mlx5e_mpw_info *mlx5e_get_mpw_info(struct mlx5e_rq *rq, int i)
 {
-	size_t isz = struct_size(rq->mpwqe.info, alloc_units.pages, rq->mpwqe.pages_per_wqe);
+	size_t isz = struct_size(rq->mpwqe.info, alloc_units.frag_pages, rq->mpwqe.pages_per_wqe);
 
 	return (struct mlx5e_mpw_info *)((char *)rq->mpwqe.info + array_size(i, isz));
 }

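For readers unfamiliar with struct_size()/array_size() (linux/overflow.h): each mlx5e_mpw_info entry ends in the flexible alloc_units union, so entries in rq->mpwqe.info are strided by the struct size plus pages_per_wqe elements of the selected union member. An open-coded equivalent of the indexing above, as an illustration only (assuming the flexible union itself adds no size):

static inline struct mlx5e_mpw_info *
mlx5e_get_mpw_info_open_coded(struct mlx5e_rq *rq, int i)
{
	size_t stride = sizeof(struct mlx5e_mpw_info) +
			(size_t)rq->mpwqe.pages_per_wqe *
			sizeof(struct mlx5e_frag_page);

	/* struct_size()/array_size() compute the same thing, with overflow checks. */
	return (struct mlx5e_mpw_info *)((char *)rq->mpwqe.info + i * stride);
}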
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c

Lines changed: 2 additions & 1 deletion
@@ -523,7 +523,8 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
			break;
		case MLX5E_XDP_XMIT_MODE_PAGE:
			/* XDP_TX from the regular RQ */
-			mlx5e_page_release_dynamic(xdpi.page.rq, xdpi.page.page, recycle);
+			page_pool_put_defragged_page(xdpi.page.rq->page_pool,
+						     xdpi.page.page, -1, recycle);
			break;
		case MLX5E_XDP_XMIT_MODE_XSK:
			/* AF_XDP send */

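A note on the replacement call above (my reading of the page_pool API, not part of the diff): the driver-private release helper is gone, and XDP_TX completions hand the page straight back to the pool.

/* Illustrative restatement of the new XDP_TX release path. */
struct page_pool *pool = xdpi.page.rq->page_pool;

page_pool_put_defragged_page(pool, xdpi.page.page,
			     -1,	/* dma_sync_size: sync the whole buffer */
			     recycle);	/* allow_direct: may use the lockless cache */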
drivers/net/ethernet/mellanox/mlx5/core/en_main.c

Lines changed: 6 additions & 5 deletions
@@ -294,7 +294,7 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node)
 	size_t alloc_size;
 
 	alloc_size = array_size(wq_sz, struct_size(rq->mpwqe.info,
-						   alloc_units.pages,
+						   alloc_units.frag_pages,
						   rq->mpwqe.pages_per_wqe));
 
 	rq->mpwqe.info = kvzalloc_node(alloc_size, GFP_KERNEL, node);
@@ -509,7 +509,8 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq)
 
 	WARN_ON(rq->xsk_pool);
 
-	next_frag.pagep = &rq->wqe.alloc_units->pages[0];
+	next_frag.frag_page = &rq->wqe.alloc_units->frag_pages[0];
+
 	for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) {
 		struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0];
 		struct mlx5e_wqe_frag_info *frag =
@@ -519,7 +520,7 @@ static void mlx5e_init_frags_partition(struct mlx5e_rq *rq)
 		for (f = 0; f < rq->wqe.info.num_frags; f++, frag++) {
 			if (next_frag.offset + frag_info[f].frag_stride > PAGE_SIZE) {
 				/* Pages are assigned at runtime. */
-				next_frag.pagep++;
+				next_frag.frag_page++;
 				next_frag.offset = 0;
 				if (prev)
 					prev->last_in_page = true;
@@ -563,7 +564,7 @@ static int mlx5e_init_wqe_alloc_info(struct mlx5e_rq *rq, int node)
 	if (rq->xsk_pool)
 		aus_sz = sizeof(*aus->xsk_buffs);
 	else
-		aus_sz = sizeof(*aus->pages);
+		aus_sz = sizeof(*aus->frag_pages);
 
 	aus = kvzalloc_node(array_size(len, aus_sz), GFP_KERNEL, node);
 	if (!aus)
@@ -831,7 +832,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
		struct page_pool_params pp_params = { 0 };
 
		pp_params.order = 0;
-		pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
+		pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | PP_FLAG_PAGE_FRAG;
		pp_params.pool_size = pool_size;
		pp_params.nid = node;
		pp_params.dev = rq->pdev;

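PP_FLAG_PAGE_FRAG in the hunk above tells the pool that its pages may be split into fragments tracked via pp_frag_count; without it, the put path ignores the fragment count. The sketch below shows the pool creation in context; only the flags line comes from this diff, the remaining fields and the error handling are assumptions based on typical page_pool setup.

struct page_pool_params pp_params = { 0 };

pp_params.order		= 0;
pp_params.flags		= PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | PP_FLAG_PAGE_FRAG;
pp_params.pool_size	= pool_size;
pp_params.nid		= node;
pp_params.dev		= rq->pdev;
pp_params.dma_dir	= rq->buff.map_dir;	/* assumed from surrounding code */
pp_params.max_len	= PAGE_SIZE;		/* assumed from surrounding code */

rq->page_pool = page_pool_create(&pp_params);
if (IS_ERR(rq->page_pool)) {
	err = PTR_ERR(rq->page_pool);
	rq->page_pool = NULL;
	/* unwind of earlier allocations omitted in this sketch */
}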