
Commit f600b69

jrfastab authored and davem330 committed
virtio_net: Add XDP support
This adds XDP support to virtio_net. Some requirements must be met for XDP to be enabled, depending on the mode. First, it is only supported with LRO disabled so that data is not pushed across multiple buffers. Second, the MTU must be less than a page size to avoid having to handle XDP across multiple pages.

If mergeable receive is enabled, this patch only supports the case where the header and data are in the same buffer, which we can check when a packet is received by looking at num_buf. If num_buf is greater than 1 and an XDP program is loaded, the packet is dropped and a warning is thrown. When any_header_sg is set this does not happen and both header and data are put in a single buffer as expected, so we check this when XDP programs are loaded. Subsequent patches will process the packet in a degraded mode to ensure connectivity and correctness are not lost even if the backend pushes packets into multiple buffers.

If big packets mode is enabled and the MTU/LRO conditions above are met, then XDP is allowed.

This patch was tested with qemu with vhost=on and vhost=off, where mergeable and big_packet modes were forced via hard-coded feature negotiation. Multiple buffers per packet were forced via a small test patch to vhost.c in the vhost=on qemu mode.

Suggested-by: Shrijeet Mukherjee <[email protected]>
Signed-off-by: John Fastabend <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
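For illustration only (not part of this commit): a minimal sketch of the kind of XDP program the new receive-path hook would run. The file name, section name, and drop policy are hypothetical; the program just performs the bounds check against data/data_end that the verifier requires and drops frames too short to hold an Ethernet header.

/* Hypothetical example, not part of this patch: a minimal XDP program
 * that virtio_net's XDP hook could execute. It drops frames shorter
 * than an Ethernet header and passes everything else up the stack.
 * Build (assumed): clang -O2 -target bpf -c xdp_drop_runts.c -o xdp_drop_runts.o
 */
#include <linux/bpf.h>
#include <linux/if_ether.h>

#define __section(NAME) __attribute__((section(NAME), used))

__section("prog")
int xdp_drop_runts(struct xdp_md *ctx)
{
	void *data = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;

	/* The verifier requires an explicit bounds check before packet
	 * data is dereferenced; here it doubles as the runt test.
	 */
	if (data + ETH_HLEN > data_end)
		return XDP_DROP;

	return XDP_PASS;
}

char _license[] __section("license") = "GPL";

Once compiled, such an object would typically be attached from userspace with something like "ip link set dev eth0 xdp obj xdp_drop_runts.o" (device name illustrative), which reaches this driver through the new .ndo_xdp callback with the XDP_SETUP_PROG command.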
1 parent f23bc46 commit f600b69

1 file changed: +171 −5 lines

Diff for: drivers/net/virtio_net.c

@@ -22,6 +22,7 @@
 #include <linux/module.h>
 #include <linux/virtio.h>
 #include <linux/virtio_net.h>
+#include <linux/bpf.h>
 #include <linux/scatterlist.h>
 #include <linux/if_vlan.h>
 #include <linux/slab.h>
@@ -81,6 +82,8 @@ struct receive_queue {
 
 	struct napi_struct napi;
 
+	struct bpf_prog __rcu *xdp_prog;
+
 	/* Chain pages by the private ptr. */
 	struct page *pages;
 
@@ -324,6 +327,38 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 	return skb;
 }
 
+static u32 do_xdp_prog(struct virtnet_info *vi,
+		       struct bpf_prog *xdp_prog,
+		       struct page *page, int offset, int len)
+{
+	int hdr_padded_len;
+	struct xdp_buff xdp;
+	u32 act;
+	u8 *buf;
+
+	buf = page_address(page) + offset;
+
+	if (vi->mergeable_rx_bufs)
+		hdr_padded_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+	else
+		hdr_padded_len = sizeof(struct padded_vnet_hdr);
+
+	xdp.data = buf + hdr_padded_len;
+	xdp.data_end = xdp.data + (len - vi->hdr_len);
+
+	act = bpf_prog_run_xdp(xdp_prog, &xdp);
+	switch (act) {
+	case XDP_PASS:
+		return XDP_PASS;
+	default:
+		bpf_warn_invalid_xdp_action(act);
+	case XDP_TX:
+	case XDP_ABORTED:
+	case XDP_DROP:
+		return XDP_DROP;
+	}
+}
+
 static struct sk_buff *receive_small(struct virtnet_info *vi, void *buf, unsigned int len)
 {
 	struct sk_buff * skb = buf;
@@ -340,14 +375,32 @@ static struct sk_buff *receive_big(struct net_device *dev,
 				   void *buf,
 				   unsigned int len)
 {
+	struct bpf_prog *xdp_prog;
 	struct page *page = buf;
-	struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
+	struct sk_buff *skb;
 
+	rcu_read_lock();
+	xdp_prog = rcu_dereference(rq->xdp_prog);
+	if (xdp_prog) {
+		struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
+		u32 act;
+
+		if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
+			goto err_xdp;
+		act = do_xdp_prog(vi, xdp_prog, page, 0, len);
+		if (act == XDP_DROP)
+			goto err_xdp;
+	}
+	rcu_read_unlock();
+
+	skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
 	if (unlikely(!skb))
 		goto err;
 
 	return skb;
 
+err_xdp:
+	rcu_read_unlock();
 err:
 	dev->stats.rx_dropped++;
 	give_pages(rq, page);
@@ -365,11 +418,42 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
 	struct page *page = virt_to_head_page(buf);
 	int offset = buf - page_address(page);
-	unsigned int truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
+	struct sk_buff *head_skb, *curr_skb;
+	struct bpf_prog *xdp_prog;
+	unsigned int truesize;
+
+	rcu_read_lock();
+	xdp_prog = rcu_dereference(rq->xdp_prog);
+	if (xdp_prog) {
+		u32 act;
+
+		/* No known backend devices should send packets with
+		 * more than a single buffer when XDP conditions are
+		 * met. However it is not strictly illegal so the case
+		 * is handled as an exception and a warning is thrown.
+		 */
+		if (unlikely(num_buf > 1)) {
+			bpf_warn_invalid_xdp_buffer();
+			goto err_xdp;
+		}
+
+		/* Transient failure which in theory could occur if
+		 * in-flight packets from before XDP was enabled reach
+		 * the receive path after XDP is loaded. In practice I
+		 * was not able to create this condition.
+		 */
+		if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
+			goto err_xdp;
+
+		act = do_xdp_prog(vi, xdp_prog, page, offset, len);
+		if (act == XDP_DROP)
+			goto err_xdp;
+	}
+	rcu_read_unlock();
 
-	struct sk_buff *head_skb = page_to_skb(vi, rq, page, offset, len,
-					       truesize);
-	struct sk_buff *curr_skb = head_skb;
+	truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
+	head_skb = page_to_skb(vi, rq, page, offset, len, truesize);
+	curr_skb = head_skb;
 
 	if (unlikely(!curr_skb))
 		goto err_skb;
@@ -423,6 +507,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
 	return head_skb;
 
+err_xdp:
+	rcu_read_unlock();
 err_skb:
 	put_page(page);
 	while (--num_buf) {
@@ -1337,6 +1423,13 @@ static int virtnet_set_channels(struct net_device *dev,
 	if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0)
 		return -EINVAL;
 
+	/* For now we don't support modifying channels while XDP is loaded
+	 * also when XDP is loaded all RX queues have XDP programs so we only
+	 * need to check a single RX queue.
+	 */
+	if (vi->rq[0].xdp_prog)
+		return -EINVAL;
+
 	get_online_cpus();
 	err = virtnet_set_queues(vi, queue_pairs);
 	if (!err) {
@@ -1428,6 +1521,70 @@ static const struct ethtool_ops virtnet_ethtool_ops = {
 	.set_settings = virtnet_set_settings,
 };
 
+static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog)
+{
+	unsigned long int max_sz = PAGE_SIZE - sizeof(struct padded_vnet_hdr);
+	struct virtnet_info *vi = netdev_priv(dev);
+	struct bpf_prog *old_prog;
+	int i;
+
+	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
+	    virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6)) {
+		netdev_warn(dev, "can't set XDP while host is implementing LRO, disable LRO first\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (vi->mergeable_rx_bufs && !vi->any_header_sg) {
+		netdev_warn(dev, "XDP expects header/data in single page, any_header_sg required\n");
+		return -EINVAL;
+	}
+
+	if (dev->mtu > max_sz) {
+		netdev_warn(dev, "XDP requires MTU less than %lu\n", max_sz);
+		return -EINVAL;
+	}
+
+	if (prog) {
+		prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
+		if (IS_ERR(prog))
+			return PTR_ERR(prog);
+	}
+
+	for (i = 0; i < vi->max_queue_pairs; i++) {
+		old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
+		rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
+		if (old_prog)
+			bpf_prog_put(old_prog);
+	}
+
+	return 0;
+}
+
+static bool virtnet_xdp_query(struct net_device *dev)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+	int i;
+
+	for (i = 0; i < vi->max_queue_pairs; i++) {
+		if (vi->rq[i].xdp_prog)
+			return true;
+	}
+	return false;
+}
+
+static int virtnet_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+{
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		return virtnet_xdp_set(dev, xdp->prog);
+	case XDP_QUERY_PROG:
+		xdp->prog_attached = virtnet_xdp_query(dev);
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
 static const struct net_device_ops virtnet_netdev = {
 	.ndo_open = virtnet_open,
 	.ndo_stop = virtnet_close,
@@ -1444,6 +1601,7 @@ static const struct net_device_ops virtnet_netdev = {
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	.ndo_busy_poll = virtnet_busy_poll,
 #endif
+	.ndo_xdp = virtnet_xdp,
 };
 
 static void virtnet_config_changed_work(struct work_struct *work)
@@ -1505,12 +1663,20 @@ static void virtnet_free_queues(struct virtnet_info *vi)
 
 static void free_receive_bufs(struct virtnet_info *vi)
 {
+	struct bpf_prog *old_prog;
 	int i;
 
+	rtnl_lock();
 	for (i = 0; i < vi->max_queue_pairs; i++) {
 		while (vi->rq[i].pages)
 			__free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
+
+		old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
+		RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL);
+		if (old_prog)
+			bpf_prog_put(old_prog);
 	}
+	rtnl_unlock();
 }
 
 static void free_receive_page_frags(struct virtnet_info *vi)
