Skip to content

Commit 351e158

Browse files
haiyangzdavem330
authored and committed
hv_netvsc: Add XDP support
This patch adds support for XDP in native mode to the hv_netvsc driver, and transparently sets the XDP program on the associated VF NIC as well. Setting / unsetting an XDP program on the synthetic NIC (netvsc) propagates to the VF NIC automatically. Setting / unsetting an XDP program on the VF NIC directly is not recommended: it is not propagated to the synthetic NIC, and it may be overwritten by a later setting on the synthetic NIC. The Azure/Hyper-V synthetic NIC receive buffer doesn't provide headroom for XDP. We thought about reusing the RNDIS header space, but it's too small. So we decided to copy the packets to a page buffer for XDP. Also, most of our VMs on Azure have Accelerated Networking (SR-IOV) enabled, so most of the packets run on the VF NIC. The synthetic NIC is considered a fallback data path, so the data copy on netvsc won't impact performance significantly. An XDP program cannot run with LRO (RSC) enabled, so you need to disable LRO before running XDP: `ethtool -K eth0 lro off`. XDP actions not yet supported: XDP_REDIRECT. Signed-off-by: Haiyang Zhang <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 6ec8b6c commit 351e158

File tree

6 files changed

+409
-39
lines changed

6 files changed

+409
-39
lines changed

Diff for: drivers/net/hyperv/Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# SPDX-License-Identifier: GPL-2.0-only
22
obj-$(CONFIG_HYPERV_NET) += hv_netvsc.o
33

4-
hv_netvsc-y := netvsc_drv.o netvsc.o rndis_filter.o netvsc_trace.o
4+
hv_netvsc-y := netvsc_drv.o netvsc.o rndis_filter.o netvsc_trace.o netvsc_bpf.o

Diff for: drivers/net/hyperv/hyperv_net.h

+20-1
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,8 @@ struct netvsc_device_info {
142142
u32 send_section_size;
143143
u32 recv_section_size;
144144

145+
struct bpf_prog *bprog;
146+
145147
u8 rss_key[NETVSC_HASH_KEYLEN];
146148
};
147149

@@ -189,7 +191,8 @@ int netvsc_send(struct net_device *net,
189191
struct hv_netvsc_packet *packet,
190192
struct rndis_message *rndis_msg,
191193
struct hv_page_buffer *page_buffer,
192-
struct sk_buff *skb);
194+
struct sk_buff *skb,
195+
bool xdp_tx);
193196
void netvsc_linkstatus_callback(struct net_device *net,
194197
struct rndis_message *resp);
195198
int netvsc_recv_callback(struct net_device *net,
@@ -198,6 +201,16 @@ int netvsc_recv_callback(struct net_device *net,
198201
void netvsc_channel_cb(void *context);
199202
int netvsc_poll(struct napi_struct *napi, int budget);
200203

204+
u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan,
205+
struct xdp_buff *xdp);
206+
unsigned int netvsc_xdp_fraglen(unsigned int len);
207+
struct bpf_prog *netvsc_xdp_get(struct netvsc_device *nvdev);
208+
int netvsc_xdp_set(struct net_device *dev, struct bpf_prog *prog,
209+
struct netlink_ext_ack *extack,
210+
struct netvsc_device *nvdev);
211+
int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog);
212+
int netvsc_bpf(struct net_device *dev, struct netdev_bpf *bpf);
213+
201214
int rndis_set_subchannel(struct net_device *ndev,
202215
struct netvsc_device *nvdev,
203216
struct netvsc_device_info *dev_info);
@@ -832,6 +845,8 @@ struct nvsp_message {
832845
#define RNDIS_MAX_PKT_DEFAULT 8
833846
#define RNDIS_PKT_ALIGN_DEFAULT 8
834847

848+
#define NETVSC_XDP_HDRM 256
849+
835850
struct multi_send_data {
836851
struct sk_buff *skb; /* skb containing the pkt */
837852
struct hv_netvsc_packet *pkt; /* netvsc pkt pending */
@@ -867,6 +882,7 @@ struct netvsc_stats {
867882
u64 bytes;
868883
u64 broadcast;
869884
u64 multicast;
885+
u64 xdp_drop;
870886
struct u64_stats_sync syncp;
871887
};
872888

@@ -972,6 +988,9 @@ struct netvsc_channel {
972988
atomic_t queue_sends;
973989
struct nvsc_rsc rsc;
974990

991+
struct bpf_prog __rcu *bpf_prog;
992+
struct xdp_rxq_info xdp_rxq;
993+
975994
struct netvsc_stats tx_stats;
976995
struct netvsc_stats rx_stats;
977996
};

Diff for: drivers/net/hyperv/netvsc.c

+26-5
Original file line numberDiff line numberDiff line change
@@ -122,8 +122,10 @@ static void free_netvsc_device(struct rcu_head *head)
122122
vfree(nvdev->send_buf);
123123
kfree(nvdev->send_section_map);
124124

125-
for (i = 0; i < VRSS_CHANNEL_MAX; i++)
125+
for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
126+
xdp_rxq_info_unreg(&nvdev->chan_table[i].xdp_rxq);
126127
vfree(nvdev->chan_table[i].mrc.slots);
128+
}
127129

128130
kfree(nvdev);
129131
}
@@ -900,7 +902,8 @@ int netvsc_send(struct net_device *ndev,
900902
struct hv_netvsc_packet *packet,
901903
struct rndis_message *rndis_msg,
902904
struct hv_page_buffer *pb,
903-
struct sk_buff *skb)
905+
struct sk_buff *skb,
906+
bool xdp_tx)
904907
{
905908
struct net_device_context *ndev_ctx = netdev_priv(ndev);
906909
struct netvsc_device *net_device
@@ -923,10 +926,11 @@ int netvsc_send(struct net_device *ndev,
923926
packet->send_buf_index = NETVSC_INVALID_INDEX;
924927
packet->cp_partial = false;
925928

926-
/* Send control message directly without accessing msd (Multi-Send
927-
* Data) field which may be changed during data packet processing.
929+
/* Send a control message or XDP packet directly without accessing
930+
* msd (Multi-Send Data) field which may be changed during data packet
931+
* processing.
928932
*/
929-
if (!skb)
933+
if (!skb || xdp_tx)
930934
return netvsc_send_pkt(device, packet, net_device, pb, skb);
931935

932936
/* batch packets in send buffer if possible */
@@ -1392,6 +1396,21 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
13921396
nvchan->net_device = net_device;
13931397
u64_stats_init(&nvchan->tx_stats.syncp);
13941398
u64_stats_init(&nvchan->rx_stats.syncp);
1399+
1400+
ret = xdp_rxq_info_reg(&nvchan->xdp_rxq, ndev, i);
1401+
1402+
if (ret) {
1403+
netdev_err(ndev, "xdp_rxq_info_reg fail: %d\n", ret);
1404+
goto cleanup2;
1405+
}
1406+
1407+
ret = xdp_rxq_info_reg_mem_model(&nvchan->xdp_rxq,
1408+
MEM_TYPE_PAGE_SHARED, NULL);
1409+
1410+
if (ret) {
1411+
netdev_err(ndev, "xdp reg_mem_model fail: %d\n", ret);
1412+
goto cleanup2;
1413+
}
13951414
}
13961415

13971416
/* Enable NAPI handler before init callbacks */
@@ -1437,6 +1456,8 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
14371456

14381457
cleanup:
14391458
netif_napi_del(&net_device->chan_table[0].napi);
1459+
1460+
cleanup2:
14401461
free_netvsc_device(&net_device->rcu);
14411462

14421463
return ERR_PTR(ret);

Diff for: drivers/net/hyperv/netvsc_bpf.c

+209
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
// SPDX-License-Identifier: GPL-2.0-only
2+
/* Copyright (c) 2019, Microsoft Corporation.
3+
*
4+
* Author:
5+
* Haiyang Zhang <[email protected]>
6+
*/
7+
8+
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
9+
10+
#include <linux/netdevice.h>
11+
#include <linux/etherdevice.h>
12+
#include <linux/ethtool.h>
13+
#include <linux/bpf.h>
14+
#include <linux/bpf_trace.h>
15+
#include <linux/kernel.h>
16+
#include <net/xdp.h>
17+
18+
#include <linux/mutex.h>
19+
#include <linux/rtnetlink.h>
20+
21+
#include "hyperv_net.h"
22+
23+
u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan,
24+
struct xdp_buff *xdp)
25+
{
26+
void *data = nvchan->rsc.data[0];
27+
u32 len = nvchan->rsc.len[0];
28+
struct page *page = NULL;
29+
struct bpf_prog *prog;
30+
u32 act = XDP_PASS;
31+
32+
xdp->data_hard_start = NULL;
33+
34+
rcu_read_lock();
35+
prog = rcu_dereference(nvchan->bpf_prog);
36+
37+
if (!prog)
38+
goto out;
39+
40+
/* allocate page buffer for data */
41+
page = alloc_page(GFP_ATOMIC);
42+
if (!page) {
43+
act = XDP_DROP;
44+
goto out;
45+
}
46+
47+
xdp->data_hard_start = page_address(page);
48+
xdp->data = xdp->data_hard_start + NETVSC_XDP_HDRM;
49+
xdp_set_data_meta_invalid(xdp);
50+
xdp->data_end = xdp->data + len;
51+
xdp->rxq = &nvchan->xdp_rxq;
52+
xdp->handle = 0;
53+
54+
memcpy(xdp->data, data, len);
55+
56+
act = bpf_prog_run_xdp(prog, xdp);
57+
58+
switch (act) {
59+
case XDP_PASS:
60+
case XDP_TX:
61+
case XDP_DROP:
62+
break;
63+
64+
case XDP_ABORTED:
65+
trace_xdp_exception(ndev, prog, act);
66+
break;
67+
68+
default:
69+
bpf_warn_invalid_xdp_action(act);
70+
}
71+
72+
out:
73+
rcu_read_unlock();
74+
75+
if (page && act != XDP_PASS && act != XDP_TX) {
76+
__free_page(page);
77+
xdp->data_hard_start = NULL;
78+
}
79+
80+
return act;
81+
}
82+
83+
unsigned int netvsc_xdp_fraglen(unsigned int len)
84+
{
85+
return SKB_DATA_ALIGN(len) +
86+
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
87+
}
88+
89+
struct bpf_prog *netvsc_xdp_get(struct netvsc_device *nvdev)
90+
{
91+
return rtnl_dereference(nvdev->chan_table[0].bpf_prog);
92+
}
93+
94+
int netvsc_xdp_set(struct net_device *dev, struct bpf_prog *prog,
95+
struct netlink_ext_ack *extack,
96+
struct netvsc_device *nvdev)
97+
{
98+
struct bpf_prog *old_prog;
99+
int buf_max, i;
100+
101+
old_prog = netvsc_xdp_get(nvdev);
102+
103+
if (!old_prog && !prog)
104+
return 0;
105+
106+
buf_max = NETVSC_XDP_HDRM + netvsc_xdp_fraglen(dev->mtu + ETH_HLEN);
107+
if (prog && buf_max > PAGE_SIZE) {
108+
netdev_err(dev, "XDP: mtu:%u too large, buf_max:%u\n",
109+
dev->mtu, buf_max);
110+
NL_SET_ERR_MSG_MOD(extack, "XDP: mtu too large");
111+
112+
return -EOPNOTSUPP;
113+
}
114+
115+
if (prog && (dev->features & NETIF_F_LRO)) {
116+
netdev_err(dev, "XDP: not support LRO\n");
117+
NL_SET_ERR_MSG_MOD(extack, "XDP: not support LRO");
118+
119+
return -EOPNOTSUPP;
120+
}
121+
122+
if (prog)
123+
bpf_prog_add(prog, nvdev->num_chn);
124+
125+
for (i = 0; i < nvdev->num_chn; i++)
126+
rcu_assign_pointer(nvdev->chan_table[i].bpf_prog, prog);
127+
128+
if (old_prog)
129+
for (i = 0; i < nvdev->num_chn; i++)
130+
bpf_prog_put(old_prog);
131+
132+
return 0;
133+
}
134+
135+
int netvsc_vf_setxdp(struct net_device *vf_netdev, struct bpf_prog *prog)
136+
{
137+
struct netdev_bpf xdp;
138+
bpf_op_t ndo_bpf;
139+
140+
ASSERT_RTNL();
141+
142+
if (!vf_netdev)
143+
return 0;
144+
145+
ndo_bpf = vf_netdev->netdev_ops->ndo_bpf;
146+
if (!ndo_bpf)
147+
return 0;
148+
149+
memset(&xdp, 0, sizeof(xdp));
150+
151+
xdp.command = XDP_SETUP_PROG;
152+
xdp.prog = prog;
153+
154+
return ndo_bpf(vf_netdev, &xdp);
155+
}
156+
157+
static u32 netvsc_xdp_query(struct netvsc_device *nvdev)
158+
{
159+
struct bpf_prog *prog = netvsc_xdp_get(nvdev);
160+
161+
if (prog)
162+
return prog->aux->id;
163+
164+
return 0;
165+
}
166+
167+
int netvsc_bpf(struct net_device *dev, struct netdev_bpf *bpf)
168+
{
169+
struct net_device_context *ndevctx = netdev_priv(dev);
170+
struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
171+
struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev);
172+
struct netlink_ext_ack *extack = bpf->extack;
173+
int ret;
174+
175+
if (!nvdev || nvdev->destroy) {
176+
if (bpf->command == XDP_QUERY_PROG) {
177+
bpf->prog_id = 0;
178+
return 0; /* Query must always succeed */
179+
} else {
180+
return -ENODEV;
181+
}
182+
}
183+
184+
switch (bpf->command) {
185+
case XDP_SETUP_PROG:
186+
ret = netvsc_xdp_set(dev, bpf->prog, extack, nvdev);
187+
188+
if (ret)
189+
return ret;
190+
191+
ret = netvsc_vf_setxdp(vf_netdev, bpf->prog);
192+
193+
if (ret) {
194+
netdev_err(dev, "vf_setxdp failed:%d\n", ret);
195+
NL_SET_ERR_MSG_MOD(extack, "vf_setxdp failed");
196+
197+
netvsc_xdp_set(dev, NULL, extack, nvdev);
198+
}
199+
200+
return ret;
201+
202+
case XDP_QUERY_PROG:
203+
bpf->prog_id = netvsc_xdp_query(nvdev);
204+
return 0;
205+
206+
default:
207+
return -EINVAL;
208+
}
209+
}

0 commit comments

Comments
 (0)