Skip to content

Commit 9431709

Browse files
petarpenkov authored and davem330 committed
tun: enable NAPI for TUN/TAP driver
Changes TUN driver to use napi_gro_receive() upon receiving packets rather than netif_rx_ni(). Adds flag IFF_NAPI that enables these changes and operation is not affected if the flag is disabled. SKBs are constructed upon packet arrival and are queued to be processed later. The new path was evaluated with a benchmark with the following setup: Open two tap devices and a receiver thread that reads in a loop for each device. Start one sender thread and pin all threads to different CPUs. Send 1M minimum UDP packets to each device and measure sending time for each of the sending methods: napi_gro_receive(): 4.90s netif_rx_ni(): 4.90s netif_receive_skb(): 7.20s Signed-off-by: Petar Penkov <[email protected]> Cc: Eric Dumazet <[email protected]> Cc: Mahesh Bandewar <[email protected]> Cc: Willem de Bruijn <[email protected]> Cc: [email protected] Cc: [email protected] Acked-by: Mahesh Bandewar <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent e94cd81 commit 9431709

File tree

2 files changed

+119
-15
lines changed

2 files changed

+119
-15
lines changed

drivers/net/tun.c

+118-15
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ do { \
121121
#define TUN_VNET_BE 0x40000000
122122

123123
#define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \
124-
IFF_MULTI_QUEUE)
124+
IFF_MULTI_QUEUE | IFF_NAPI)
125125
#define GOODCOPY_LEN 128
126126

127127
#define FLT_EXACT_COUNT 8
@@ -172,6 +172,7 @@ struct tun_file {
172172
u16 queue_index;
173173
unsigned int ifindex;
174174
};
175+
struct napi_struct napi;
175176
struct list_head next;
176177
struct tun_struct *detached;
177178
struct skb_array tx_array;
@@ -229,6 +230,68 @@ struct tun_struct {
229230
struct bpf_prog __rcu *xdp_prog;
230231
};
231232

233+
static int tun_napi_receive(struct napi_struct *napi, int budget)
234+
{
235+
struct tun_file *tfile = container_of(napi, struct tun_file, napi);
236+
struct sk_buff_head *queue = &tfile->sk.sk_write_queue;
237+
struct sk_buff_head process_queue;
238+
struct sk_buff *skb;
239+
int received = 0;
240+
241+
__skb_queue_head_init(&process_queue);
242+
243+
spin_lock(&queue->lock);
244+
skb_queue_splice_tail_init(queue, &process_queue);
245+
spin_unlock(&queue->lock);
246+
247+
while (received < budget && (skb = __skb_dequeue(&process_queue))) {
248+
napi_gro_receive(napi, skb);
249+
++received;
250+
}
251+
252+
if (!skb_queue_empty(&process_queue)) {
253+
spin_lock(&queue->lock);
254+
skb_queue_splice(&process_queue, queue);
255+
spin_unlock(&queue->lock);
256+
}
257+
258+
return received;
259+
}
260+
261+
/* NAPI poll callback: process up to @budget packets; if the queue was
 * drained before the budget ran out, complete the poll so NAPI can
 * re-arm scheduling for this queue.
 */
static int tun_napi_poll(struct napi_struct *napi, int budget)
{
	unsigned int done = tun_napi_receive(napi, budget);

	if (done < budget)
		napi_complete_done(napi, done);

	return done;
}
272+
273+
static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile,
274+
bool napi_en)
275+
{
276+
if (napi_en) {
277+
netif_napi_add(tun->dev, &tfile->napi, tun_napi_poll,
278+
NAPI_POLL_WEIGHT);
279+
napi_enable(&tfile->napi);
280+
}
281+
}
282+
283+
static void tun_napi_disable(struct tun_struct *tun, struct tun_file *tfile)
284+
{
285+
if (tun->flags & IFF_NAPI)
286+
napi_disable(&tfile->napi);
287+
}
288+
289+
static void tun_napi_del(struct tun_struct *tun, struct tun_file *tfile)
290+
{
291+
if (tun->flags & IFF_NAPI)
292+
netif_napi_del(&tfile->napi);
293+
}
294+
232295
#ifdef CONFIG_TUN_VNET_CROSS_LE
233296
static inline bool tun_legacy_is_little_endian(struct tun_struct *tun)
234297
{
@@ -541,6 +604,11 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
541604

542605
tun = rtnl_dereference(tfile->tun);
543606

607+
if (tun && clean) {
608+
tun_napi_disable(tun, tfile);
609+
tun_napi_del(tun, tfile);
610+
}
611+
544612
if (tun && !tfile->detached) {
545613
u16 index = tfile->queue_index;
546614
BUG_ON(index >= tun->numqueues);
@@ -598,6 +666,7 @@ static void tun_detach_all(struct net_device *dev)
598666
for (i = 0; i < n; i++) {
599667
tfile = rtnl_dereference(tun->tfiles[i]);
600668
BUG_ON(!tfile);
669+
tun_napi_disable(tun, tfile);
601670
tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN;
602671
tfile->socket.sk->sk_data_ready(tfile->socket.sk);
603672
RCU_INIT_POINTER(tfile->tun, NULL);
@@ -613,6 +682,7 @@ static void tun_detach_all(struct net_device *dev)
613682
synchronize_net();
614683
for (i = 0; i < n; i++) {
615684
tfile = rtnl_dereference(tun->tfiles[i]);
685+
tun_napi_del(tun, tfile);
616686
/* Drop read queue */
617687
tun_queue_purge(tfile);
618688
sock_put(&tfile->sk);
@@ -631,7 +701,8 @@ static void tun_detach_all(struct net_device *dev)
631701
module_put(THIS_MODULE);
632702
}
633703

634-
static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filter)
704+
static int tun_attach(struct tun_struct *tun, struct file *file,
705+
bool skip_filter, bool napi)
635706
{
636707
struct tun_file *tfile = file->private_data;
637708
struct net_device *dev = tun->dev;
@@ -677,10 +748,12 @@ static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filte
677748
rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
678749
tun->numqueues++;
679750

680-
if (tfile->detached)
751+
if (tfile->detached) {
681752
tun_enable_queue(tfile);
682-
else
753+
} else {
683754
sock_hold(&tfile->sk);
755+
tun_napi_init(tun, tfile, napi);
756+
}
684757

685758
tun_set_real_num_queues(tun);
686759

@@ -956,13 +1029,28 @@ static void tun_poll_controller(struct net_device *dev)
9561029
* Tun only receives frames when:
9571030
* 1) the char device endpoint gets data from user space
9581031
* 2) the tun socket gets a sendmsg call from user space
959-
* Since both of those are synchronous operations, we are guaranteed
960-
* never to have pending data when we poll for it
961-
* so there is nothing to do here but return.
1032+
* If NAPI is not enabled, since both of those are synchronous
1033+
* operations, we are guaranteed never to have pending data when we poll
1034+
* for it so there is nothing to do here but return.
9621035
* We need this though so netpoll recognizes us as an interface that
9631036
* supports polling, which enables bridge devices in virt setups to
9641037
* still use netconsole
1038+
* If NAPI is enabled, however, we need to schedule polling for all
1039+
* queues.
9651040
*/
1041+
struct tun_struct *tun = netdev_priv(dev);
1042+
1043+
if (tun->flags & IFF_NAPI) {
1044+
struct tun_file *tfile;
1045+
int i;
1046+
1047+
rcu_read_lock();
1048+
for (i = 0; i < tun->numqueues; i++) {
1049+
tfile = rcu_dereference(tun->tfiles[i]);
1050+
napi_schedule(&tfile->napi);
1051+
}
1052+
rcu_read_unlock();
1053+
}
9661054
return;
9671055
}
9681056
#endif
@@ -1549,11 +1637,25 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
15491637
}
15501638

15511639
rxhash = __skb_get_hash_symmetric(skb);
1552-
#ifndef CONFIG_4KSTACKS
1553-
tun_rx_batched(tun, tfile, skb, more);
1554-
#else
1555-
netif_rx_ni(skb);
1556-
#endif
1640+
1641+
if (tun->flags & IFF_NAPI) {
1642+
struct sk_buff_head *queue = &tfile->sk.sk_write_queue;
1643+
int queue_len;
1644+
1645+
spin_lock_bh(&queue->lock);
1646+
__skb_queue_tail(queue, skb);
1647+
queue_len = skb_queue_len(queue);
1648+
spin_unlock(&queue->lock);
1649+
1650+
if (!more || queue_len > NAPI_POLL_WEIGHT)
1651+
napi_schedule(&tfile->napi);
1652+
1653+
local_bh_enable();
1654+
} else if (!IS_ENABLED(CONFIG_4KSTACKS)) {
1655+
tun_rx_batched(tun, tfile, skb, more);
1656+
} else {
1657+
netif_rx_ni(skb);
1658+
}
15571659

15581660
stats = get_cpu_ptr(tun->pcpu_stats);
15591661
u64_stats_update_begin(&stats->syncp);
@@ -1980,7 +2082,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
19802082
if (err < 0)
19812083
return err;
19822084

1983-
err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER);
2085+
err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER,
2086+
ifr->ifr_flags & IFF_NAPI);
19842087
if (err < 0)
19852088
return err;
19862089

@@ -2066,7 +2169,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
20662169
NETIF_F_HW_VLAN_STAG_TX);
20672170

20682171
INIT_LIST_HEAD(&tun->disabled);
2069-
err = tun_attach(tun, file, false);
2172+
err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI);
20702173
if (err < 0)
20712174
goto err_free_flow;
20722175

@@ -2216,7 +2319,7 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr)
22162319
ret = security_tun_dev_attach_queue(tun->security);
22172320
if (ret < 0)
22182321
goto unlock;
2219-
ret = tun_attach(tun, file, false);
2322+
ret = tun_attach(tun, file, false, tun->flags & IFF_NAPI);
22202323
} else if (ifr->ifr_flags & IFF_DETACH_QUEUE) {
22212324
tun = rtnl_dereference(tfile->tun);
22222325
if (!tun || !(tun->flags & IFF_MULTI_QUEUE) || tfile->detached)

include/uapi/linux/if_tun.h

+1
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
/* TUNSETIFF ifr flags */
6161
#define IFF_TUN 0x0001
6262
#define IFF_TAP 0x0002
63+
#define IFF_NAPI 0x0010
6364
#define IFF_NO_PI 0x1000
6465
/* This flag has no real effect */
6566
#define IFF_ONE_QUEUE 0x2000

0 commit comments

Comments
 (0)