Skip to content

Commit ea9916e

Browse files
author
Alexei Starovoitov
committed
Merge branch 'ndo_xdp_xmit-cleanup'
Jesper Dangaard Brouer says: ==================== As I mentioned in merge commit 10f6786 ("Merge branch 'xdp_xmit-bulking'") I plan to change the API for ndo_xdp_xmit once more, by adding a flags argument, which is done in this patchset. I know it is late in the cycle (currently at rc7), but it would be nice to avoid changing NDOs over several kernel releases, as it is annoying to vendors and distro backporters, but it is not strictly UAPI so it is allowed (according to Alexei). The end-goal is getting rid of the ndo_xdp_flush operation, as it will make it possible for drivers to implement a TXQ synchronization mechanism that is not necessarily derived from the CPU id (smp_processor_id). This patchset removes all callers of the ndo_xdp_flush operation, but it doesn't take the last step of removing it from all drivers. This can be done later, or I can update the patchset on request. Micro-benchmarks only show a very small performance improvement, for map-redirect around ~2 ns, and for non-map redirect ~7 ns. I've not benchmarked this with CONFIG_RETPOLINE, but the performance benefit should be more visible given we end up removing an indirect call. --- V2: Updated based on feedback from Song Liu <[email protected]> ==================== Signed-off-by: Alexei Starovoitov <[email protected]>
2 parents 69b4507 + c1ece6b commit ea9916e

File tree

9 files changed

+72
-35
lines changed

9 files changed

+72
-35
lines changed

drivers/net/ethernet/intel/i40e/i40e_txrx.c

+12-2
Original file line numberDiff line numberDiff line change
@@ -3670,11 +3670,13 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
36703670
* For error cases, a negative errno code is returned and no-frames
36713671
* are transmitted (caller must handle freeing frames).
36723672
**/
3673-
int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
3673+
int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
3674+
u32 flags)
36743675
{
36753676
struct i40e_netdev_priv *np = netdev_priv(dev);
36763677
unsigned int queue_index = smp_processor_id();
36773678
struct i40e_vsi *vsi = np->vsi;
3679+
struct i40e_ring *xdp_ring;
36783680
int drops = 0;
36793681
int i;
36803682

@@ -3684,17 +3686,25 @@ int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
36843686
if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
36853687
return -ENXIO;
36863688

3689+
if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
3690+
return -EINVAL;
3691+
3692+
xdp_ring = vsi->xdp_rings[queue_index];
3693+
36873694
for (i = 0; i < n; i++) {
36883695
struct xdp_frame *xdpf = frames[i];
36893696
int err;
36903697

3691-
err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]);
3698+
err = i40e_xmit_xdp_ring(xdpf, xdp_ring);
36923699
if (err != I40E_XDP_TX) {
36933700
xdp_return_frame_rx_napi(xdpf);
36943701
drops++;
36953702
}
36963703
}
36973704

3705+
if (unlikely(flags & XDP_XMIT_FLUSH))
3706+
i40e_xdp_ring_update_tail(xdp_ring);
3707+
36983708
return n - drops;
36993709
}
37003710

drivers/net/ethernet/intel/i40e/i40e_txrx.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -487,7 +487,8 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
487487
void i40e_detect_recover_hung(struct i40e_vsi *vsi);
488488
int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
489489
bool __i40e_chk_linearize(struct sk_buff *skb);
490-
int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames);
490+
int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
491+
u32 flags);
491492
void i40e_xdp_flush(struct net_device *dev);
492493

493494
/**

drivers/net/ethernet/intel/ixgbe/ixgbe_main.c

+17-6
Original file line numberDiff line numberDiff line change
@@ -10022,8 +10022,17 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
1002210022
}
1002310023
}
1002410024

10025+
static void ixgbe_xdp_ring_update_tail(struct ixgbe_ring *ring)
10026+
{
10027+
/* Force memory writes to complete before letting h/w know there
10028+
* are new descriptors to fetch.
10029+
*/
10030+
wmb();
10031+
writel(ring->next_to_use, ring->tail);
10032+
}
10033+
1002510034
static int ixgbe_xdp_xmit(struct net_device *dev, int n,
10026-
struct xdp_frame **frames)
10035+
struct xdp_frame **frames, u32 flags)
1002710036
{
1002810037
struct ixgbe_adapter *adapter = netdev_priv(dev);
1002910038
struct ixgbe_ring *ring;
@@ -10033,6 +10042,9 @@ static int ixgbe_xdp_xmit(struct net_device *dev, int n,
1003310042
if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state)))
1003410043
return -ENETDOWN;
1003510044

10045+
if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
10046+
return -EINVAL;
10047+
1003610048
/* During program transitions its possible adapter->xdp_prog is assigned
1003710049
* but ring has not been configured yet. In this case simply abort xmit.
1003810050
*/
@@ -10051,6 +10063,9 @@ static int ixgbe_xdp_xmit(struct net_device *dev, int n,
1005110063
}
1005210064
}
1005310065

10066+
if (unlikely(flags & XDP_XMIT_FLUSH))
10067+
ixgbe_xdp_ring_update_tail(ring);
10068+
1005410069
return n - drops;
1005510070
}
1005610071

@@ -10069,11 +10084,7 @@ static void ixgbe_xdp_flush(struct net_device *dev)
1006910084
if (unlikely(!ring))
1007010085
return;
1007110086

10072-
/* Force memory writes to complete before letting h/w know there
10073-
* are new descriptors to fetch.
10074-
*/
10075-
wmb();
10076-
writel(ring->next_to_use, ring->tail);
10087+
ixgbe_xdp_ring_update_tail(ring);
1007710088

1007810089
return;
1007910090
}

drivers/net/tun.c

+18-7
Original file line numberDiff line numberDiff line change
@@ -1285,7 +1285,16 @@ static const struct net_device_ops tun_netdev_ops = {
12851285
.ndo_get_stats64 = tun_net_get_stats64,
12861286
};
12871287

1288-
static int tun_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
1288+
static void __tun_xdp_flush_tfile(struct tun_file *tfile)
1289+
{
1290+
/* Notify and wake up reader process */
1291+
if (tfile->flags & TUN_FASYNC)
1292+
kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
1293+
tfile->socket.sk->sk_data_ready(tfile->socket.sk);
1294+
}
1295+
1296+
static int tun_xdp_xmit(struct net_device *dev, int n,
1297+
struct xdp_frame **frames, u32 flags)
12891298
{
12901299
struct tun_struct *tun = netdev_priv(dev);
12911300
struct tun_file *tfile;
@@ -1294,6 +1303,9 @@ static int tun_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames
12941303
int cnt = n;
12951304
int i;
12961305

1306+
if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
1307+
return -EINVAL;
1308+
12971309
rcu_read_lock();
12981310

12991311
numqueues = READ_ONCE(tun->numqueues);
@@ -1321,6 +1333,9 @@ static int tun_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames
13211333
}
13221334
spin_unlock(&tfile->tx_ring.producer_lock);
13231335

1336+
if (flags & XDP_XMIT_FLUSH)
1337+
__tun_xdp_flush_tfile(tfile);
1338+
13241339
rcu_read_unlock();
13251340
return cnt - drops;
13261341
}
@@ -1332,7 +1347,7 @@ static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
13321347
if (unlikely(!frame))
13331348
return -EOVERFLOW;
13341349

1335-
return tun_xdp_xmit(dev, 1, &frame);
1350+
return tun_xdp_xmit(dev, 1, &frame, 0);
13361351
}
13371352

13381353
static void tun_xdp_flush(struct net_device *dev)
@@ -1349,11 +1364,7 @@ static void tun_xdp_flush(struct net_device *dev)
13491364

13501365
tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
13511366
numqueues]);
1352-
/* Notify and wake up reader process */
1353-
if (tfile->flags & TUN_FASYNC)
1354-
kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
1355-
tfile->socket.sk->sk_data_ready(tfile->socket.sk);
1356-
1367+
__tun_xdp_flush_tfile(tfile);
13571368
out:
13581369
rcu_read_unlock();
13591370
}

drivers/net/virtio_net.c

+8-1
Original file line numberDiff line numberDiff line change
@@ -468,7 +468,7 @@ static int __virtnet_xdp_tx_xmit(struct virtnet_info *vi,
468468
}
469469

470470
static int virtnet_xdp_xmit(struct net_device *dev,
471-
int n, struct xdp_frame **frames)
471+
int n, struct xdp_frame **frames, u32 flags)
472472
{
473473
struct virtnet_info *vi = netdev_priv(dev);
474474
struct receive_queue *rq = vi->rq;
@@ -481,6 +481,9 @@ static int virtnet_xdp_xmit(struct net_device *dev,
481481
int err;
482482
int i;
483483

484+
if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
485+
return -EINVAL;
486+
484487
qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
485488
sq = &vi->sq[qp];
486489

@@ -504,6 +507,10 @@ static int virtnet_xdp_xmit(struct net_device *dev,
504507
drops++;
505508
}
506509
}
510+
511+
if (flags & XDP_XMIT_FLUSH)
512+
virtqueue_kick(sq->vq);
513+
507514
return n - drops;
508515
}
509516

include/linux/netdevice.h

+4-3
Original file line numberDiff line numberDiff line change
@@ -1185,13 +1185,13 @@ struct dev_ifalias {
11851185
* This function is used to set or query state related to XDP on the
11861186
* netdevice and manage BPF offload. See definition of
11871187
* enum bpf_netdev_command for details.
1188-
* int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp);
1188+
* int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp,
1189+
* u32 flags);
11891190
* This function is used to submit @n XDP packets for transmit on a
11901191
* netdevice. Returns number of frames successfully transmitted, frames
11911192
* that got dropped are freed/returned via xdp_return_frame().
11921193
* Returns negative number, means general error invoking ndo, meaning
11931194
* no frames were xmit'ed and core-caller will free all frames.
1194-
* TODO: Consider add flag to allow sending flush operation.
11951195
* void (*ndo_xdp_flush)(struct net_device *dev);
11961196
* This function is used to inform the driver to flush a particular
11971197
* xdp tx queue. Must be called on same CPU as xdp_xmit.
@@ -1380,7 +1380,8 @@ struct net_device_ops {
13801380
int (*ndo_bpf)(struct net_device *dev,
13811381
struct netdev_bpf *bpf);
13821382
int (*ndo_xdp_xmit)(struct net_device *dev, int n,
1383-
struct xdp_frame **xdp);
1383+
struct xdp_frame **xdp,
1384+
u32 flags);
13841385
void (*ndo_xdp_flush)(struct net_device *dev);
13851386
};
13861387

include/net/xdp.h

+4
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ enum xdp_mem_type {
4040
MEM_TYPE_MAX,
4141
};
4242

43+
/* XDP flags for ndo_xdp_xmit */
44+
#define XDP_XMIT_FLUSH (1U << 0) /* doorbell signal consumer */
45+
#define XDP_XMIT_FLAGS_MASK XDP_XMIT_FLUSH
46+
4347
struct xdp_mem_info {
4448
u32 type; /* enum xdp_mem_type, but known size type */
4549
u32 id;

kernel/bpf/devmap.c

+6-13
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
217217
}
218218

219219
static int bq_xmit_all(struct bpf_dtab_netdev *obj,
220-
struct xdp_bulk_queue *bq)
220+
struct xdp_bulk_queue *bq, u32 flags)
221221
{
222222
struct net_device *dev = obj->dev;
223223
int sent = 0, drops = 0, err = 0;
@@ -232,7 +232,7 @@ static int bq_xmit_all(struct bpf_dtab_netdev *obj,
232232
prefetch(xdpf);
233233
}
234234

235-
sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q);
235+
sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q, flags);
236236
if (sent < 0) {
237237
err = sent;
238238
sent = 0;
@@ -276,7 +276,6 @@ void __dev_map_flush(struct bpf_map *map)
276276
for_each_set_bit(bit, bitmap, map->max_entries) {
277277
struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]);
278278
struct xdp_bulk_queue *bq;
279-
struct net_device *netdev;
280279

281280
/* This is possible if the dev entry is removed by user space
282281
* between xdp redirect and flush op.
@@ -287,10 +286,7 @@ void __dev_map_flush(struct bpf_map *map)
287286
__clear_bit(bit, bitmap);
288287

289288
bq = this_cpu_ptr(dev->bulkq);
290-
bq_xmit_all(dev, bq);
291-
netdev = dev->dev;
292-
if (likely(netdev->netdev_ops->ndo_xdp_flush))
293-
netdev->netdev_ops->ndo_xdp_flush(netdev);
289+
bq_xmit_all(dev, bq, XDP_XMIT_FLUSH);
294290
}
295291
}
296292

@@ -320,7 +316,7 @@ static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
320316
struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
321317

322318
if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
323-
bq_xmit_all(obj, bq);
319+
bq_xmit_all(obj, bq, 0);
324320

325321
/* Ingress dev_rx will be the same for all xdp_frame's in
326322
* bulk_queue, because bq stored per-CPU and must be flushed
@@ -359,8 +355,7 @@ static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
359355

360356
static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
361357
{
362-
if (dev->dev->netdev_ops->ndo_xdp_flush) {
363-
struct net_device *fl = dev->dev;
358+
if (dev->dev->netdev_ops->ndo_xdp_xmit) {
364359
struct xdp_bulk_queue *bq;
365360
unsigned long *bitmap;
366361

@@ -371,9 +366,7 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
371366
__clear_bit(dev->bit, bitmap);
372367

373368
bq = per_cpu_ptr(dev->bulkq, cpu);
374-
bq_xmit_all(dev, bq);
375-
376-
fl->netdev_ops->ndo_xdp_flush(dev->dev);
369+
bq_xmit_all(dev, bq, XDP_XMIT_FLUSH);
377370
}
378371
}
379372
}

net/core/filter.c

+1-2
Original file line numberDiff line numberDiff line change
@@ -3056,10 +3056,9 @@ static int __bpf_tx_xdp(struct net_device *dev,
30563056
if (unlikely(!xdpf))
30573057
return -EOVERFLOW;
30583058

3059-
sent = dev->netdev_ops->ndo_xdp_xmit(dev, 1, &xdpf);
3059+
sent = dev->netdev_ops->ndo_xdp_xmit(dev, 1, &xdpf, XDP_XMIT_FLUSH);
30603060
if (sent <= 0)
30613061
return sent;
3062-
dev->netdev_ops->ndo_xdp_flush(dev);
30633062
return 0;
30643063
}
30653064

0 commit comments

Comments
 (0)