Skip to content

Commit f663dd9

Browse files
jasowang authored and davem330 committed
net: core: explicitly select a txq before doing l2 forwarding
Currently, the tx queue is selected implicitly in ndo_dfwd_start_xmit(). This causes several issues: - NETIF_F_LLTX was removed for macvlan, so the txq lock is taken for macvlan instead of the lower device, which misses the txq synchronization the lower device needs, such as the txq stopping or freezing required by the dev watchdog or the control path. - dev_hard_start_xmit() was called with a NULL txq, which bypasses the net device watchdog. - dev_hard_start_xmit() does not check txq everywhere, which will lead to a crash when tso is disabled for the lower device. Fix this by explicitly introducing a new param for .ndo_select_queue(), used only for selecting queues in the case of l2 forwarding offload. netdev_pick_tx() is also extended to accept this parameter, and dev_queue_xmit_accel() is used to do the l2 forwarding transmission. With these fixes, NETIF_F_LLTX can be preserved for macvlan and there's no need to check txq against NULL in dev_hard_start_xmit(). There is also no need to keep a dedicated ndo_dfwd_start_xmit(); we can just reuse the code of dev_queue_xmit() to do the transmission. In the future, this will also be required for macvtap l2 forwarding support, since it provides a necessary synchronization method. Cc: John Fastabend <[email protected]> Cc: Neil Horman <[email protected]> Cc: [email protected] Signed-off-by: Jason Wang <[email protected]> Acked-by: Neil Horman <[email protected]> Acked-by: John Fastabend <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent b13ba1b commit f663dd9

File tree

21 files changed

+80
-62
lines changed

21 files changed

+80
-62
lines changed

drivers/net/bonding/bond_main.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3732,7 +3732,8 @@ static inline int bond_slave_override(struct bonding *bond,
37323732
}
37333733

37343734

3735-
static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb)
3735+
static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb,
3736+
void *accel_priv)
37363737
{
37373738
/*
37383739
* This helper function exists to help dev_pick_tx get the correct

drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1833,7 +1833,8 @@ void bnx2x_netif_stop(struct bnx2x *bp, int disable_hw)
18331833
bnx2x_napi_disable_cnic(bp);
18341834
}
18351835

1836-
u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb)
1836+
u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb,
1837+
void *accel_priv)
18371838
{
18381839
struct bnx2x *bp = netdev_priv(dev);
18391840

drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -524,7 +524,8 @@ int bnx2x_set_vf_mac(struct net_device *dev, int queue, u8 *mac);
524524
int bnx2x_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos);
525525

526526
/* select_queue callback */
527-
u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb);
527+
u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb,
528+
void *accel_priv);
528529

529530
static inline void bnx2x_update_rx_prod(struct bnx2x *bp,
530531
struct bnx2x_fastpath *fp,

drivers/net/ethernet/intel/ixgbe/ixgbe_main.c

Lines changed: 13 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6827,12 +6827,20 @@ static inline int ixgbe_maybe_stop_tx(struct ixgbe_ring *tx_ring, u16 size)
68276827
return __ixgbe_maybe_stop_tx(tx_ring, size);
68286828
}
68296829

6830-
#ifdef IXGBE_FCOE
6831-
static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb)
6830+
static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
6831+
void *accel_priv)
68326832
{
6833+
struct ixgbe_fwd_adapter *fwd_adapter = accel_priv;
6834+
#ifdef IXGBE_FCOE
68336835
struct ixgbe_adapter *adapter;
68346836
struct ixgbe_ring_feature *f;
68356837
int txq;
6838+
#endif
6839+
6840+
if (fwd_adapter)
6841+
return skb->queue_mapping + fwd_adapter->tx_base_queue;
6842+
6843+
#ifdef IXGBE_FCOE
68366844

68376845
/*
68386846
* only execute the code below if protocol is FCoE
@@ -6858,9 +6866,11 @@ static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb)
68586866
txq -= f->indices;
68596867

68606868
return txq + f->offset;
6869+
#else
6870+
return __netdev_pick_tx(dev, skb);
6871+
#endif
68616872
}
68626873

6863-
#endif
68646874
netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
68656875
struct ixgbe_adapter *adapter,
68666876
struct ixgbe_ring *tx_ring)
@@ -7629,27 +7639,11 @@ static void ixgbe_fwd_del(struct net_device *pdev, void *priv)
76297639
kfree(fwd_adapter);
76307640
}
76317641

7632-
static netdev_tx_t ixgbe_fwd_xmit(struct sk_buff *skb,
7633-
struct net_device *dev,
7634-
void *priv)
7635-
{
7636-
struct ixgbe_fwd_adapter *fwd_adapter = priv;
7637-
unsigned int queue;
7638-
struct ixgbe_ring *tx_ring;
7639-
7640-
queue = skb->queue_mapping + fwd_adapter->tx_base_queue;
7641-
tx_ring = fwd_adapter->real_adapter->tx_ring[queue];
7642-
7643-
return __ixgbe_xmit_frame(skb, dev, tx_ring);
7644-
}
7645-
76467642
static const struct net_device_ops ixgbe_netdev_ops = {
76477643
.ndo_open = ixgbe_open,
76487644
.ndo_stop = ixgbe_close,
76497645
.ndo_start_xmit = ixgbe_xmit_frame,
7650-
#ifdef IXGBE_FCOE
76517646
.ndo_select_queue = ixgbe_select_queue,
7652-
#endif
76537647
.ndo_set_rx_mode = ixgbe_set_rx_mode,
76547648
.ndo_validate_addr = eth_validate_addr,
76557649
.ndo_set_mac_address = ixgbe_set_mac,
@@ -7689,7 +7683,6 @@ static const struct net_device_ops ixgbe_netdev_ops = {
76897683
.ndo_bridge_getlink = ixgbe_ndo_bridge_getlink,
76907684
.ndo_dfwd_add_station = ixgbe_fwd_add,
76917685
.ndo_dfwd_del_station = ixgbe_fwd_del,
7692-
.ndo_dfwd_start_xmit = ixgbe_fwd_xmit,
76937686
};
76947687

76957688
/**

drivers/net/ethernet/lantiq_etop.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -619,7 +619,8 @@ ltq_etop_set_multicast_list(struct net_device *dev)
619619
}
620620

621621
static u16
622-
ltq_etop_select_queue(struct net_device *dev, struct sk_buff *skb)
622+
ltq_etop_select_queue(struct net_device *dev, struct sk_buff *skb,
623+
void *accel_priv)
623624
{
624625
/* we are currently only using the first queue */
625626
return 0;

drivers/net/ethernet/mellanox/mlx4/en_tx.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -592,7 +592,8 @@ static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, struct sk_buff *sk
592592
}
593593
}
594594

595-
u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb)
595+
u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
596+
void *accel_priv)
596597
{
597598
struct mlx4_en_priv *priv = netdev_priv(dev);
598599
u16 rings_p_up = priv->num_tx_rings_p_up;

drivers/net/ethernet/mellanox/mlx4/mlx4_en.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -714,7 +714,8 @@ int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
714714
int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
715715

716716
void mlx4_en_tx_irq(struct mlx4_cq *mcq);
717-
u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb);
717+
u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
718+
void *accel_priv);
718719
netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev);
719720

720721
int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,

drivers/net/ethernet/tile/tilegx.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2080,7 +2080,8 @@ static int tile_net_tx(struct sk_buff *skb, struct net_device *dev)
20802080
}
20812081

20822082
/* Return subqueue id on this core (one per core). */
2083-
static u16 tile_net_select_queue(struct net_device *dev, struct sk_buff *skb)
2083+
static u16 tile_net_select_queue(struct net_device *dev, struct sk_buff *skb,
2084+
void *accel_priv)
20842085
{
20852086
return smp_processor_id();
20862087
}

drivers/net/macvlan.c

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
299299

300300
if (vlan->fwd_priv) {
301301
skb->dev = vlan->lowerdev;
302-
ret = dev_hard_start_xmit(skb, skb->dev, NULL, vlan->fwd_priv);
302+
ret = dev_queue_xmit_accel(skb, vlan->fwd_priv);
303303
} else {
304304
ret = macvlan_queue_xmit(skb, dev);
305305
}
@@ -365,10 +365,8 @@ static int macvlan_open(struct net_device *dev)
365365
*/
366366
if (IS_ERR_OR_NULL(vlan->fwd_priv)) {
367367
vlan->fwd_priv = NULL;
368-
} else {
369-
dev->features &= ~NETIF_F_LLTX;
368+
} else
370369
return 0;
371-
}
372370
}
373371

374372
err = -EBUSY;
@@ -702,8 +700,7 @@ static netdev_features_t macvlan_fix_features(struct net_device *dev,
702700
features = netdev_increment_features(vlan->lowerdev->features,
703701
features,
704702
mask);
705-
if (!vlan->fwd_priv)
706-
features |= NETIF_F_LLTX;
703+
features |= NETIF_F_LLTX;
707704

708705
return features;
709706
}

drivers/net/team/team.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1647,7 +1647,8 @@ static netdev_tx_t team_xmit(struct sk_buff *skb, struct net_device *dev)
16471647
return NETDEV_TX_OK;
16481648
}
16491649

1650-
static u16 team_select_queue(struct net_device *dev, struct sk_buff *skb)
1650+
static u16 team_select_queue(struct net_device *dev, struct sk_buff *skb,
1651+
void *accel_priv)
16511652
{
16521653
/*
16531654
* This helper function exists to help dev_pick_tx get the correct

drivers/net/tun.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -348,7 +348,8 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
348348
* different rxq no. here. If we could not get rxhash, then we would
349349
* hope the rxq no. may help here.
350350
*/
351-
static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb)
351+
static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
352+
void *accel_priv)
352353
{
353354
struct tun_struct *tun = netdev_priv(dev);
354355
struct tun_flow_entry *e;

drivers/net/wireless/mwifiex/main.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -746,7 +746,8 @@ static struct net_device_stats *mwifiex_get_stats(struct net_device *dev)
746746
}
747747

748748
static u16
749-
mwifiex_netdev_select_wmm_queue(struct net_device *dev, struct sk_buff *skb)
749+
mwifiex_netdev_select_wmm_queue(struct net_device *dev, struct sk_buff *skb,
750+
void *accel_priv)
750751
{
751752
skb->priority = cfg80211_classify8021d(skb);
752753
return mwifiex_1d_to_wmm_queue[skb->priority];

drivers/staging/bcm/Bcmnet.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ static INT bcm_close(struct net_device *dev)
3939
return 0;
4040
}
4141

42-
static u16 bcm_select_queue(struct net_device *dev, struct sk_buff *skb)
42+
static u16 bcm_select_queue(struct net_device *dev, struct sk_buff *skb,
43+
void *accel_priv)
4344
{
4445
return ClassifyPacket(netdev_priv(dev), skb);
4546
}

drivers/staging/netlogic/xlr_net.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,8 @@ static netdev_tx_t xlr_net_start_xmit(struct sk_buff *skb,
306306
return NETDEV_TX_OK;
307307
}
308308

309-
static u16 xlr_net_select_queue(struct net_device *ndev, struct sk_buff *skb)
309+
static u16 xlr_net_select_queue(struct net_device *ndev, struct sk_buff *skb,
310+
void *accel_priv)
310311
{
311312
return (u16)smp_processor_id();
312313
}

drivers/staging/rtl8188eu/os_dep/os_intfs.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -652,7 +652,8 @@ static unsigned int rtw_classify8021d(struct sk_buff *skb)
652652
return dscp >> 5;
653653
}
654654

655-
static u16 rtw_select_queue(struct net_device *dev, struct sk_buff *skb)
655+
static u16 rtw_select_queue(struct net_device *dev, struct sk_buff *skb,
656+
void *accel_priv)
656657
{
657658
struct adapter *padapter = rtw_netdev_priv(dev);
658659
struct mlme_priv *pmlmepriv = &padapter->mlmepriv;

include/linux/netdevice.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -769,7 +769,8 @@ struct netdev_phys_port_id {
769769
* (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX)
770770
* Required can not be NULL.
771771
*
772-
* u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb);
772+
* u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb,
773+
* void *accel_priv);
773774
* Called to decide which queue to when device supports multiple
774775
* transmit queues.
775776
*
@@ -990,7 +991,8 @@ struct net_device_ops {
990991
netdev_tx_t (*ndo_start_xmit) (struct sk_buff *skb,
991992
struct net_device *dev);
992993
u16 (*ndo_select_queue)(struct net_device *dev,
993-
struct sk_buff *skb);
994+
struct sk_buff *skb,
995+
void *accel_priv);
994996
void (*ndo_change_rx_flags)(struct net_device *dev,
995997
int flags);
996998
void (*ndo_set_rx_mode)(struct net_device *dev);
@@ -1529,7 +1531,8 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev,
15291531
}
15301532

15311533
struct netdev_queue *netdev_pick_tx(struct net_device *dev,
1532-
struct sk_buff *skb);
1534+
struct sk_buff *skb,
1535+
void *accel_priv);
15331536
u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb);
15341537

15351538
/*
@@ -1819,6 +1822,7 @@ int dev_close(struct net_device *dev);
18191822
void dev_disable_lro(struct net_device *dev);
18201823
int dev_loopback_xmit(struct sk_buff *newskb);
18211824
int dev_queue_xmit(struct sk_buff *skb);
1825+
int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv);
18221826
int register_netdevice(struct net_device *dev);
18231827
void unregister_netdevice_queue(struct net_device *dev, struct list_head *head);
18241828
void unregister_netdevice_many(struct list_head *head);
@@ -2426,7 +2430,7 @@ int dev_change_carrier(struct net_device *, bool new_carrier);
24262430
int dev_get_phys_port_id(struct net_device *dev,
24272431
struct netdev_phys_port_id *ppid);
24282432
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
2429-
struct netdev_queue *txq, void *accel_priv);
2433+
struct netdev_queue *txq);
24302434
int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
24312435

24322436
extern int netdev_budget;

net/core/dev.c

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2539,7 +2539,7 @@ static inline int skb_needs_linearize(struct sk_buff *skb,
25392539
}
25402540

25412541
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
2542-
struct netdev_queue *txq, void *accel_priv)
2542+
struct netdev_queue *txq)
25432543
{
25442544
const struct net_device_ops *ops = dev->netdev_ops;
25452545
int rc = NETDEV_TX_OK;
@@ -2605,13 +2605,10 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
26052605
dev_queue_xmit_nit(skb, dev);
26062606

26072607
skb_len = skb->len;
2608-
if (accel_priv)
2609-
rc = ops->ndo_dfwd_start_xmit(skb, dev, accel_priv);
2610-
else
26112608
rc = ops->ndo_start_xmit(skb, dev);
26122609

26132610
trace_net_dev_xmit(skb, rc, dev, skb_len);
2614-
if (rc == NETDEV_TX_OK && txq)
2611+
if (rc == NETDEV_TX_OK)
26152612
txq_trans_update(txq);
26162613
return rc;
26172614
}
@@ -2627,10 +2624,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
26272624
dev_queue_xmit_nit(nskb, dev);
26282625

26292626
skb_len = nskb->len;
2630-
if (accel_priv)
2631-
rc = ops->ndo_dfwd_start_xmit(nskb, dev, accel_priv);
2632-
else
2633-
rc = ops->ndo_start_xmit(nskb, dev);
2627+
rc = ops->ndo_start_xmit(nskb, dev);
26342628
trace_net_dev_xmit(nskb, rc, dev, skb_len);
26352629
if (unlikely(rc != NETDEV_TX_OK)) {
26362630
if (rc & ~NETDEV_TX_MASK)
@@ -2811,7 +2805,7 @@ EXPORT_SYMBOL(dev_loopback_xmit);
28112805
* the BH enable code must have IRQs enabled so that it will not deadlock.
28122806
* --BLG
28132807
*/
2814-
int dev_queue_xmit(struct sk_buff *skb)
2808+
int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
28152809
{
28162810
struct net_device *dev = skb->dev;
28172811
struct netdev_queue *txq;
@@ -2827,7 +2821,7 @@ int dev_queue_xmit(struct sk_buff *skb)
28272821

28282822
skb_update_prio(skb);
28292823

2830-
txq = netdev_pick_tx(dev, skb);
2824+
txq = netdev_pick_tx(dev, skb, accel_priv);
28312825
q = rcu_dereference_bh(txq->qdisc);
28322826

28332827
#ifdef CONFIG_NET_CLS_ACT
@@ -2863,7 +2857,7 @@ int dev_queue_xmit(struct sk_buff *skb)
28632857

28642858
if (!netif_xmit_stopped(txq)) {
28652859
__this_cpu_inc(xmit_recursion);
2866-
rc = dev_hard_start_xmit(skb, dev, txq, NULL);
2860+
rc = dev_hard_start_xmit(skb, dev, txq);
28672861
__this_cpu_dec(xmit_recursion);
28682862
if (dev_xmit_complete(rc)) {
28692863
HARD_TX_UNLOCK(dev, txq);
@@ -2892,8 +2886,19 @@ int dev_queue_xmit(struct sk_buff *skb)
28922886
rcu_read_unlock_bh();
28932887
return rc;
28942888
}
2889+
2890+
int dev_queue_xmit(struct sk_buff *skb)
2891+
{
2892+
return __dev_queue_xmit(skb, NULL);
2893+
}
28952894
EXPORT_SYMBOL(dev_queue_xmit);
28962895

2896+
int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv)
2897+
{
2898+
return __dev_queue_xmit(skb, accel_priv);
2899+
}
2900+
EXPORT_SYMBOL(dev_queue_xmit_accel);
2901+
28972902

28982903
/*=======================================================================
28992904
Receiver routines

net/core/flow_dissector.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -395,17 +395,21 @@ u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
395395
EXPORT_SYMBOL(__netdev_pick_tx);
396396

397397
struct netdev_queue *netdev_pick_tx(struct net_device *dev,
398-
struct sk_buff *skb)
398+
struct sk_buff *skb,
399+
void *accel_priv)
399400
{
400401
int queue_index = 0;
401402

402403
if (dev->real_num_tx_queues != 1) {
403404
const struct net_device_ops *ops = dev->netdev_ops;
404405
if (ops->ndo_select_queue)
405-
queue_index = ops->ndo_select_queue(dev, skb);
406+
queue_index = ops->ndo_select_queue(dev, skb,
407+
accel_priv);
406408
else
407409
queue_index = __netdev_pick_tx(dev, skb);
408-
queue_index = dev_cap_txqueue(dev, queue_index);
410+
411+
if (!accel_priv)
412+
queue_index = dev_cap_txqueue(dev, queue_index);
409413
}
410414

411415
skb_set_queue_mapping(skb, queue_index);

net/core/netpoll.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -375,7 +375,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
375375
if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
376376
struct netdev_queue *txq;
377377

378-
txq = netdev_pick_tx(dev, skb);
378+
txq = netdev_pick_tx(dev, skb, NULL);
379379

380380
/* try until next clock tick */
381381
for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;

0 commit comments

Comments
 (0)