Skip to content

Commit 98b2c04

Browse files
committed
Merge branch 'mlxsw-add-vxlan-to-the-same-hardware-domain-as-physical-bridge-ports'
Petr Machata says: ==================== mlxsw: Add VXLAN to the same hardware domain as physical bridge ports Amit Cohen writes: Packets which are trapped to the CPU for forwarding in the software data path are handled according to the driver's marking of skb->offload_{,l3}_fwd_mark. Packets which are marked as L2-forwarded in hardware will not be flooded by the bridge to bridge ports which are in the same hardware domain as the ingress port. Currently, mlxsw does not add VXLAN bridge ports to the same hardware domain as physical bridge ports despite the fact that the device is able to forward packets to and from VXLAN tunnels in hardware. In some scenarios this can result in remote VTEPs receiving duplicate packets. To solve such packet duplication, add VXLAN bridge ports to the same hardware domain as other bridge ports. One complication is ARP suppression, which requires the local VTEP to avoid flooding ARP packets to remote VTEPs if the local VTEP is able to reply on behalf of remote hosts. This is currently implemented by having the device flood ARP packets in hardware and trapping them during VXLAN encapsulation, but marking them with skb->offload_fwd_mark=1 so that the bridge will not re-flood them to physical bridge ports. The above scheme will break when VXLAN bridge ports are added to the same hardware domain as physical bridge ports, as ARP packets that cannot be suppressed by the bridge will not be able to egress the VXLAN bridge ports due to hardware domain filtering. This is solved by trapping ARP packets when they enter the device and not marking them as being forwarded in hardware. 
Patch set overview: Patch #1 sets hardware to trap ARP packets at layer 2. Patches #2-#4 are preparations for setting the hardware domain of VXLAN. Patch #5 sets the hardware domain of VXLAN. Patch #6 extends the VXLAN flood test to verify that this set solves the packet duplication. ==================== Link: https://patch.msgid.link/[email protected] Signed-off-by: Jakub Kicinski <[email protected]>
2 parents c353e89 + 36ed81b commit 98b2c04

File tree

7 files changed

+83
-49
lines changed

7 files changed

+83
-49
lines changed

Diff for: drivers/net/ethernet/mellanox/mlxsw/spectrum.c

+4-18
Original file line numberDiff line numberDiff line change
@@ -2409,8 +2409,6 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
24092409
/* Multicast Router Traps */
24102410
MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false),
24112411
MLXSW_SP_RXL_L3_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false),
2412-
/* NVE traps */
2413-
MLXSW_SP_RXL_MARK(NVE_ENCAP_ARP, TRAP_TO_CPU, NEIGH_DISCOVERY, false),
24142412
};
24152413

24162414
static const struct mlxsw_listener mlxsw_sp1_listener[] = {
@@ -5232,25 +5230,13 @@ static int mlxsw_sp_netdevice_vxlan_event(struct mlxsw_sp *mlxsw_sp,
52325230
return 0;
52335231
if (!mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack))
52345232
return -EOPNOTSUPP;
5235-
if (cu_info->linking) {
5236-
if (!netif_running(dev))
5237-
return 0;
5238-
/* When the bridge is VLAN-aware, the VNI of the VxLAN
5239-
* device needs to be mapped to a VLAN, but at this
5240-
* point no VLANs are configured on the VxLAN device
5241-
*/
5242-
if (br_vlan_enabled(upper_dev))
5243-
return 0;
5233+
if (!netif_running(dev))
5234+
return 0;
5235+
if (cu_info->linking)
52445236
return mlxsw_sp_bridge_vxlan_join(mlxsw_sp, upper_dev,
52455237
dev, 0, extack);
5246-
} else {
5247-
/* VLANs were already flushed, which triggered the
5248-
* necessary cleanup
5249-
*/
5250-
if (br_vlan_enabled(upper_dev))
5251-
return 0;
5238+
else
52525239
mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, dev);
5253-
}
52545240
break;
52555241
case NETDEV_PRE_UP:
52565242
upper_dev = netdev_master_upper_dev_get(dev);

Diff for: drivers/net/ethernet/mellanox/mlxsw/spectrum.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -661,10 +661,10 @@ bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp,
661661
const struct net_device *br_dev);
662662
int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp,
663663
const struct net_device *br_dev,
664-
const struct net_device *vxlan_dev, u16 vid,
664+
struct net_device *vxlan_dev, u16 vid,
665665
struct netlink_ext_ack *extack);
666666
void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp,
667-
const struct net_device *vxlan_dev);
667+
struct net_device *vxlan_dev);
668668
extern struct notifier_block mlxsw_sp_switchdev_notifier;
669669

670670
/* spectrum.c */

Diff for: drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c

+46-20
Original file line numberDiff line numberDiff line change
@@ -2929,23 +2929,8 @@ void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port,
29292929
mlxsw_sp_bridge_port_put(mlxsw_sp->bridge, bridge_port);
29302930
}
29312931

2932-
int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp,
2933-
const struct net_device *br_dev,
2934-
const struct net_device *vxlan_dev, u16 vid,
2935-
struct netlink_ext_ack *extack)
2936-
{
2937-
struct mlxsw_sp_bridge_device *bridge_device;
2938-
2939-
bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
2940-
if (WARN_ON(!bridge_device))
2941-
return -EINVAL;
2942-
2943-
return bridge_device->ops->vxlan_join(bridge_device, vxlan_dev, vid,
2944-
extack);
2945-
}
2946-
2947-
void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp,
2948-
const struct net_device *vxlan_dev)
2932+
static void __mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp,
2933+
const struct net_device *vxlan_dev)
29492934
{
29502935
struct vxlan_dev *vxlan = netdev_priv(vxlan_dev);
29512936
struct mlxsw_sp_fid *fid;
@@ -2963,6 +2948,47 @@ void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp,
29632948
mlxsw_sp_fid_put(fid);
29642949
}
29652950

2951+
int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp,
2952+
const struct net_device *br_dev,
2953+
struct net_device *vxlan_dev, u16 vid,
2954+
struct netlink_ext_ack *extack)
2955+
{
2956+
struct mlxsw_sp_bridge_device *bridge_device;
2957+
struct mlxsw_sp_port *mlxsw_sp_port;
2958+
int err;
2959+
2960+
bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
2961+
if (WARN_ON(!bridge_device))
2962+
return -EINVAL;
2963+
2964+
mlxsw_sp_port = mlxsw_sp_port_dev_lower_find(bridge_device->dev);
2965+
if (!mlxsw_sp_port)
2966+
return -EINVAL;
2967+
2968+
err = bridge_device->ops->vxlan_join(bridge_device, vxlan_dev, vid,
2969+
extack);
2970+
if (err)
2971+
return err;
2972+
2973+
err = switchdev_bridge_port_offload(vxlan_dev, mlxsw_sp_port->dev,
2974+
NULL, NULL, NULL, false, extack);
2975+
if (err)
2976+
goto err_bridge_port_offload;
2977+
2978+
return 0;
2979+
2980+
err_bridge_port_offload:
2981+
__mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev);
2982+
return err;
2983+
}
2984+
2985+
void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp,
2986+
struct net_device *vxlan_dev)
2987+
{
2988+
switchdev_bridge_port_unoffload(vxlan_dev, NULL, NULL, NULL);
2989+
__mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev);
2990+
}
2991+
29662992
static void
29672993
mlxsw_sp_switchdev_vxlan_addr_convert(const union vxlan_addr *vxlan_addr,
29682994
enum mlxsw_sp_l3proto *proto,
@@ -3867,7 +3893,7 @@ mlxsw_sp_switchdev_vxlan_vlan_add(struct mlxsw_sp *mlxsw_sp,
38673893
mlxsw_sp_fid_put(fid);
38683894
return -EINVAL;
38693895
}
3870-
mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev);
3896+
__mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev);
38713897
mlxsw_sp_fid_put(fid);
38723898
return 0;
38733899
}
@@ -3883,7 +3909,7 @@ mlxsw_sp_switchdev_vxlan_vlan_add(struct mlxsw_sp *mlxsw_sp,
38833909
/* Fourth case: The new VLAN is PVID, which means the VLAN currently
38843910
* mapped to the VNI should be unmapped
38853911
*/
3886-
mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev);
3912+
__mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev);
38873913
mlxsw_sp_fid_put(fid);
38883914

38893915
/* Fifth case: The new VLAN is also egress untagged, which means the
@@ -3923,7 +3949,7 @@ mlxsw_sp_switchdev_vxlan_vlan_del(struct mlxsw_sp *mlxsw_sp,
39233949
if (mlxsw_sp_fid_8021q_vid(fid) != vid)
39243950
goto out;
39253951

3926-
mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev);
3952+
__mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev);
39273953

39283954
out:
39293955
mlxsw_sp_fid_put(fid);

Diff for: drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c

+6-6
Original file line numberDiff line numberDiff line change
@@ -959,18 +959,18 @@ static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = {
959959
},
960960
{
961961
.trap = MLXSW_SP_TRAP_CONTROL(ARP_REQUEST, NEIGH_DISCOVERY,
962-
MIRROR),
962+
TRAP),
963963
.listeners_arr = {
964-
MLXSW_SP_RXL_MARK(ROUTER_ARPBC, NEIGH_DISCOVERY,
965-
TRAP_TO_CPU, false),
964+
MLXSW_SP_RXL_NO_MARK(ARPBC, NEIGH_DISCOVERY,
965+
TRAP_TO_CPU, false),
966966
},
967967
},
968968
{
969969
.trap = MLXSW_SP_TRAP_CONTROL(ARP_RESPONSE, NEIGH_DISCOVERY,
970-
MIRROR),
970+
TRAP),
971971
.listeners_arr = {
972-
MLXSW_SP_RXL_MARK(ROUTER_ARPUC, NEIGH_DISCOVERY,
973-
TRAP_TO_CPU, false),
972+
MLXSW_SP_RXL_NO_MARK(ARPUC, NEIGH_DISCOVERY,
973+
TRAP_TO_CPU, false),
974974
},
975975
},
976976
{

Diff for: drivers/net/ethernet/mellanox/mlxsw/trap.h

+2-3
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ enum {
2929
MLXSW_TRAP_ID_FDB_MISMATCH = 0x3B,
3030
MLXSW_TRAP_ID_FID_MISS = 0x3D,
3131
MLXSW_TRAP_ID_DECAP_ECN0 = 0x40,
32+
MLXSW_TRAP_ID_ARPBC = 0x50,
33+
MLXSW_TRAP_ID_ARPUC = 0x51,
3234
MLXSW_TRAP_ID_MTUERROR = 0x52,
3335
MLXSW_TRAP_ID_TTLERROR = 0x53,
3436
MLXSW_TRAP_ID_LBERROR = 0x54,
@@ -66,13 +68,10 @@ enum {
6668
MLXSW_TRAP_ID_HOST_MISS_IPV6 = 0x92,
6769
MLXSW_TRAP_ID_IPIP_DECAP_ERROR = 0xB1,
6870
MLXSW_TRAP_ID_NVE_DECAP_ARP = 0xB8,
69-
MLXSW_TRAP_ID_NVE_ENCAP_ARP = 0xBD,
7071
MLXSW_TRAP_ID_IPV4_BFD = 0xD0,
7172
MLXSW_TRAP_ID_IPV6_BFD = 0xD1,
7273
MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6,
7374
MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7,
74-
MLXSW_TRAP_ID_ROUTER_ARPBC = 0xE0,
75-
MLXSW_TRAP_ID_ROUTER_ARPUC = 0xE1,
7675
MLXSW_TRAP_ID_DISCARD_NON_ROUTABLE = 0x11A,
7776
MLXSW_TRAP_ID_DISCARD_ROUTER2 = 0x130,
7877
MLXSW_TRAP_ID_DISCARD_ROUTER3 = 0x131,

Diff for: tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh

+8
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,14 @@ __test_flood()
428428
test_flood()
429429
{
430430
__test_flood de:ad:be:ef:13:37 192.0.2.100 "flood"
431+
432+
# Add an entry with arbitrary destination IP. Verify that packets are
433+
# not duplicated (this can happen if hardware floods the packets, and
434+
# then traps them due to misconfiguration, so software data path repeats
435+
# flooding and resends packets).
436+
bridge fdb append dev vx1 00:00:00:00:00:00 dst 198.51.100.1 self
437+
__test_flood de:ad:be:ef:13:37 192.0.2.100 "flood, unresolved FDB entry"
438+
bridge fdb del dev vx1 00:00:00:00:00:00 dst 198.51.100.1 self
431439
}
432440

433441
vxlan_fdb_add_del()

Diff for: tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh

+15
Original file line numberDiff line numberDiff line change
@@ -539,6 +539,21 @@ test_flood()
539539
10 10 0 10 0
540540
__test_flood ca:fe:be:ef:13:37 198.51.100.100 20 "flood vlan 20" \
541541
10 0 10 0 10
542+
543+
# Add entries with arbitrary destination IP. Verify that packets are
544+
# not duplicated (this can happen if hardware floods the packets, and
545+
# then traps them due to misconfiguration, so software data path repeats
546+
# flooding and resends packets).
547+
bridge fdb append dev vx10 00:00:00:00:00:00 dst 203.0.113.1 self
548+
bridge fdb append dev vx20 00:00:00:00:00:00 dst 203.0.113.2 self
549+
550+
__test_flood de:ad:be:ef:13:37 192.0.2.100 10 \
551+
"flood vlan 10, unresolved FDB entry" 10 10 0 10 0
552+
__test_flood ca:fe:be:ef:13:37 198.51.100.100 20 \
553+
"flood vlan 20, unresolved FDB entry" 10 0 10 0 10
554+
555+
bridge fdb del dev vx20 00:00:00:00:00:00 dst 203.0.113.2 self
556+
bridge fdb del dev vx10 00:00:00:00:00:00 dst 203.0.113.1 self
542557
}
543558

544559
vxlan_fdb_add_del()

0 commit comments

Comments
 (0)