Skip to content

Commit 6f9d451

Browse files
Toke Høiland-Jørgensen (tohojo)
authored and
Alexei Starovoitov
committed
xdp: Add devmap_hash map type for looking up devices by hashed index
A common pattern when using xdp_redirect_map() is to create a device map where the lookup key is simply ifindex. Because device maps are arrays, this leaves holes in the map, and the map has to be sized to fit the largest ifindex, regardless of how many devices are actually needed in the map. This patch adds a second type of device map where the key is looked up using a hashmap, instead of being used as an array index. This allows maps to be densely packed, so they can be smaller. Signed-off-by: Toke Høiland-Jørgensen <[email protected]> Acked-by: Yonghong Song <[email protected]> Acked-by: Jesper Dangaard Brouer <[email protected]> Signed-off-by: Alexei Starovoitov <[email protected]>
1 parent fca16e5 commit 6f9d451

File tree

7 files changed

+220
-3
lines changed

7 files changed

+220
-3
lines changed

include/linux/bpf.h

+7
Original file line numberDiff line numberDiff line change
@@ -713,6 +713,7 @@ struct xdp_buff;
713713
struct sk_buff;
714714

715715
struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
716+
struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key);
716717
void __dev_map_flush(struct bpf_map *map);
717718
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
718719
struct net_device *dev_rx);
@@ -799,6 +800,12 @@ static inline struct net_device *__dev_map_lookup_elem(struct bpf_map *map,
799800
return NULL;
800801
}
801802

803+
static inline struct net_device *__dev_map_hash_lookup_elem(struct bpf_map *map,
804+
u32 key)
805+
{
806+
return NULL;
807+
}
808+
802809
static inline void __dev_map_flush(struct bpf_map *map)
803810
{
804811
}

include/linux/bpf_types.h

+1
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
6262
BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
6363
#ifdef CONFIG_NET
6464
BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
65+
BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops)
6566
BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops)
6667
#if defined(CONFIG_BPF_STREAM_PARSER)
6768
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)

include/trace/events/xdp.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,8 @@ struct _bpf_dtab_netdev {
175175
#endif /* __DEVMAP_OBJ_TYPE */
176176

177177
#define devmap_ifindex(fwd, map) \
178-
((map->map_type == BPF_MAP_TYPE_DEVMAP) ? \
178+
((map->map_type == BPF_MAP_TYPE_DEVMAP || \
179+
map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) ? \
179180
((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0)
180181

181182
#define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \

include/uapi/linux/bpf.h

+1
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ enum bpf_map_type {
134134
BPF_MAP_TYPE_QUEUE,
135135
BPF_MAP_TYPE_STACK,
136136
BPF_MAP_TYPE_SK_STORAGE,
137+
BPF_MAP_TYPE_DEVMAP_HASH,
137138
};
138139

139140
/* Note that tracing related programs such as

kernel/bpf/devmap.c

+200
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,12 @@
3737
* notifier hook walks the map we know that new dev references can not be
3838
* added by the user because core infrastructure ensures dev_get_by_index()
3939
* calls will fail at this point.
40+
*
41+
* The devmap_hash type is a map type which interprets keys as ifindexes and
42+
* indexes these using a hashmap. This allows maps that use ifindex as key to be
43+
* densely packed instead of having holes in the lookup array for unused
44+
* ifindexes. The setup and packet enqueue/send code is shared between the two
45+
* types of devmap; only the lookup and insertion is different.
4046
*/
4147
#include <linux/bpf.h>
4248
#include <net/xdp.h>
@@ -59,6 +65,7 @@ struct xdp_bulk_queue {
5965

6066
struct bpf_dtab_netdev {
6167
struct net_device *dev; /* must be first member, due to tracepoint */
68+
struct hlist_node index_hlist;
6269
struct bpf_dtab *dtab;
6370
struct xdp_bulk_queue __percpu *bulkq;
6471
struct rcu_head rcu;
@@ -70,11 +77,30 @@ struct bpf_dtab {
7077
struct bpf_dtab_netdev **netdev_map;
7178
struct list_head __percpu *flush_list;
7279
struct list_head list;
80+
81+
/* these are only used for DEVMAP_HASH type maps */
82+
struct hlist_head *dev_index_head;
83+
spinlock_t index_lock;
84+
unsigned int items;
85+
u32 n_buckets;
7386
};
7487

7588
static DEFINE_SPINLOCK(dev_map_lock);
7689
static LIST_HEAD(dev_map_list);
7790

91+
static struct hlist_head *dev_map_create_hash(unsigned int entries)
92+
{
93+
int i;
94+
struct hlist_head *hash;
95+
96+
hash = kmalloc_array(entries, sizeof(*hash), GFP_KERNEL);
97+
if (hash != NULL)
98+
for (i = 0; i < entries; i++)
99+
INIT_HLIST_HEAD(&hash[i]);
100+
101+
return hash;
102+
}
103+
78104
static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
79105
{
80106
int err, cpu;
@@ -97,6 +123,14 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
97123
cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
98124
cost += sizeof(struct list_head) * num_possible_cpus();
99125

126+
if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
127+
dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries);
128+
129+
if (!dtab->n_buckets) /* Overflow check */
130+
return -EINVAL;
131+
cost += sizeof(struct hlist_head) * dtab->n_buckets;
132+
}
133+
100134
/* if map size is larger than memlock limit, reject it */
101135
err = bpf_map_charge_init(&dtab->map.memory, cost);
102136
if (err)
@@ -115,8 +149,18 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
115149
if (!dtab->netdev_map)
116150
goto free_percpu;
117151

152+
if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
153+
dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets);
154+
if (!dtab->dev_index_head)
155+
goto free_map_area;
156+
157+
spin_lock_init(&dtab->index_lock);
158+
}
159+
118160
return 0;
119161

162+
free_map_area:
163+
bpf_map_area_free(dtab->netdev_map);
120164
free_percpu:
121165
free_percpu(dtab->flush_list);
122166
free_charge:
@@ -198,6 +242,7 @@ static void dev_map_free(struct bpf_map *map)
198242

199243
free_percpu(dtab->flush_list);
200244
bpf_map_area_free(dtab->netdev_map);
245+
kfree(dtab->dev_index_head);
201246
kfree(dtab);
202247
}
203248

@@ -218,6 +263,70 @@ static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
218263
return 0;
219264
}
220265

266+
/* Map a key onto one of the n_buckets hash chains. n_buckets is rounded
 * up to a power of two when the map is created, so masking with
 * (n_buckets - 1) selects a bucket without a modulo.
 */
static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab,
267+
int idx)
268+
{
269+
return &dtab->dev_index_head[idx & (dtab->n_buckets - 1)];
270+
}
271+
272+
/* Look up the devmap_hash entry stored under @key.
 *
 * Walks the bucket chain selected by dev_map_index_hash() and returns the
 * entry whose idx matches @key, or NULL if none does. The chain is
 * traversed with hlist_for_each_entry_rcu(), so callers are expected to be
 * in an RCU read-side section (or otherwise hold off concurrent removal).
 */
struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key)
273+
{
274+
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
275+
struct hlist_head *head = dev_map_index_hash(dtab, key);
276+
struct bpf_dtab_netdev *dev;
277+
278+
hlist_for_each_entry_rcu(dev, head, index_hlist)
279+
if (dev->idx == key)
280+
return dev;
281+
282+
return NULL;
283+
}
284+
285+
/* get_next_key implementation for DEVMAP_HASH.
 *
 * If @key is NULL, or the keyed entry no longer exists, iteration
 * (re)starts at the head of the first non-empty bucket. Otherwise the
 * successor is the next entry on the same bucket chain, falling back to
 * the head of the next non-empty bucket. Returns 0 with *next_key filled
 * in, or -ENOENT when the map has been exhausted.
 */
static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
286+
void *next_key)
287+
{
288+
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
289+
u32 idx, *next = next_key;
290+
struct bpf_dtab_netdev *dev, *next_dev;
291+
struct hlist_head *head;
292+
int i = 0;
293+
294+
if (!key)
295+
goto find_first;
296+
297+
idx = *(u32 *)key;
298+
299+
dev = __dev_map_hash_lookup_elem(map, idx);
300+
if (!dev)
301+
goto find_first;
302+
303+
/* Try the next entry on the same bucket chain first. */
next_dev = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&dev->index_hlist)),
304+
struct bpf_dtab_netdev, index_hlist);
305+
306+
if (next_dev) {
307+
*next = next_dev->idx;
308+
return 0;
309+
}
310+
311+
/* End of this chain: resume the scan at the following bucket. */
i = idx & (dtab->n_buckets - 1);
312+
i++;
313+
314+
find_first:
315+
/* Scan the remaining buckets for the first entry. */
for (; i < dtab->n_buckets; i++) {
316+
head = dev_map_index_hash(dtab, i);
317+
318+
next_dev = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
319+
struct bpf_dtab_netdev,
320+
index_hlist);
321+
if (next_dev) {
322+
*next = next_dev->idx;
323+
return 0;
324+
}
325+
}
326+
327+
return -ENOENT;
328+
}
329+
221330
static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags,
222331
bool in_napi_ctx)
223332
{
@@ -373,6 +482,15 @@ static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
373482
return dev ? &dev->ifindex : NULL;
374483
}
375484

485+
static void *dev_map_hash_lookup_elem(struct bpf_map *map, void *key)
486+
{
487+
struct bpf_dtab_netdev *obj = __dev_map_hash_lookup_elem(map,
488+
*(u32 *)key);
489+
struct net_device *dev = obj ? obj->dev : NULL;
490+
491+
return dev ? &dev->ifindex : NULL;
492+
}
493+
376494
static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
377495
{
378496
if (dev->dev->netdev_ops->ndo_xdp_xmit) {
@@ -422,6 +540,28 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key)
422540
return 0;
423541
}
424542

543+
/* Syscall-side delete for DEVMAP_HASH.
 *
 * Unlinks the entry stored under *key (a u32) while holding index_lock,
 * decrements the occupancy counter, and frees the node after an RCU grace
 * period via __dev_map_entry_free(). Returns 0 on success or -ENOENT if
 * the key is not present.
 */
static int dev_map_hash_delete_elem(struct bpf_map *map, void *key)
544+
{
545+
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
546+
struct bpf_dtab_netdev *old_dev;
547+
int k = *(u32 *)key;
548+
unsigned long flags;
549+
int ret = -ENOENT;
550+
551+
spin_lock_irqsave(&dtab->index_lock, flags);
552+
553+
old_dev = __dev_map_hash_lookup_elem(map, k);
554+
if (old_dev) {
555+
dtab->items--;
556+
/* Unlink now; readers may still see the entry until the grace
 * period ends, so the actual free is deferred via call_rcu().
 */
hlist_del_init_rcu(&old_dev->index_hlist);
557+
call_rcu(&old_dev->rcu, __dev_map_entry_free);
558+
ret = 0;
559+
}
560+
spin_unlock_irqrestore(&dtab->index_lock, flags);
561+
562+
return ret;
563+
}
564+
425565
static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
426566
struct bpf_dtab *dtab,
427567
u32 ifindex,
@@ -502,6 +642,56 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
502642
map, key, value, map_flags);
503643
}
504644

645+
/* Insert or replace the DEVMAP_HASH entry at *key with a new node for the
 * ifindex in *value, resolved in namespace @net.
 *
 * map_flags follows the usual BPF update semantics: BPF_NOEXIST fails with
 * -EEXIST when the key already exists; flags above BPF_EXIST, or a zero
 * ifindex, are rejected with -EINVAL. The replacement node is allocated
 * before taking index_lock; under the lock an existing node is unlinked
 * (replacement does not change the items count), while a genuinely new key
 * is checked against max_entries (-E2BIG). Displaced/unused nodes are
 * freed only after an RCU grace period.
 */
static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
646+
void *key, void *value, u64 map_flags)
647+
{
648+
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
649+
struct bpf_dtab_netdev *dev, *old_dev;
650+
u32 ifindex = *(u32 *)value;
651+
u32 idx = *(u32 *)key;
652+
unsigned long flags;
653+
654+
if (unlikely(map_flags > BPF_EXIST || !ifindex))
655+
return -EINVAL;
656+
657+
old_dev = __dev_map_hash_lookup_elem(map, idx);
658+
if (old_dev && (map_flags & BPF_NOEXIST))
659+
return -EEXIST;
660+
661+
/* Allocate outside the lock; may sleep / fail without side effects. */
dev = __dev_map_alloc_node(net, dtab, ifindex, idx);
662+
if (IS_ERR(dev))
663+
return PTR_ERR(dev);
664+
665+
spin_lock_irqsave(&dtab->index_lock, flags);
666+
667+
if (old_dev) {
668+
hlist_del_rcu(&old_dev->index_hlist);
669+
} else {
670+
if (dtab->items >= dtab->map.max_entries) {
671+
spin_unlock_irqrestore(&dtab->index_lock, flags);
672+
/* Map full: discard the freshly allocated node via RCU. */
call_rcu(&dev->rcu, __dev_map_entry_free);
673+
return -E2BIG;
674+
}
675+
dtab->items++;
676+
}
677+
678+
hlist_add_head_rcu(&dev->index_hlist,
679+
dev_map_index_hash(dtab, idx));
680+
spin_unlock_irqrestore(&dtab->index_lock, flags);
681+
682+
if (old_dev)
683+
call_rcu(&old_dev->rcu, __dev_map_entry_free);
684+
685+
return 0;
686+
}
687+
688+
static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value,
689+
u64 map_flags)
690+
{
691+
return __dev_map_hash_update_elem(current->nsproxy->net_ns,
692+
map, key, value, map_flags);
693+
}
694+
505695
const struct bpf_map_ops dev_map_ops = {
506696
.map_alloc = dev_map_alloc,
507697
.map_free = dev_map_free,
@@ -512,6 +702,16 @@ const struct bpf_map_ops dev_map_ops = {
512702
.map_check_btf = map_check_no_btf,
513703
};
514704

705+
/* Operations for BPF_MAP_TYPE_DEVMAP_HASH. Allocation, freeing and the
 * packet enqueue/send path are shared with the array-based devmap; only
 * lookup, update, delete and key iteration differ.
 */
const struct bpf_map_ops dev_map_hash_ops = {
706+
.map_alloc = dev_map_alloc,
707+
.map_free = dev_map_free,
708+
.map_get_next_key = dev_map_hash_get_next_key,
709+
.map_lookup_elem = dev_map_hash_lookup_elem,
710+
.map_update_elem = dev_map_hash_update_elem,
711+
.map_delete_elem = dev_map_hash_delete_elem,
712+
.map_check_btf = map_check_no_btf,
713+
};
714+
515715
static int dev_map_notification(struct notifier_block *notifier,
516716
ulong event, void *ptr)
517717
{

kernel/bpf/verifier.c

+2
Original file line numberDiff line numberDiff line change
@@ -3457,6 +3457,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
34573457
goto error;
34583458
break;
34593459
case BPF_MAP_TYPE_DEVMAP:
3460+
case BPF_MAP_TYPE_DEVMAP_HASH:
34603461
if (func_id != BPF_FUNC_redirect_map &&
34613462
func_id != BPF_FUNC_map_lookup_elem)
34623463
goto error;
@@ -3539,6 +3540,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
35393540
break;
35403541
case BPF_FUNC_redirect_map:
35413542
if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
3543+
map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
35423544
map->map_type != BPF_MAP_TYPE_CPUMAP &&
35433545
map->map_type != BPF_MAP_TYPE_XSKMAP)
35443546
goto error;

net/core/filter.c

+7-2
Original file line numberDiff line numberDiff line change
@@ -3517,7 +3517,8 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
35173517
int err;
35183518

35193519
switch (map->map_type) {
3520-
case BPF_MAP_TYPE_DEVMAP: {
3520+
case BPF_MAP_TYPE_DEVMAP:
3521+
case BPF_MAP_TYPE_DEVMAP_HASH: {
35213522
struct bpf_dtab_netdev *dst = fwd;
35223523

35233524
err = dev_map_enqueue(dst, xdp, dev_rx);
@@ -3554,6 +3555,7 @@ void xdp_do_flush_map(void)
35543555
if (map) {
35553556
switch (map->map_type) {
35563557
case BPF_MAP_TYPE_DEVMAP:
3558+
case BPF_MAP_TYPE_DEVMAP_HASH:
35573559
__dev_map_flush(map);
35583560
break;
35593561
case BPF_MAP_TYPE_CPUMAP:
@@ -3574,6 +3576,8 @@ static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
35743576
switch (map->map_type) {
35753577
case BPF_MAP_TYPE_DEVMAP:
35763578
return __dev_map_lookup_elem(map, index);
3579+
case BPF_MAP_TYPE_DEVMAP_HASH:
3580+
return __dev_map_hash_lookup_elem(map, index);
35773581
case BPF_MAP_TYPE_CPUMAP:
35783582
return __cpu_map_lookup_elem(map, index);
35793583
case BPF_MAP_TYPE_XSKMAP:
@@ -3655,7 +3659,8 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
36553659
ri->tgt_value = NULL;
36563660
WRITE_ONCE(ri->map, NULL);
36573661

3658-
if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
3662+
if (map->map_type == BPF_MAP_TYPE_DEVMAP ||
3663+
map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
36593664
struct bpf_dtab_netdev *dst = fwd;
36603665

36613666
err = dev_map_generic_redirect(dst, skb, xdp_prog);

0 commit comments

Comments
 (0)