  * notifier hook walks the map we know that new dev references can not be
  * added by the user because core infrastructure ensures dev_get_by_index()
  * calls will fail at this point.
+ *
+ * The devmap_hash type is a map type which interprets keys as ifindexes and
+ * indexes these using a hashmap. This allows maps that use ifindex as key to be
+ * densely packed instead of having holes in the lookup array for unused
+ * ifindexes. The setup and packet enqueue/send code is shared between the two
+ * types of devmap; only the lookup and insertion is different.
  */
 #include <linux/bpf.h>
 #include <net/xdp.h>
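For context on how the new map type is meant to be consumed, here is a minimal, hypothetical XDP program that redirects packets through a devmap_hash map. This is an illustrative sketch only, not part of the patch: the map name tx_devs, its size, and the fixed key are made-up assumptions.

/* Sketch: XDP program redirecting via a BPF_MAP_TYPE_DEVMAP_HASH map.
 * Map name, max_entries and the key value are illustrative assumptions.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
	__uint(max_entries, 64);
} tx_devs SEC(".maps");

SEC("xdp")
int xdp_redirect_hash(struct xdp_md *ctx)
{
	__u32 key = 42;	/* e.g. an egress ifindex used directly as hash key */

	/* XDP_REDIRECT if the key is populated in the map */
	return bpf_redirect_map(&tx_devs, key, 0);
}

char _license[] SEC("license") = "GPL";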
@@ -59,6 +65,7 @@ struct xdp_bulk_queue {
 
 struct bpf_dtab_netdev {
 	struct net_device *dev; /* must be first member, due to tracepoint */
+	struct hlist_node index_hlist;
 	struct bpf_dtab *dtab;
 	struct xdp_bulk_queue __percpu *bulkq;
 	struct rcu_head rcu;
@@ -70,11 +77,30 @@ struct bpf_dtab {
 	struct bpf_dtab_netdev **netdev_map;
 	struct list_head __percpu *flush_list;
 	struct list_head list;
+
+	/* these are only used for DEVMAP_HASH type maps */
+	struct hlist_head *dev_index_head;
+	spinlock_t index_lock;
+	unsigned int items;
+	u32 n_buckets;
 };
 
 static DEFINE_SPINLOCK(dev_map_lock);
 static LIST_HEAD(dev_map_list);
 
+static struct hlist_head *dev_map_create_hash(unsigned int entries)
+{
+	int i;
+	struct hlist_head *hash;
+
+	hash = kmalloc_array(entries, sizeof(*hash), GFP_KERNEL);
+	if (hash != NULL)
+		for (i = 0; i < entries; i++)
+			INIT_HLIST_HEAD(&hash[i]);
+
+	return hash;
+}
+
 static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
 {
 	int err, cpu;
@@ -97,6 +123,14 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
 	cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
 	cost += sizeof(struct list_head) * num_possible_cpus();
 
+	if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
+		dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries);
+
+		if (!dtab->n_buckets) /* Overflow check */
+			return -EINVAL;
+		cost += sizeof(struct hlist_head) * dtab->n_buckets;
+	}
+
 	/* if map size is larger than memlock limit, reject it */
 	err = bpf_map_charge_init(&dtab->map.memory, cost);
 	if (err)
@@ -115,8 +149,18 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
 	if (!dtab->netdev_map)
 		goto free_percpu;
 
+	if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
+		dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets);
+		if (!dtab->dev_index_head)
+			goto free_map_area;
+
+		spin_lock_init(&dtab->index_lock);
+	}
+
 	return 0;
 
+free_map_area:
+	bpf_map_area_free(dtab->netdev_map);
 free_percpu:
 	free_percpu(dtab->flush_list);
 free_charge:
@@ -198,6 +242,7 @@ static void dev_map_free(struct bpf_map *map)
 
 	free_percpu(dtab->flush_list);
 	bpf_map_area_free(dtab->netdev_map);
+	kfree(dtab->dev_index_head);
 	kfree(dtab);
 }
 
@@ -218,6 +263,70 @@ static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 	return 0;
 }
 
+static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab,
+						    int idx)
+{
+	return &dtab->dev_index_head[idx & (dtab->n_buckets - 1)];
+}
+
+struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	struct hlist_head *head = dev_map_index_hash(dtab, key);
+	struct bpf_dtab_netdev *dev;
+
+	hlist_for_each_entry_rcu(dev, head, index_hlist)
+		if (dev->idx == key)
+			return dev;
+
+	return NULL;
+}
+
+static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
+				     void *next_key)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	u32 idx, *next = next_key;
+	struct bpf_dtab_netdev *dev, *next_dev;
+	struct hlist_head *head;
+	int i = 0;
+
+	if (!key)
+		goto find_first;
+
+	idx = *(u32 *)key;
+
+	dev = __dev_map_hash_lookup_elem(map, idx);
+	if (!dev)
+		goto find_first;
+
+	next_dev = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&dev->index_hlist)),
+				    struct bpf_dtab_netdev, index_hlist);
+
+	if (next_dev) {
+		*next = next_dev->idx;
+		return 0;
+	}
+
+	i = idx & (dtab->n_buckets - 1);
+	i++;
+
+ find_first:
+	for (; i < dtab->n_buckets; i++) {
+		head = dev_map_index_hash(dtab, i);
+
+		next_dev = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
+					    struct bpf_dtab_netdev,
+					    index_hlist);
+		if (next_dev) {
+			*next = next_dev->idx;
+			return 0;
+		}
+	}
+
+	return -ENOENT;
+}
+
 static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags,
 		       bool in_napi_ctx)
 {
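The dev_map_hash_get_next_key() added above is what services BPF_MAP_GET_NEXT_KEY for this map type, so userspace can walk all populated keys even though they are sparse. A hypothetical libbpf sketch of such a walk; map_fd is assumed to be obtained elsewhere (e.g. from a pinned map), and the value read back is the stored ifindex:

/* Sketch: iterate a devmap_hash map from userspace with libbpf. */
#include <stdio.h>
#include <bpf/bpf.h>

static void dump_devmap_hash(int map_fd)
{
	__u32 key, next_key, ifindex;
	int err;

	/* a NULL key asks the kernel for the first populated key */
	err = bpf_map_get_next_key(map_fd, NULL, &next_key);
	while (!err) {
		if (!bpf_map_lookup_elem(map_fd, &next_key, &ifindex))
			printf("key %u -> ifindex %u\n", next_key, ifindex);

		key = next_key;
		err = bpf_map_get_next_key(map_fd, &key, &next_key);
	}
	/* loop ends when no more keys remain (ENOENT) */
}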
@@ -373,6 +482,15 @@ static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
 	return dev ? &dev->ifindex : NULL;
 }
 
+static void *dev_map_hash_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_dtab_netdev *obj = __dev_map_hash_lookup_elem(map,
+								 *(u32 *)key);
+	struct net_device *dev = obj ? obj->dev : NULL;
+
+	return dev ? &dev->ifindex : NULL;
+}
+
 static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
 {
 	if (dev->dev->netdev_ops->ndo_xdp_xmit) {
@@ -422,6 +540,28 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key)
 	return 0;
 }
 
+static int dev_map_hash_delete_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	struct bpf_dtab_netdev *old_dev;
+	int k = *(u32 *)key;
+	unsigned long flags;
+	int ret = -ENOENT;
+
+	spin_lock_irqsave(&dtab->index_lock, flags);
+
+	old_dev = __dev_map_hash_lookup_elem(map, k);
+	if (old_dev) {
+		dtab->items--;
+		hlist_del_init_rcu(&old_dev->index_hlist);
+		call_rcu(&old_dev->rcu, __dev_map_entry_free);
+		ret = 0;
+	}
+	spin_unlock_irqrestore(&dtab->index_lock, flags);
+
+	return ret;
+}
+
 static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
 						    struct bpf_dtab *dtab,
 						    u32 ifindex,
@@ -502,6 +642,56 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
 				     map, key, value, map_flags);
 }
 
+static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
+				      void *key, void *value, u64 map_flags)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	struct bpf_dtab_netdev *dev, *old_dev;
+	u32 ifindex = *(u32 *)value;
+	u32 idx = *(u32 *)key;
+	unsigned long flags;
+
+	if (unlikely(map_flags > BPF_EXIST || !ifindex))
+		return -EINVAL;
+
+	old_dev = __dev_map_hash_lookup_elem(map, idx);
+	if (old_dev && (map_flags & BPF_NOEXIST))
+		return -EEXIST;
+
+	dev = __dev_map_alloc_node(net, dtab, ifindex, idx);
+	if (IS_ERR(dev))
+		return PTR_ERR(dev);
+
+	spin_lock_irqsave(&dtab->index_lock, flags);
+
+	if (old_dev) {
+		hlist_del_rcu(&old_dev->index_hlist);
+	} else {
+		if (dtab->items >= dtab->map.max_entries) {
+			spin_unlock_irqrestore(&dtab->index_lock, flags);
+			call_rcu(&dev->rcu, __dev_map_entry_free);
+			return -E2BIG;
+		}
+		dtab->items++;
+	}
+
+	hlist_add_head_rcu(&dev->index_hlist,
+			   dev_map_index_hash(dtab, idx));
+	spin_unlock_irqrestore(&dtab->index_lock, flags);
+
+	if (old_dev)
+		call_rcu(&old_dev->rcu, __dev_map_entry_free);
+
+	return 0;
+}
+
+static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value,
+				    u64 map_flags)
+{
+	return __dev_map_hash_update_elem(current->nsproxy->net_ns,
+					  map, key, value, map_flags);
+}
+
 const struct bpf_map_ops dev_map_ops = {
 	.map_alloc = dev_map_alloc,
 	.map_free = dev_map_free,
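Update and delete for the hash variant go through the same map syscalls as any other map type. A hypothetical userspace sketch that stores an ifindex under itself as key, matching the dense-packing use case described in the header comment; map_fd, the interface name, and the helper names are illustrative assumptions:

/* Sketch: populate and clear one entry of a devmap_hash map from userspace. */
#include <bpf/bpf.h>
#include <net/if.h>

static int add_tx_dev(int map_fd, const char *ifname)
{
	__u32 ifindex = if_nametoindex(ifname);
	__u32 key = ifindex;	/* ifindex used directly as the hash key */

	if (!ifindex)
		return -1;

	/* BPF_ANY: insert or overwrite; the kernel side serializes on index_lock */
	return bpf_map_update_elem(map_fd, &key, &ifindex, BPF_ANY);
}

static int del_tx_dev(int map_fd, __u32 key)
{
	/* removal unhooks the hlist entry; the node is freed after an RCU grace period */
	return bpf_map_delete_elem(map_fd, &key);
}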
@@ -512,6 +702,16 @@ const struct bpf_map_ops dev_map_ops = {
 	.map_check_btf = map_check_no_btf,
 };
 
+const struct bpf_map_ops dev_map_hash_ops = {
+	.map_alloc = dev_map_alloc,
+	.map_free = dev_map_free,
+	.map_get_next_key = dev_map_hash_get_next_key,
+	.map_lookup_elem = dev_map_hash_lookup_elem,
+	.map_update_elem = dev_map_hash_update_elem,
+	.map_delete_elem = dev_map_hash_delete_elem,
+	.map_check_btf = map_check_no_btf,
+};
+
 static int dev_map_notification(struct notifier_block *notifier,
 				ulong event, void *ptr)
 {