Skip to content

Commit bcc6b1b

Browse files
iamkafai authored and davem330 committed
bpf: Add hash of maps support
This patch adds hash of maps support (hashmap->bpf_map). BPF_MAP_TYPE_HASH_OF_MAPS is added. A map-in-map contains a pointer to another map; let's call this pointer 'inner_map_ptr'. Notes on deleting inner_map_ptr from a hash map: 1. For a BPF_F_NO_PREALLOC map-in-map, when deleting an inner_map_ptr, the htab_elem itself will go through an rcu grace period, and the inner_map_ptr resides in the htab_elem. 2. For a pre-allocated htab_elem (!BPF_F_NO_PREALLOC), when deleting an inner_map_ptr, the htab_elem may get reused immediately. This situation is similar to the existing preallocated use cases. However, bpf_map_fd_put_ptr() calls bpf_map_put(), which calls inner_map->ops->map_free(inner_map), which will go through an rcu grace period (i.e. every bpf_map's map_free currently goes through an rcu grace period). Hence, the inner_map_ptr is still safe for the rcu reader side. This patch also adds BPF_MAP_TYPE_HASH_OF_MAPS to check_map_prealloc() in the verifier. Preallocation is a must for BPF_PROG_TYPE_PERF_EVENT. Hence, even though we don't expect heavy updates to map-in-map, enforcing BPF_F_NO_PREALLOC for map-in-map is impossible without first disallowing BPF_PROG_TYPE_PERF_EVENT from using map-in-map. Signed-off-by: Martin KaFai Lau <[email protected]> Acked-by: Alexei Starovoitov <[email protected]> Acked-by: Daniel Borkmann <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 56f668d commit bcc6b1b

File tree

5 files changed

+134
-2
lines changed

5 files changed

+134
-2
lines changed

include/linux/bpf.h

+2
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,8 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value);
277277
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
278278
void *key, void *value, u64 map_flags);
279279
void bpf_fd_array_map_clear(struct bpf_map *map);
280+
int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
281+
void *key, void *value, u64 map_flags);
280282

281283
/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
282284
* forced to use 'long' read/writes to try to atomically copy long counters.

include/uapi/linux/bpf.h

+1
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ enum bpf_map_type {
9797
BPF_MAP_TYPE_LRU_PERCPU_HASH,
9898
BPF_MAP_TYPE_LPM_TRIE,
9999
BPF_MAP_TYPE_ARRAY_OF_MAPS,
100+
BPF_MAP_TYPE_HASH_OF_MAPS,
100101
};
101102

102103
enum bpf_prog_type {

kernel/bpf/hashtab.c

+121
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <linux/rculist_nulls.h>
1717
#include "percpu_freelist.h"
1818
#include "bpf_lru_list.h"
19+
#include "map_in_map.h"
1920

2021
struct bucket {
2122
struct hlist_nulls_head head;
@@ -88,6 +89,11 @@ static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size
8889
return *(void __percpu **)(l->key + key_size);
8990
}
9091

92+
/* Return the pointer stored in element @l right behind its key.
 * The key area is taken to be roundup(map->key_size, 8) bytes long,
 * so the pointer is read from l->key plus that rounded-up size.
 */
static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l)
93+
{
94+
return *(void **)(l->key + roundup(map->key_size, 8));
95+
}
96+
9197
static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
9298
{
9399
return (struct htab_elem *) (htab->elems + i * htab->elem_size);
@@ -603,6 +609,14 @@ static void htab_elem_free_rcu(struct rcu_head *head)
603609

604610
static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
605611
{
612+
struct bpf_map *map = &htab->map;
613+
614+
if (map->ops->map_fd_put_ptr) {
615+
void *ptr = fd_htab_map_get_ptr(map, l);
616+
617+
map->ops->map_fd_put_ptr(ptr);
618+
}
619+
606620
if (l->state == HTAB_EXTRA_ELEM_USED) {
607621
l->state = HTAB_EXTRA_ELEM_FREE;
608622
return;
@@ -1057,6 +1071,7 @@ static void delete_all_elements(struct bpf_htab *htab)
10571071
}
10581072
}
10591073
}
1074+
10601075
/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
10611076
static void htab_map_free(struct bpf_map *map)
10621077
{
@@ -1213,12 +1228,118 @@ static struct bpf_map_type_list htab_lru_percpu_type __ro_after_init = {
12131228
.type = BPF_MAP_TYPE_LRU_PERCPU_HASH,
12141229
};
12151230

1231+
/* Allocate a hash map for fd-based values.
 *
 * The caller must pass attr->value_size == sizeof(u32); anything else
 * yields ERR_PTR(-EINVAL). For the duration of the htab_map_alloc()
 * call attr->value_size is switched to sizeof(void *), then put back
 * to sizeof(u32) before returning, whether or not allocation succeeded.
 *
 * Returns whatever htab_map_alloc() returned.
 */
static struct bpf_map *fd_htab_map_alloc(union bpf_attr *attr)
1232+
{
1233+
struct bpf_map *map;
1234+
1235+
if (attr->value_size != sizeof(u32))
1236+
return ERR_PTR(-EINVAL);
1237+
1238+
/* pointer is stored internally */
1239+
attr->value_size = sizeof(void *);
1240+
map = htab_map_alloc(attr);
1241+
/* restore the value size the caller passed in */
1242+
attr->value_size = sizeof(u32);
1242+
1243+
return map;
1244+
}
1245+
1246+
/* Release every pointer still stored in the map, then free the map.
 *
 * Walks all htab->n_buckets buckets, and for each element calls
 * map->ops->map_fd_put_ptr() on the pointer read by
 * fd_htab_map_get_ptr(). Finally hands the map to htab_map_free().
 *
 * NOTE(review): map_fd_put_ptr is invoked without a NULL check here,
 * so the ops table is expected to provide it.
 */
static void fd_htab_map_free(struct bpf_map *map)
1247+
{
1248+
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
1249+
struct hlist_nulls_node *n;
1250+
struct hlist_nulls_head *head;
1251+
struct htab_elem *l;
1252+
int i;
1253+
1254+
for (i = 0; i < htab->n_buckets; i++) {
1255+
head = select_bucket(htab, i);
1256+
1257+
hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
1258+
void *ptr = fd_htab_map_get_ptr(map, l);
1259+
1260+
map->ops->map_fd_put_ptr(ptr);
1261+
}
1262+
}
1263+
1264+
htab_map_free(map);
1265+
}
1266+
1267+
/* only called from syscall */
1268+
/* Store the object referred to by a user fd under @key.
 *
 * @value is read as a u32 fd and resolved to a pointer through
 * map->ops->map_fd_get_ptr(map, map_file, ufd). The address of that
 * pointer is then passed as the value to htab_map_update_elem().
 *
 * Returns PTR_ERR() of the lookup result if resolving the fd failed,
 * otherwise the return value of htab_map_update_elem(). When the
 * update fails, the pointer obtained above is released again with
 * map->ops->map_fd_put_ptr().
 */
int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
1269+
void *key, void *value, u64 map_flags)
1270+
{
1271+
void *ptr;
1272+
int ret;
1273+
u32 ufd = *(u32 *)value;
1274+
1275+
ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
1276+
if (IS_ERR(ptr))
1277+
return PTR_ERR(ptr);
1278+
1279+
ret = htab_map_update_elem(map, key, &ptr, map_flags);
1280+
/* the element was not stored, so drop the pointer taken above */
1280+
if (ret)
1281+
map->ops->map_fd_put_ptr(ptr);
1282+
1283+
return ret;
1284+
}
1285+
1286+
/* .map_alloc callback for the hash-of-maps type.
 *
 * Builds inner-map metadata from attr->inner_map_fd via
 * bpf_map_meta_alloc(), then allocates the map itself with
 * fd_htab_map_alloc(). If either step returns an error pointer, that
 * error pointer is returned; metadata already allocated is freed when
 * the second step fails. On success the metadata is attached as
 * map->inner_map_meta.
 */
static struct bpf_map *htab_of_map_alloc(union bpf_attr *attr)
1287+
{
1288+
struct bpf_map *map, *inner_map_meta;
1289+
1290+
inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
1291+
if (IS_ERR(inner_map_meta))
1292+
return inner_map_meta;
1293+
1294+
map = fd_htab_map_alloc(attr);
1295+
if (IS_ERR(map)) {
1296+
bpf_map_meta_free(inner_map_meta);
1297+
return map;
1298+
}
1299+
1300+
map->inner_map_meta = inner_map_meta;
1301+
1302+
return map;
1303+
}
1304+
1305+
/* .map_lookup_elem callback for the hash-of-maps type.
 *
 * The result of htab_map_lookup_elem() is treated as the location of a
 * stored struct bpf_map pointer. Returns NULL when the lookup found
 * nothing, otherwise the stored pointer itself, read with READ_ONCE().
 */
static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key)
1306+
{
1307+
struct bpf_map **inner_map = htab_map_lookup_elem(map, key);
1308+
1309+
if (!inner_map)
1310+
return NULL;
1311+
1312+
return READ_ONCE(*inner_map);
1313+
}
1314+
1315+
/* .map_free callback for the hash-of-maps type: frees the inner-map
 * metadata attached to @map, then releases the map and the pointers it
 * still holds through fd_htab_map_free().
 */
static void htab_of_map_free(struct bpf_map *map)
1316+
{
1317+
bpf_map_meta_free(map->inner_map_meta);
1318+
fd_htab_map_free(map);
1319+
}
1320+
1321+
/* Operations table for the hash-of-maps type. Allocation, free and
 * lookup use the htab_of_map_* wrappers; get_next_key and delete reuse
 * the plain htab_map_* implementations. No .map_update_elem entry is
 * set in this table.
 */
static const struct bpf_map_ops htab_of_map_ops = {
1322+
.map_alloc = htab_of_map_alloc,
1323+
.map_free = htab_of_map_free,
1324+
.map_get_next_key = htab_map_get_next_key,
1325+
.map_lookup_elem = htab_of_map_lookup_elem,
1326+
.map_delete_elem = htab_map_delete_elem,
1327+
.map_fd_get_ptr = bpf_map_fd_get_ptr,
1328+
.map_fd_put_ptr = bpf_map_fd_put_ptr,
1329+
};
1330+
1331+
/* Registration record tying htab_of_map_ops to BPF_MAP_TYPE_HASH_OF_MAPS. */
static struct bpf_map_type_list htab_of_map_type __ro_after_init = {
1332+
.ops = &htab_of_map_ops,
1333+
.type = BPF_MAP_TYPE_HASH_OF_MAPS,
1334+
};
1335+
12161336
/* Init hook: registers each hash map type handled here (plain, per-cpu,
 * LRU, LRU per-cpu and hash-of-maps) with bpf_register_map_type().
 * Always returns 0.
 */
static int __init register_htab_map(void)
12171337
{
12181338
bpf_register_map_type(&htab_type);
12191339
bpf_register_map_type(&htab_percpu_type);
12201340
bpf_register_map_type(&htab_lru_type);
12211341
bpf_register_map_type(&htab_lru_percpu_type);
1342+
bpf_register_map_type(&htab_of_map_type);
12221343
return 0;
12231344
}
12241345
late_initcall(register_htab_map);

kernel/bpf/syscall.c

+7-1
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,8 @@ static int map_lookup_elem(union bpf_attr *attr)
352352
err = bpf_percpu_array_copy(map, key, value);
353353
} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
354354
err = bpf_stackmap_copy(map, key, value);
355-
} else if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
355+
} else if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
356+
map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
356357
err = -ENOTSUPP;
357358
} else {
358359
rcu_read_lock();
@@ -446,6 +447,11 @@ static int map_update_elem(union bpf_attr *attr)
446447
err = bpf_fd_array_map_update_elem(map, f.file, key, value,
447448
attr->flags);
448449
rcu_read_unlock();
450+
} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
451+
rcu_read_lock();
452+
err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
453+
attr->flags);
454+
rcu_read_unlock();
449455
} else {
450456
rcu_read_lock();
451457
err = map->ops->map_update_elem(map, key, value, attr->flags);

kernel/bpf/verifier.c

+3-1
Original file line numberDiff line numberDiff line change
@@ -1200,6 +1200,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
12001200
goto error;
12011201
break;
12021202
case BPF_MAP_TYPE_ARRAY_OF_MAPS:
1203+
case BPF_MAP_TYPE_HASH_OF_MAPS:
12031204
if (func_id != BPF_FUNC_map_lookup_elem)
12041205
goto error;
12051206
default:
@@ -3044,7 +3045,8 @@ static int do_check(struct bpf_verifier_env *env)
30443045
static int check_map_prealloc(struct bpf_map *map)
30453046
{
30463047
return (map->map_type != BPF_MAP_TYPE_HASH &&
3047-
map->map_type != BPF_MAP_TYPE_PERCPU_HASH) ||
3048+
map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
3049+
map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
30483050
!(map->map_flags & BPF_F_NO_PREALLOC);
30493051
}
30503052

0 commit comments

Comments
 (0)