Skip to content

Commit 3e87f19

Browse files
Denis Salopek authored and anakryiko committed
bpf: Add lookup_and_delete_elem support to hashtab
Extend the existing bpf_map_lookup_and_delete_elem() functionality to hashtab map types, in addition to stacks and queues. Create a new hashtab bpf_map_ops function that does lookup and deletion of the element under the same bucket lock and add the created map_ops to bpf.h.

Signed-off-by: Denis Salopek <[email protected]>
Signed-off-by: Andrii Nakryiko <[email protected]>
Acked-by: Yonghong Song <[email protected]>
Link: https://lore.kernel.org/bpf/4d18480a3e990ffbf14751ddef0325eed3be2966.1620763117.git.denis.salopek@sartura.hr
1 parent f9bceaa commit 3e87f19

File tree

5 files changed

+156
-4
lines changed

5 files changed

+156
-4
lines changed

include/linux/bpf.h

+2
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ struct bpf_map_ops {
7070
void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key);
7171
int (*map_lookup_batch)(struct bpf_map *map, const union bpf_attr *attr,
7272
union bpf_attr __user *uattr);
73+
int (*map_lookup_and_delete_elem)(struct bpf_map *map, void *key,
74+
void *value, u64 flags);
7375
int (*map_lookup_and_delete_batch)(struct bpf_map *map,
7476
const union bpf_attr *attr,
7577
union bpf_attr __user *uattr);

include/uapi/linux/bpf.h

+13
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,15 @@ union bpf_iter_link_info {
527527
* Look up an element with the given *key* in the map referred to
528528
* by the file descriptor *fd*, and if found, delete the element.
529529
*
530+
* For **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map
531+
* types, the *flags* argument needs to be set to 0, but for other
532+
* map types, it may be specified as:
533+
*
534+
* **BPF_F_LOCK**
535+
* Look up and delete the value of a spin-locked map
536+
* without returning the lock. This must be specified if
537+
* the elements contain a spinlock.
538+
*
530539
* The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types
531540
* implement this command as a "pop" operation, deleting the top
532541
* element rather than one corresponding to *key*.
@@ -536,6 +545,10 @@ union bpf_iter_link_info {
536545
* This command is only valid for the following map types:
537546
* * **BPF_MAP_TYPE_QUEUE**
538547
* * **BPF_MAP_TYPE_STACK**
548+
* * **BPF_MAP_TYPE_HASH**
549+
* * **BPF_MAP_TYPE_PERCPU_HASH**
550+
* * **BPF_MAP_TYPE_LRU_HASH**
551+
* * **BPF_MAP_TYPE_LRU_PERCPU_HASH**
539552
*
540553
* Return
541554
* Returns zero on success. On error, -1 is returned and *errno*

kernel/bpf/hashtab.c

+98
Original file line numberDiff line numberDiff line change
@@ -1401,6 +1401,100 @@ static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
14011401
rcu_read_unlock();
14021402
}
14031403

1404+
/*
 * Common implementation of "lookup and delete" for the hash table map
 * flavours.  The element matching @key is located, its value is copied
 * into @value and the element is unlinked from its bucket, all while the
 * bucket lock taken by htab_lock_bucket() is held.
 *
 * @map:        map embedded in a struct bpf_htab
 * @key:        key to search for (map->key_size bytes are hashed/compared)
 * @value:      destination buffer.  For per-CPU maps it receives one slot of
 *              round_up(map->value_size, 8) bytes per possible CPU, laid out
 *              back to back; otherwise it receives a single map value.
 * @is_lru_map: when true the element is not released via free_htab_elem();
 *              it is handed to bpf_lru_push_free() once the bucket lock has
 *              been dropped.
 * @is_percpu:  selects the per-CPU copy path described above.
 * @flags:      only BPF_F_LOCK is examined here; when set, the value is
 *              copied with copy_map_value_locked() instead of
 *              copy_map_value().  Ignored on the per-CPU path.
 *
 * Return: the non-zero result of htab_lock_bucket() if the bucket could not
 * be locked, -ENOENT if no element matches @key, 0 otherwise.
 */
static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
					     void *value, bool is_lru_map,
					     bool is_percpu, u64 flags)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	u32 key_size = map->key_size;
	u32 hash = htab_map_hash(key, key_size, htab->hashrnd);
	struct bucket *bkt = __select_bucket(htab, hash);
	unsigned long lock_flags;
	struct htab_elem *elem;
	int err;

	err = htab_lock_bucket(htab, bkt, hash, &lock_flags);
	if (err)
		return err;

	elem = lookup_elem_raw(&bkt->head, hash, key, key_size);
	if (!elem) {
		err = -ENOENT;
		goto unlock;
	}

	if (is_percpu) {
		u32 slot_size = round_up(map->value_size, 8);
		void __percpu *pptr = htab_elem_get_ptr(elem, key_size);
		void *dst = value;
		int cpu;

		/* One padded slot per possible CPU, packed consecutively. */
		for_each_possible_cpu(cpu) {
			bpf_long_memcpy(dst, per_cpu_ptr(pptr, cpu), slot_size);
			dst += slot_size;
		}
	} else {
		/* The value is stored right after the 8-byte-padded key. */
		void *src = elem->key + round_up(map->key_size, 8);

		if (flags & BPF_F_LOCK)
			copy_map_value_locked(map, value, src, true);
		else
			copy_map_value(map, value, src);
		check_and_init_map_lock(map, value);
	}

	/* Unlink while the bucket is still locked. */
	hlist_nulls_del_rcu(&elem->hash_node);
	if (!is_lru_map)
		free_htab_elem(htab, elem);

unlock:
	htab_unlock_bucket(htab, bkt, hash, lock_flags);

	/* LRU elements go back to the LRU free list outside the bucket lock. */
	if (is_lru_map && elem)
		bpf_lru_push_free(&htab->lru, &elem->lru_node);

	return err;
}
1467+
1468+
/*
 * .map_lookup_and_delete_elem handler for htab_map_ops: forwards to the
 * common helper with both the LRU and the per-CPU paths disabled.
 */
static int htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
					   void *value, u64 flags)
{
	const bool is_lru_map = false;
	const bool is_percpu = false;

	return __htab_map_lookup_and_delete_elem(map, key, value, is_lru_map,
						 is_percpu, flags);
}
1474+
1475+
static int htab_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
1476+
void *key, void *value,
1477+
u64 flags)
1478+
{
1479+
return __htab_map_lookup_and_delete_elem(map, key, value, false, true,
1480+
flags);
1481+
}
1482+
1483+
/*
 * .map_lookup_and_delete_elem handler for htab_lru_map_ops: forwards to the
 * common helper with the LRU path enabled and the per-CPU path disabled.
 */
static int htab_lru_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
					       void *value, u64 flags)
{
	const bool is_lru_map = true;
	const bool is_percpu = false;

	return __htab_map_lookup_and_delete_elem(map, key, value, is_lru_map,
						 is_percpu, flags);
}
1489+
1490+
static int htab_lru_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
1491+
void *key, void *value,
1492+
u64 flags)
1493+
{
1494+
return __htab_map_lookup_and_delete_elem(map, key, value, true, true,
1495+
flags);
1496+
}
1497+
14041498
static int
14051499
__htab_map_lookup_and_delete_batch(struct bpf_map *map,
14061500
const union bpf_attr *attr,
@@ -1934,6 +2028,7 @@ const struct bpf_map_ops htab_map_ops = {
19342028
.map_free = htab_map_free,
19352029
.map_get_next_key = htab_map_get_next_key,
19362030
.map_lookup_elem = htab_map_lookup_elem,
2031+
.map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
19372032
.map_update_elem = htab_map_update_elem,
19382033
.map_delete_elem = htab_map_delete_elem,
19392034
.map_gen_lookup = htab_map_gen_lookup,
@@ -1954,6 +2049,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
19542049
.map_free = htab_map_free,
19552050
.map_get_next_key = htab_map_get_next_key,
19562051
.map_lookup_elem = htab_lru_map_lookup_elem,
2052+
.map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem,
19572053
.map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
19582054
.map_update_elem = htab_lru_map_update_elem,
19592055
.map_delete_elem = htab_lru_map_delete_elem,
@@ -2077,6 +2173,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
20772173
.map_free = htab_map_free,
20782174
.map_get_next_key = htab_map_get_next_key,
20792175
.map_lookup_elem = htab_percpu_map_lookup_elem,
2176+
.map_lookup_and_delete_elem = htab_percpu_map_lookup_and_delete_elem,
20802177
.map_update_elem = htab_percpu_map_update_elem,
20812178
.map_delete_elem = htab_map_delete_elem,
20822179
.map_seq_show_elem = htab_percpu_map_seq_show_elem,
@@ -2096,6 +2193,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
20962193
.map_free = htab_map_free,
20972194
.map_get_next_key = htab_map_get_next_key,
20982195
.map_lookup_elem = htab_lru_percpu_map_lookup_elem,
2196+
.map_lookup_and_delete_elem = htab_lru_percpu_map_lookup_and_delete_elem,
20992197
.map_update_elem = htab_lru_percpu_map_update_elem,
21002198
.map_delete_elem = htab_lru_map_delete_elem,
21012199
.map_seq_show_elem = htab_percpu_map_seq_show_elem,

kernel/bpf/syscall.c

+30-4
Original file line numberDiff line numberDiff line change
@@ -1483,7 +1483,7 @@ int generic_map_lookup_batch(struct bpf_map *map,
14831483
return err;
14841484
}
14851485

1486-
#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value
1486+
#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD flags
14871487

14881488
static int map_lookup_and_delete_elem(union bpf_attr *attr)
14891489
{
@@ -1499,6 +1499,9 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
14991499
if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM))
15001500
return -EINVAL;
15011501

1502+
if (attr->flags & ~BPF_F_LOCK)
1503+
return -EINVAL;
1504+
15021505
f = fdget(ufd);
15031506
map = __bpf_map_get(f);
15041507
if (IS_ERR(map))
@@ -1509,24 +1512,47 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
15091512
goto err_put;
15101513
}
15111514

1515+
if (attr->flags &&
1516+
(map->map_type == BPF_MAP_TYPE_QUEUE ||
1517+
map->map_type == BPF_MAP_TYPE_STACK)) {
1518+
err = -EINVAL;
1519+
goto err_put;
1520+
}
1521+
1522+
if ((attr->flags & BPF_F_LOCK) &&
1523+
!map_value_has_spin_lock(map)) {
1524+
err = -EINVAL;
1525+
goto err_put;
1526+
}
1527+
15121528
key = __bpf_copy_key(ukey, map->key_size);
15131529
if (IS_ERR(key)) {
15141530
err = PTR_ERR(key);
15151531
goto err_put;
15161532
}
15171533

1518-
value_size = map->value_size;
1534+
value_size = bpf_map_value_size(map);
15191535

15201536
err = -ENOMEM;
15211537
value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
15221538
if (!value)
15231539
goto free_key;
15241540

1541+
err = -ENOTSUPP;
15251542
if (map->map_type == BPF_MAP_TYPE_QUEUE ||
15261543
map->map_type == BPF_MAP_TYPE_STACK) {
15271544
err = map->ops->map_pop_elem(map, value);
1528-
} else {
1529-
err = -ENOTSUPP;
1545+
} else if (map->map_type == BPF_MAP_TYPE_HASH ||
1546+
map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
1547+
map->map_type == BPF_MAP_TYPE_LRU_HASH ||
1548+
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
1549+
if (!bpf_map_is_dev_bound(map)) {
1550+
bpf_disable_instrumentation();
1551+
rcu_read_lock();
1552+
err = map->ops->map_lookup_and_delete_elem(map, key, value, attr->flags);
1553+
rcu_read_unlock();
1554+
bpf_enable_instrumentation();
1555+
}
15301556
}
15311557

15321558
if (err)

tools/include/uapi/linux/bpf.h

+13
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,15 @@ union bpf_iter_link_info {
527527
* Look up an element with the given *key* in the map referred to
528528
* by the file descriptor *fd*, and if found, delete the element.
529529
*
530+
* For **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map
531+
* types, the *flags* argument needs to be set to 0, but for other
532+
* map types, it may be specified as:
533+
*
534+
* **BPF_F_LOCK**
535+
* Look up and delete the value of a spin-locked map
536+
* without returning the lock. This must be specified if
537+
* the elements contain a spinlock.
538+
*
530539
* The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types
531540
* implement this command as a "pop" operation, deleting the top
532541
* element rather than one corresponding to *key*.
@@ -536,6 +545,10 @@ union bpf_iter_link_info {
536545
* This command is only valid for the following map types:
537546
* * **BPF_MAP_TYPE_QUEUE**
538547
* * **BPF_MAP_TYPE_STACK**
548+
* * **BPF_MAP_TYPE_HASH**
549+
* * **BPF_MAP_TYPE_PERCPU_HASH**
550+
* * **BPF_MAP_TYPE_LRU_HASH**
551+
* * **BPF_MAP_TYPE_LRU_PERCPU_HASH**
539552
*
540553
* Return
541554
* Returns zero on success. On error, -1 is returned and *errno*

0 commit comments

Comments
 (0)