
Commit 96049f3

Alexei Starovoitov authored and borkmann committed
bpf: introduce BPF_F_LOCK flag
Introduce the BPF_F_LOCK flag for the map_lookup and map_update syscall commands and for the map_update() helper function. In all these cases, take the lock of the existing element (which was provided in the BTF description) before copying the rest of the map value in or out.

Implementation details that are part of the uapi:

Array:
The array map takes the element lock for lookup/update.

Hash:
The hash map also takes the lock for lookup/update and tries to avoid the bucket lock. If the old element exists, it takes the element lock and updates the element in place. If the element doesn't exist, it allocates a new one and inserts it into the hash table while holding the bucket lock. In the rare case, the hash map has to take both the bucket lock and the element lock to update the old value in place.

Cgroup local storage:
It is similar to array: update in place and lookup are done with the lock taken.

Signed-off-by: Alexei Starovoitov <[email protected]>
Signed-off-by: Daniel Borkmann <[email protected]>
1 parent ab963be commit 96049f3
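
BPF_F_LOCK only has an effect on maps whose value embeds a struct bpf_spin_lock described by BTF; otherwise the map_value_has_spin_lock() checks added below return -EINVAL. A minimal sketch of such a value layout, with hypothetical field names that are not part of this commit:

/* Illustrative only: a map value with an embedded bpf_spin_lock.
 * BTF for this struct is what makes map_value_has_spin_lock() true
 * and gives the kernel map->spin_lock_off.
 */
#include <linux/bpf.h>

struct counter_val {                    /* hypothetical value type */
        struct bpf_spin_lock lock;      /* located via map->spin_lock_off */
        __u64 packets;                  /* copied in/out around the lock */
        __u64 bytes;
};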

File tree

7 files changed, +110 −14 lines changed


include/linux/bpf.h

+2
@@ -119,6 +119,8 @@ static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
                memcpy(dst, src, map->value_size);
        }
 }
+void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
+                          bool lock_src);
 
 struct bpf_offload_dev;
 struct bpf_offloaded_map;

include/uapi/linux/bpf.h

+1
@@ -267,6 +267,7 @@ enum bpf_attach_type {
 #define BPF_ANY                0 /* create new element or update existing */
 #define BPF_NOEXIST    1 /* create new element if it didn't exist */
 #define BPF_EXIST      2 /* update existing element */
+#define BPF_F_LOCK     4 /* spin_lock-ed map_lookup/map_update */
 
 /* flags for BPF_MAP_CREATE command */
 #define BPF_F_NO_PREALLOC      (1U << 0)
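
Note that BPF_ANY/BPF_NOEXIST/BPF_EXIST are the values 0..2, while BPF_F_LOCK is the next power of two, so it can be OR-ed with any update mode; the checks in the rest of this patch recover the mode with (flags & ~BPF_F_LOCK). Illustrative combinations (nothing here beyond the definitions above):

#include <linux/bpf.h>

/* create-or-update under the element lock: 0 | 4 == 4 */
__u64 flags_any_locked   = BPF_ANY   | BPF_F_LOCK;
/* update-only under the element lock:     2 | 4 == 6 */
__u64 flags_exist_locked = BPF_EXIST | BPF_F_LOCK;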

kernel/bpf/arraymap.c

+16 −8
@@ -253,27 +253,35 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
 {
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        u32 index = *(u32 *)key;
+       char *val;
 
-       if (unlikely(map_flags > BPF_EXIST))
+       if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
                /* unknown flags */
                return -EINVAL;
 
        if (unlikely(index >= array->map.max_entries))
                /* all elements were pre-allocated, cannot insert a new one */
                return -E2BIG;
 
-       if (unlikely(map_flags == BPF_NOEXIST))
+       if (unlikely(map_flags & BPF_NOEXIST))
                /* all elements already exist */
                return -EEXIST;
 
-       if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
+       if (unlikely((map_flags & BPF_F_LOCK) &&
+                    !map_value_has_spin_lock(map)))
+               return -EINVAL;
+
+       if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
                memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
                       value, map->value_size);
-       else
-               copy_map_value(map,
-                              array->value +
-                              array->elem_size * (index & array->index_mask),
-                              value);
+       } else {
+               val = array->value +
+                     array->elem_size * (index & array->index_mask);
+               if (map_flags & BPF_F_LOCK)
+                       copy_map_value_locked(map, val, value, false);
+               else
+                       copy_map_value(map, val, value);
+       }
        return 0;
 }
 
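To exercise this array path from user space, something like the following should work — a hedged sketch, assuming libbpf's bpf_map_update_elem() wrapper, an array map fd whose value type carries BTF for its spin lock, and the hypothetical counter_val layout sketched earlier:

#include <bpf/bpf.h>            /* libbpf's bpf_map_update_elem() wrapper */
#include <linux/bpf.h>

struct counter_val {            /* hypothetical, matching the earlier sketch */
        struct bpf_spin_lock lock;
        __u64 packets;
        __u64 bytes;
};

static int update_locked(int map_fd)
{
        __u32 key = 0;
        struct counter_val val = { .packets = 1, .bytes = 64 };

        /* the kernel grabs the lock embedded in the in-map element and
         * copies everything from &val except the lock field
         */
        return bpf_map_update_elem(map_fd, &key, &val, BPF_F_LOCK);
}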

kernel/bpf/hashtab.c

+39 −3
@@ -804,11 +804,11 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
                       u64 map_flags)
 {
-       if (l_old && map_flags == BPF_NOEXIST)
+       if (l_old && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST)
                /* elem already exists */
                return -EEXIST;
 
-       if (!l_old && map_flags == BPF_EXIST)
+       if (!l_old && (map_flags & ~BPF_F_LOCK) == BPF_EXIST)
                /* elem doesn't exist, cannot update it */
                return -ENOENT;
 
@@ -827,7 +827,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
        u32 key_size, hash;
        int ret;
 
-       if (unlikely(map_flags > BPF_EXIST))
+       if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
                /* unknown flags */
                return -EINVAL;
 
@@ -840,6 +840,28 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
        b = __select_bucket(htab, hash);
        head = &b->head;
 
+       if (unlikely(map_flags & BPF_F_LOCK)) {
+               if (unlikely(!map_value_has_spin_lock(map)))
+                       return -EINVAL;
+               /* find an element without taking the bucket lock */
+               l_old = lookup_nulls_elem_raw(head, hash, key, key_size,
+                                             htab->n_buckets);
+               ret = check_flags(htab, l_old, map_flags);
+               if (ret)
+                       return ret;
+               if (l_old) {
+                       /* grab the element lock and update value in place */
+                       copy_map_value_locked(map,
+                                             l_old->key + round_up(key_size, 8),
+                                             value, false);
+                       return 0;
+               }
+               /* fall through, grab the bucket lock and lookup again.
+                * 99.9% chance that the element won't be found,
+                * but second lookup under lock has to be done.
+                */
+       }
+
        /* bpf_map_update_elem() can be called in_irq() */
        raw_spin_lock_irqsave(&b->lock, flags);
 
@@ -849,6 +871,20 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
        if (ret)
                goto err;
 
+       if (unlikely(l_old && (map_flags & BPF_F_LOCK))) {
+               /* first lookup without the bucket lock didn't find the element,
+                * but second lookup with the bucket lock found it.
+                * This case is highly unlikely, but has to be dealt with:
+                * grab the element lock in addition to the bucket lock
+                * and update element in place
+                */
+               copy_map_value_locked(map,
+                                     l_old->key + round_up(key_size, 8),
+                                     value, false);
+               ret = 0;
+               goto err;
+       }
+
        l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
                                l_old);
        if (IS_ERR(l_new)) {

kernel/bpf/helpers.c

+16
@@ -301,6 +301,22 @@ const struct bpf_func_proto bpf_spin_unlock_proto = {
        .arg1_type      = ARG_PTR_TO_SPIN_LOCK,
 };
 
+void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
+                          bool lock_src)
+{
+       struct bpf_spin_lock *lock;
+
+       if (lock_src)
+               lock = src + map->spin_lock_off;
+       else
+               lock = dst + map->spin_lock_off;
+       preempt_disable();
+       ____bpf_spin_lock(lock);
+       copy_map_value(map, dst, src);
+       ____bpf_spin_unlock(lock);
+       preempt_enable();
+}
+
 #ifdef CONFIG_CGROUPS
 BPF_CALL_0(bpf_get_current_cgroup_id)
 {
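
copy_map_value_locked() calls ____bpf_spin_lock()/____bpf_spin_unlock(), the same functions that implement the bpf_spin_lock()/bpf_spin_unlock() helpers, so a BPF_F_LOCK lookup or update from user space serializes against BPF programs that take the element lock themselves. A hedged sketch of the program side, with helper declarations in the style of the BPF selftests and the hypothetical counter_val layout from earlier:

#include <linux/bpf.h>

struct counter_val {                    /* hypothetical layout from above */
        struct bpf_spin_lock lock;
        __u64 packets;
        __u64 bytes;
};

static void (*bpf_spin_lock)(struct bpf_spin_lock *lock) =
        (void *) BPF_FUNC_spin_lock;
static void (*bpf_spin_unlock)(struct bpf_spin_lock *lock) =
        (void *) BPF_FUNC_spin_unlock;

/* called with a pointer into the map value, e.g. the result of
 * bpf_map_lookup_elem(); because copy_map_value_locked() takes the
 * same per-element lock, a BPF_F_LOCK lookup/update never observes
 * this read-modify-write half-done.
 */
static __attribute__((always_inline)) void account(struct counter_val *val,
                                                   __u64 len)
{
        bpf_spin_lock(&val->lock);
        val->packets++;
        val->bytes += len;
        bpf_spin_unlock(&val->lock);
}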

kernel/bpf/local_storage.c

+13 −1
@@ -131,14 +131,26 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
        struct bpf_cgroup_storage *storage;
        struct bpf_storage_buffer *new;
 
-       if (flags != BPF_ANY && flags != BPF_EXIST)
+       if (unlikely(flags & ~(BPF_F_LOCK | BPF_EXIST | BPF_NOEXIST)))
+               return -EINVAL;
+
+       if (unlikely(flags & BPF_NOEXIST))
+               return -EINVAL;
+
+       if (unlikely((flags & BPF_F_LOCK) &&
+                    !map_value_has_spin_lock(map)))
                return -EINVAL;
 
        storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,
                                        key, false);
        if (!storage)
                return -ENOENT;
 
+       if (flags & BPF_F_LOCK) {
+               copy_map_value_locked(map, storage->buf->data, value, false);
+               return 0;
+       }
+
        new = kmalloc_node(sizeof(struct bpf_storage_buffer) +
                           map->value_size,
                           __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN,

kernel/bpf/syscall.c

+23 −2
@@ -682,7 +682,7 @@ static void *__bpf_copy_key(void __user *ukey, u64 key_size)
 }
 
 /* last field in 'union bpf_attr' used by this command */
-#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
+#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags
 
 static int map_lookup_elem(union bpf_attr *attr)
 {
@@ -698,6 +698,9 @@ static int map_lookup_elem(union bpf_attr *attr)
        if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
                return -EINVAL;
 
+       if (attr->flags & ~BPF_F_LOCK)
+               return -EINVAL;
+
        f = fdget(ufd);
        map = __bpf_map_get(f);
        if (IS_ERR(map))
@@ -708,6 +711,12 @@ static int map_lookup_elem(union bpf_attr *attr)
                goto err_put;
        }
 
+       if ((attr->flags & BPF_F_LOCK) &&
+           !map_value_has_spin_lock(map)) {
+               err = -EINVAL;
+               goto err_put;
+       }
+
        key = __bpf_copy_key(ukey, map->key_size);
        if (IS_ERR(key)) {
                err = PTR_ERR(key);
@@ -758,7 +767,13 @@ static int map_lookup_elem(union bpf_attr *attr)
                        err = -ENOENT;
                } else {
                        err = 0;
-                       copy_map_value(map, value, ptr);
+                       if (attr->flags & BPF_F_LOCK)
+                               /* lock 'ptr' and copy everything but lock */
+                               copy_map_value_locked(map, value, ptr, true);
+                       else
+                               copy_map_value(map, value, ptr);
+                       /* mask lock, since value wasn't zero inited */
+                       check_and_init_map_lock(map, value);
                }
                rcu_read_unlock();
        }
@@ -818,6 +833,12 @@ static int map_update_elem(union bpf_attr *attr)
                goto err_put;
        }
 
+       if ((attr->flags & BPF_F_LOCK) &&
+           !map_value_has_spin_lock(map)) {
+               err = -EINVAL;
+               goto err_put;
+       }
+
        key = __bpf_copy_key(ukey, map->key_size);
        if (IS_ERR(key)) {
                err = PTR_ERR(key);
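
On the lookup side, a consistent read of the whole value looks roughly like the following hedged sketch, assuming a libbpf new enough to expose attr->flags through a bpf_map_lookup_elem_flags() wrapper and reusing the hypothetical counter_val layout:

#include <bpf/bpf.h>
#include <linux/bpf.h>

struct counter_val {                    /* hypothetical layout from earlier */
        struct bpf_spin_lock lock;
        __u64 packets;
        __u64 bytes;
};

static int read_consistent(int map_fd, __u32 key, struct counter_val *out)
{
        /* the value is copied out under the element lock, so packets and
         * bytes are mutually consistent; the lock field in *out is then
         * zeroed by check_and_init_map_lock() since the copy isn't
         * zero-initialized by the kernel
         */
        return bpf_map_lookup_elem_flags(map_fd, &key, out, BPF_F_LOCK);
}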
