Skip to content

Commit db20fd2

Browse files
Alexei Starovoitovdavem330
Alexei Starovoitov
authored andcommitted
bpf: add lookup/update/delete/iterate methods to BPF maps
'maps' is a generic storage of different types for sharing data between kernel and userspace. The maps are accessed from user space via BPF syscall, which has commands: - create a map with given type and attributes fd = bpf(BPF_MAP_CREATE, union bpf_attr *attr, u32 size) returns fd or negative error - lookup key in a given map referenced by fd err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size) using attr->map_fd, attr->key, attr->value returns zero and stores found elem into value or negative error - create or update key/value pair in a given map err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size) using attr->map_fd, attr->key, attr->value returns zero or negative error - find and delete element by key in a given map err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size) using attr->map_fd, attr->key - iterate map elements (based on input key return next_key) err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size) using attr->map_fd, attr->key, attr->next_key - close(fd) deletes the map Signed-off-by: Alexei Starovoitov <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 749730c commit db20fd2

File tree

3 files changed

+281
-0
lines changed

3 files changed

+281
-0
lines changed

include/linux/bpf.h

+8
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
#include <uapi/linux/bpf.h>
1111
#include <linux/workqueue.h>
12+
#include <linux/file.h>
1213

1314
struct bpf_map;
1415

@@ -17,6 +18,12 @@ struct bpf_map_ops {
1718
/* funcs callable from userspace (via syscall) */
1819
struct bpf_map *(*map_alloc)(union bpf_attr *attr);
1920
void (*map_free)(struct bpf_map *);
21+
int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
22+
23+
/* funcs callable from userspace and from eBPF programs */
24+
void *(*map_lookup_elem)(struct bpf_map *map, void *key);
25+
int (*map_update_elem)(struct bpf_map *map, void *key, void *value);
26+
int (*map_delete_elem)(struct bpf_map *map, void *key);
2027
};
2128

2229
struct bpf_map {
@@ -37,5 +44,6 @@ struct bpf_map_type_list {
3744

3845
void bpf_register_map_type(struct bpf_map_type_list *tl);
3946
void bpf_map_put(struct bpf_map *map);
47+
struct bpf_map *bpf_map_get(struct fd f);
4048

4149
#endif /* _LINUX_BPF_H */

include/uapi/linux/bpf.h

+38
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,35 @@ enum bpf_cmd {
7070
* map is deleted when fd is closed
7171
*/
7272
BPF_MAP_CREATE,
73+
74+
/* lookup key in a given map
75+
* err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)
76+
* Using attr->map_fd, attr->key, attr->value
77+
* returns zero and stores found elem into value
78+
* or negative error
79+
*/
80+
BPF_MAP_LOOKUP_ELEM,
81+
82+
/* create or update key/value pair in a given map
83+
* err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)
84+
* Using attr->map_fd, attr->key, attr->value
85+
* returns zero or negative error
86+
*/
87+
BPF_MAP_UPDATE_ELEM,
88+
89+
/* find and delete elem by key in a given map
90+
* err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)
91+
* Using attr->map_fd, attr->key
92+
* returns zero or negative error
93+
*/
94+
BPF_MAP_DELETE_ELEM,
95+
96+
/* lookup key in a given map and return next key
97+
* err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size)
98+
* Using attr->map_fd, attr->key, attr->next_key
99+
* returns zero and stores next key or negative error
100+
*/
101+
BPF_MAP_GET_NEXT_KEY,
73102
};
74103

75104
enum bpf_map_type {
@@ -83,6 +112,15 @@ union bpf_attr {
83112
__u32 value_size; /* size of value in bytes */
84113
__u32 max_entries; /* max number of entries in a map */
85114
};
115+
116+
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
117+
__u32 map_fd;
118+
__aligned_u64 key;
119+
union {
120+
__aligned_u64 value;
121+
__aligned_u64 next_key;
122+
};
123+
};
86124
} __attribute__((aligned(8)));
87125

88126
#endif /* _UAPI__LINUX_BPF_H__ */

kernel/bpf/syscall.c

+235
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <linux/syscalls.h>
1414
#include <linux/slab.h>
1515
#include <linux/anon_inodes.h>
16+
#include <linux/file.h>
1617

1718
static LIST_HEAD(bpf_map_types);
1819

@@ -111,6 +112,228 @@ static int map_create(union bpf_attr *attr)
111112
return err;
112113
}
113114

115+
/* if error is returned, fd is released.
116+
* On success caller should complete fd access with matching fdput()
117+
*/
118+
struct bpf_map *bpf_map_get(struct fd f)
119+
{
120+
struct bpf_map *map;
121+
122+
if (!f.file)
123+
return ERR_PTR(-EBADF);
124+
125+
if (f.file->f_op != &bpf_map_fops) {
126+
fdput(f);
127+
return ERR_PTR(-EINVAL);
128+
}
129+
130+
map = f.file->private_data;
131+
132+
return map;
133+
}
134+
135+
/* helper to convert user pointers passed inside __aligned_u64 fields */
136+
static void __user *u64_to_ptr(__u64 val)
137+
{
138+
return (void __user *) (unsigned long) val;
139+
}
140+
141+
/* last field in 'union bpf_attr' used by this command */
142+
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
143+
144+
static int map_lookup_elem(union bpf_attr *attr)
145+
{
146+
void __user *ukey = u64_to_ptr(attr->key);
147+
void __user *uvalue = u64_to_ptr(attr->value);
148+
int ufd = attr->map_fd;
149+
struct fd f = fdget(ufd);
150+
struct bpf_map *map;
151+
void *key, *value;
152+
int err;
153+
154+
if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
155+
return -EINVAL;
156+
157+
map = bpf_map_get(f);
158+
if (IS_ERR(map))
159+
return PTR_ERR(map);
160+
161+
err = -ENOMEM;
162+
key = kmalloc(map->key_size, GFP_USER);
163+
if (!key)
164+
goto err_put;
165+
166+
err = -EFAULT;
167+
if (copy_from_user(key, ukey, map->key_size) != 0)
168+
goto free_key;
169+
170+
err = -ESRCH;
171+
rcu_read_lock();
172+
value = map->ops->map_lookup_elem(map, key);
173+
if (!value)
174+
goto err_unlock;
175+
176+
err = -EFAULT;
177+
if (copy_to_user(uvalue, value, map->value_size) != 0)
178+
goto err_unlock;
179+
180+
err = 0;
181+
182+
err_unlock:
183+
rcu_read_unlock();
184+
free_key:
185+
kfree(key);
186+
err_put:
187+
fdput(f);
188+
return err;
189+
}
190+
191+
#define BPF_MAP_UPDATE_ELEM_LAST_FIELD value
192+
193+
static int map_update_elem(union bpf_attr *attr)
194+
{
195+
void __user *ukey = u64_to_ptr(attr->key);
196+
void __user *uvalue = u64_to_ptr(attr->value);
197+
int ufd = attr->map_fd;
198+
struct fd f = fdget(ufd);
199+
struct bpf_map *map;
200+
void *key, *value;
201+
int err;
202+
203+
if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
204+
return -EINVAL;
205+
206+
map = bpf_map_get(f);
207+
if (IS_ERR(map))
208+
return PTR_ERR(map);
209+
210+
err = -ENOMEM;
211+
key = kmalloc(map->key_size, GFP_USER);
212+
if (!key)
213+
goto err_put;
214+
215+
err = -EFAULT;
216+
if (copy_from_user(key, ukey, map->key_size) != 0)
217+
goto free_key;
218+
219+
err = -ENOMEM;
220+
value = kmalloc(map->value_size, GFP_USER);
221+
if (!value)
222+
goto free_key;
223+
224+
err = -EFAULT;
225+
if (copy_from_user(value, uvalue, map->value_size) != 0)
226+
goto free_value;
227+
228+
/* eBPF program that use maps are running under rcu_read_lock(),
229+
* therefore all map accessors rely on this fact, so do the same here
230+
*/
231+
rcu_read_lock();
232+
err = map->ops->map_update_elem(map, key, value);
233+
rcu_read_unlock();
234+
235+
free_value:
236+
kfree(value);
237+
free_key:
238+
kfree(key);
239+
err_put:
240+
fdput(f);
241+
return err;
242+
}
243+
244+
#define BPF_MAP_DELETE_ELEM_LAST_FIELD key
245+
246+
static int map_delete_elem(union bpf_attr *attr)
247+
{
248+
void __user *ukey = u64_to_ptr(attr->key);
249+
int ufd = attr->map_fd;
250+
struct fd f = fdget(ufd);
251+
struct bpf_map *map;
252+
void *key;
253+
int err;
254+
255+
if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
256+
return -EINVAL;
257+
258+
map = bpf_map_get(f);
259+
if (IS_ERR(map))
260+
return PTR_ERR(map);
261+
262+
err = -ENOMEM;
263+
key = kmalloc(map->key_size, GFP_USER);
264+
if (!key)
265+
goto err_put;
266+
267+
err = -EFAULT;
268+
if (copy_from_user(key, ukey, map->key_size) != 0)
269+
goto free_key;
270+
271+
rcu_read_lock();
272+
err = map->ops->map_delete_elem(map, key);
273+
rcu_read_unlock();
274+
275+
free_key:
276+
kfree(key);
277+
err_put:
278+
fdput(f);
279+
return err;
280+
}
281+
282+
/* last field in 'union bpf_attr' used by this command */
283+
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key
284+
285+
static int map_get_next_key(union bpf_attr *attr)
286+
{
287+
void __user *ukey = u64_to_ptr(attr->key);
288+
void __user *unext_key = u64_to_ptr(attr->next_key);
289+
int ufd = attr->map_fd;
290+
struct fd f = fdget(ufd);
291+
struct bpf_map *map;
292+
void *key, *next_key;
293+
int err;
294+
295+
if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
296+
return -EINVAL;
297+
298+
map = bpf_map_get(f);
299+
if (IS_ERR(map))
300+
return PTR_ERR(map);
301+
302+
err = -ENOMEM;
303+
key = kmalloc(map->key_size, GFP_USER);
304+
if (!key)
305+
goto err_put;
306+
307+
err = -EFAULT;
308+
if (copy_from_user(key, ukey, map->key_size) != 0)
309+
goto free_key;
310+
311+
err = -ENOMEM;
312+
next_key = kmalloc(map->key_size, GFP_USER);
313+
if (!next_key)
314+
goto free_key;
315+
316+
rcu_read_lock();
317+
err = map->ops->map_get_next_key(map, key, next_key);
318+
rcu_read_unlock();
319+
if (err)
320+
goto free_next_key;
321+
322+
err = -EFAULT;
323+
if (copy_to_user(unext_key, next_key, map->key_size) != 0)
324+
goto free_next_key;
325+
326+
err = 0;
327+
328+
free_next_key:
329+
kfree(next_key);
330+
free_key:
331+
kfree(key);
332+
err_put:
333+
fdput(f);
334+
return err;
335+
}
336+
114337
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
115338
{
116339
union bpf_attr attr = {};
@@ -160,6 +383,18 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
160383
case BPF_MAP_CREATE:
161384
err = map_create(&attr);
162385
break;
386+
case BPF_MAP_LOOKUP_ELEM:
387+
err = map_lookup_elem(&attr);
388+
break;
389+
case BPF_MAP_UPDATE_ELEM:
390+
err = map_update_elem(&attr);
391+
break;
392+
case BPF_MAP_DELETE_ELEM:
393+
err = map_delete_elem(&attr);
394+
break;
395+
case BPF_MAP_GET_NEXT_KEY:
396+
err = map_get_next_key(&attr);
397+
break;
163398
default:
164399
err = -EINVAL;
165400
break;

0 commit comments

Comments
 (0)