Skip to content

Commit aaac3ba

Browse files
Alexei Starovoitovdavem330
Alexei Starovoitov
authored and committed
bpf: charge user for creation of BPF maps and programs
Since eBPF programs and maps use kernel memory, consider it 'locked' memory from the user-accounting point of view and charge it against the RLIMIT_MEMLOCK limit. This limit is typically set to 64 Kbytes by distros, so almost all bpf+tracing programs will need to increase it: they use maps, and the kernel charges the maximum map size upfront. For example, a hash map of 1024 elements will be charged as 64 Kbytes. This is inconvenient for current users and changes the current behavior for root, but it is probably worth doing to be consistent between root and non-root. Similar accounting logic is done by mmap of perf_event. Signed-off-by: Alexei Starovoitov <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 1be7f75 commit aaac3ba

File tree

5 files changed

+72
-2
lines changed

5 files changed

+72
-2
lines changed

include/linux/bpf.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ struct bpf_map {
3636
u32 key_size;
3737
u32 value_size;
3838
u32 max_entries;
39+
u32 pages;
40+
struct user_struct *user;
3941
const struct bpf_map_ops *ops;
4042
struct work_struct work;
4143
};
@@ -128,6 +130,7 @@ struct bpf_prog_aux {
128130
const struct bpf_verifier_ops *ops;
129131
struct bpf_map **used_maps;
130132
struct bpf_prog *prog;
133+
struct user_struct *user;
131134
union {
132135
struct work_struct work;
133136
struct rcu_head rcu;

include/linux/sched.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -840,7 +840,7 @@ struct user_struct {
840840
struct hlist_node uidhash_node;
841841
kuid_t uid;
842842

843-
#ifdef CONFIG_PERF_EVENTS
843+
#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL)
844844
atomic_long_t locked_vm;
845845
#endif
846846
};

kernel/bpf/arraymap.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
4949
array->map.key_size = attr->key_size;
5050
array->map.value_size = attr->value_size;
5151
array->map.max_entries = attr->max_entries;
52-
52+
array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT;
5353
array->elem_size = elem_size;
5454

5555
return &array->map;

kernel/bpf/hashtab.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,10 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
8888
htab->elem_size = sizeof(struct htab_elem) +
8989
round_up(htab->map.key_size, 8) +
9090
htab->map.value_size;
91+
92+
htab->map.pages = round_up(htab->n_buckets * sizeof(struct hlist_head) +
93+
htab->elem_size * htab->map.max_entries,
94+
PAGE_SIZE) >> PAGE_SHIFT;
9195
return &htab->map;
9296

9397
free_htab:

kernel/bpf/syscall.c

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,38 @@ void bpf_register_map_type(struct bpf_map_type_list *tl)
4646
list_add(&tl->list_node, &bpf_map_types);
4747
}
4848

49+
/*
 * Charge map->pages against the current user's locked_vm counter.
 *
 * The limit is rlimit(RLIMIT_MEMLOCK) converted from bytes to pages.
 * The counter is incremented first and checked afterwards; if the new
 * total exceeds the limit the increment is rolled back, the user_struct
 * obtained from get_current_user() is released with free_uid() and
 * -EPERM is returned.  On success the user_struct is stored in
 * map->user and 0 is returned.
 *
 * NOTE(review): the add and the subsequent read are two separate atomic
 * operations, so the value compared against the limit may include
 * concurrent charges by the same user -- confirm this is acceptable.
 */
static int bpf_map_charge_memlock(struct bpf_map *map)
50+
{
51+
struct user_struct *user = get_current_user();
52+
unsigned long memlock_limit;
53+
54+
memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
55+
56+
atomic_long_add(map->pages, &user->locked_vm);
57+
58+
if (atomic_long_read(&user->locked_vm) > memlock_limit) {
59+
atomic_long_sub(map->pages, &user->locked_vm);
60+
free_uid(user);
61+
return -EPERM;
62+
}
63+
map->user = user;
64+
return 0;
65+
}
66+
67+
/*
 * Undo bpf_map_charge_memlock(): subtract map->pages from the locked_vm
 * counter of map->user, then call free_uid() on that user_struct
 * (presumably dropping the reference taken at charge time -- confirm).
 * Reads map->user unconditionally, so it must only be called for a map
 * that was successfully charged.
 */
static void bpf_map_uncharge_memlock(struct bpf_map *map)
68+
{
69+
struct user_struct *user = map->user;
70+
71+
atomic_long_sub(map->pages, &user->locked_vm);
72+
free_uid(user);
73+
}
74+
4975
/* called from workqueue */
5076
/*
 * Deferred map destruction: recover the bpf_map that embeds @work,
 * return its memlock charge to the owning user, then hand the map to
 * its type-specific map_free() callback.
 */
static void bpf_map_free_deferred(struct work_struct *work)
5177
{
5278
struct bpf_map *map = container_of(work, struct bpf_map, work);
5379

80+
/* uncharge first: it reads map->pages and map->user, which are
 * presumably no longer valid once map_free() has run
 */
bpf_map_uncharge_memlock(map);
5481
/* implementation dependent freeing */
5582
map->ops->map_free(map);
5683
}
@@ -110,6 +137,10 @@ static int map_create(union bpf_attr *attr)
110137

111138
atomic_set(&map->refcnt, 1);
112139

140+
err = bpf_map_charge_memlock(map);
141+
if (err)
142+
goto free_map;
143+
113144
err = anon_inode_getfd("bpf-map", &bpf_map_fops, map, O_RDWR | O_CLOEXEC);
114145

115146
if (err < 0)
@@ -442,11 +473,37 @@ static void free_used_maps(struct bpf_prog_aux *aux)
442473
kfree(aux->used_maps);
443474
}
444475

476+
/*
 * Charge prog->pages against the current user's locked_vm counter.
 *
 * The limit is rlimit(RLIMIT_MEMLOCK) converted from bytes to pages.
 * The counter is incremented first and checked afterwards; if the new
 * total exceeds the limit the increment is rolled back, the user_struct
 * obtained from get_current_user() is released with free_uid() and
 * -EPERM is returned.  On success the user_struct is stored in
 * prog->aux->user and 0 is returned.
 *
 * NOTE(review): the add and the subsequent read are two separate atomic
 * operations, so the value compared against the limit may include
 * concurrent charges by the same user -- confirm this is acceptable.
 */
static int bpf_prog_charge_memlock(struct bpf_prog *prog)
477+
{
478+
struct user_struct *user = get_current_user();
479+
unsigned long memlock_limit;
480+
481+
memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
482+
483+
atomic_long_add(prog->pages, &user->locked_vm);
484+
if (atomic_long_read(&user->locked_vm) > memlock_limit) {
485+
atomic_long_sub(prog->pages, &user->locked_vm);
486+
free_uid(user);
487+
return -EPERM;
488+
}
489+
prog->aux->user = user;
490+
return 0;
491+
}
492+
493+
/*
 * Undo bpf_prog_charge_memlock(): subtract prog->pages from the
 * locked_vm counter of prog->aux->user, then call free_uid() on that
 * user_struct (presumably dropping the reference taken at charge time
 * -- confirm).  Reads prog->aux->user unconditionally, so it must only
 * be called for a program that was successfully charged.
 */
static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
494+
{
495+
struct user_struct *user = prog->aux->user;
496+
497+
atomic_long_sub(prog->pages, &user->locked_vm);
498+
free_uid(user);
499+
}
500+
445501
/*
 * Final teardown of a program, given the rcu_head embedded in its
 * bpf_prog_aux (presumably invoked as an RCU callback -- confirm
 * against the caller): release the maps the program used, return its
 * memlock charge to the owning user, then free the program itself.
 */
static void __prog_put_rcu(struct rcu_head *rcu)
446502
{
447503
struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
448504

449505
free_used_maps(aux);
506+
/* must precede bpf_prog_free(): the uncharge reads prog->pages and
 * prog->aux->user
 */
bpf_prog_uncharge_memlock(aux->prog);
450507
bpf_prog_free(aux->prog);
451508
}
452509

@@ -554,6 +611,10 @@ static int bpf_prog_load(union bpf_attr *attr)
554611
if (!prog)
555612
return -ENOMEM;
556613

614+
err = bpf_prog_charge_memlock(prog);
615+
if (err)
616+
goto free_prog_nouncharge;
617+
557618
prog->len = attr->insn_cnt;
558619

559620
err = -EFAULT;
@@ -595,6 +656,8 @@ static int bpf_prog_load(union bpf_attr *attr)
595656
free_used_maps:
596657
free_used_maps(prog->aux);
597658
free_prog:
659+
bpf_prog_uncharge_memlock(prog);
660+
free_prog_nouncharge:
598661
bpf_prog_free(prog);
599662
return err;
600663
}

0 commit comments

Comments
 (0)