Commit a10423b

4ast authored and davem330 committed
bpf: introduce BPF_MAP_TYPE_PERCPU_ARRAY map
The primary use case is a histogram array of latency, where a bpf program computes the latency of block requests or other events and stores the histogram into an array of 64 elements. All cpus are constantly running, so a normal increment is not accurate and bpf_xadd causes cache ping-pong; this per-cpu approach gives the fastest collision-free counters.

Signed-off-by: Alexei Starovoitov <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 824bd0c commit a10423b
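As a usage illustration (not part of this commit): a tracing program in the style of the samples/bpf examples. The "bpf_helpers.h" header, the SEC()/struct bpf_map_def conventions, and the probed function are assumptions borrowed from those samples; the histogram bucket math is elided.

	#include <linux/ptrace.h>
	#include <linux/types.h>
	#include <uapi/linux/bpf.h>
	#include "bpf_helpers.h"	/* assumed: SEC(), struct bpf_map_def, helpers */

	/* 64 histogram slots; one __u64 counter per slot per CPU */
	struct bpf_map_def SEC("maps") lat_map = {
		.type = BPF_MAP_TYPE_PERCPU_ARRAY,
		.key_size = sizeof(__u32),
		.value_size = sizeof(__u64),
		.max_entries = 64,
	};

	SEC("kprobe/blk_account_io_done")	/* illustrative attach point */
	int bpf_prog(struct pt_regs *ctx)
	{
		__u32 slot = 0;		/* real code: log2 of the measured latency */
		__u64 *value;

		value = bpf_map_lookup_elem(&lat_map, &slot);
		if (value)
			(*value)++;	/* private per-CPU copy: a plain,
					 * non-atomic increment is race-free */
		return 0;
	}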

File tree: 3 files changed, +93 −11 lines


include/linux/bpf.h (+1)

@@ -151,6 +151,7 @@ struct bpf_array {
 	union {
 		char value[0] __aligned(8);
 		void *ptrs[0] __aligned(8);
+		void __percpu *pptrs[0] __aligned(8);
 	};
 };
 #define MAX_TAIL_CALL_CNT 32

include/uapi/linux/bpf.h (+1)

@@ -82,6 +82,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_PROG_ARRAY,
 	BPF_MAP_TYPE_PERF_EVENT_ARRAY,
 	BPF_MAP_TYPE_PERCPU_HASH,
+	BPF_MAP_TYPE_PERCPU_ARRAY,
 };
 
 enum bpf_prog_type {
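From userspace, the new map type is selected at creation time through the bpf(2) syscall. A minimal sketch, assuming the raw syscall with no wrapper library (the sizes are illustrative; keys must be 4 bytes, as array_map_alloc enforces):

	#include <linux/bpf.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Create a per-cpu array of 64 u64 counters; returns a map fd or -1. */
	static int create_percpu_array(void)
	{
		union bpf_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.map_type = BPF_MAP_TYPE_PERCPU_ARRAY;
		attr.key_size = 4;		/* keys are u32 array indices */
		attr.value_size = 8;		/* one u64 counter per element */
		attr.max_entries = 64;

		return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
	}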

kernel/bpf/arraymap.c (+91, −11)
@@ -17,11 +17,39 @@
 #include <linux/filter.h>
 #include <linux/perf_event.h>
 
+static void bpf_array_free_percpu(struct bpf_array *array)
+{
+	int i;
+
+	for (i = 0; i < array->map.max_entries; i++)
+		free_percpu(array->pptrs[i]);
+}
+
+static int bpf_array_alloc_percpu(struct bpf_array *array)
+{
+	void __percpu *ptr;
+	int i;
+
+	for (i = 0; i < array->map.max_entries; i++) {
+		ptr = __alloc_percpu_gfp(array->elem_size, 8,
+					 GFP_USER | __GFP_NOWARN);
+		if (!ptr) {
+			bpf_array_free_percpu(array);
+			return -ENOMEM;
+		}
+		array->pptrs[i] = ptr;
+	}
+
+	return 0;
+}
+
 /* Called from syscall */
 static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 {
+	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
 	struct bpf_array *array;
-	u32 elem_size, array_size;
+	u64 array_size;
+	u32 elem_size;
 
 	/* check sanity of attributes */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
@@ -36,12 +64,16 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 
 	elem_size = round_up(attr->value_size, 8);
 
-	/* check round_up into zero and u32 overflow */
-	if (elem_size == 0 ||
-	    attr->max_entries > (U32_MAX - PAGE_SIZE - sizeof(*array)) / elem_size)
+	array_size = sizeof(*array);
+	if (percpu)
+		array_size += (u64) attr->max_entries * sizeof(void *);
+	else
+		array_size += (u64) attr->max_entries * elem_size;
+
+	/* make sure there is no u32 overflow later in round_up() */
+	if (array_size >= U32_MAX - PAGE_SIZE)
 		return ERR_PTR(-ENOMEM);
 
-	array_size = sizeof(*array) + attr->max_entries * elem_size;
 
 	/* allocate all map elements and zero-initialize them */
 	array = kzalloc(array_size, GFP_USER | __GFP_NOWARN);
@@ -52,12 +84,25 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	}
 
 	/* copy mandatory map attributes */
+	array->map.map_type = attr->map_type;
 	array->map.key_size = attr->key_size;
 	array->map.value_size = attr->value_size;
 	array->map.max_entries = attr->max_entries;
-	array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT;
 	array->elem_size = elem_size;
 
+	if (!percpu)
+		goto out;
+
+	array_size += (u64) attr->max_entries * elem_size * num_possible_cpus();
+
+	if (array_size >= U32_MAX - PAGE_SIZE ||
+	    elem_size > PCPU_MIN_UNIT_SIZE || bpf_array_alloc_percpu(array)) {
+		kvfree(array);
+		return ERR_PTR(-ENOMEM);
+	}
+out:
+	array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT;
+
 	return &array->map;
 }
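To make the two-stage sizing above concrete, a worked example with illustrative numbers (assuming sizeof(void *) == 8 and num_possible_cpus() == 4): for a per-cpu array with max_entries = 64 and value_size = 8,

	elem_size = round_up(8, 8) = 8
	kzalloc:     array_size  = sizeof(*array) + 64 * 8      (pptrs[] pointer table only)
	accounting:  array_size += 64 * 8 * 4 = 2048 more bytes (per-cpu value storage)
	map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT

The element values never live in the kzalloc'ed region; they come from __alloc_percpu_gfp() in bpf_array_alloc_percpu(), but map.pages still charges the map for that per-cpu storage.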

@@ -67,12 +112,24 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	u32 index = *(u32 *)key;
 
-	if (index >= array->map.max_entries)
+	if (unlikely(index >= array->map.max_entries))
 		return NULL;
 
 	return array->value + array->elem_size * index;
 }
 
+/* Called from eBPF program */
+static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	u32 index = *(u32 *)key;
+
+	if (unlikely(index >= array->map.max_entries))
+		return NULL;
+
+	return this_cpu_ptr(array->pptrs[index]);
+}
+
 /* Called from syscall */
 static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 {

@@ -99,19 +156,24 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	u32 index = *(u32 *)key;
 
-	if (map_flags > BPF_EXIST)
+	if (unlikely(map_flags > BPF_EXIST))
 		/* unknown flags */
 		return -EINVAL;
 
-	if (index >= array->map.max_entries)
+	if (unlikely(index >= array->map.max_entries))
 		/* all elements were pre-allocated, cannot insert a new one */
 		return -E2BIG;
 
-	if (map_flags == BPF_NOEXIST)
+	if (unlikely(map_flags == BPF_NOEXIST))
 		/* all elements already exist */
 		return -EEXIST;
 
-	memcpy(array->value + array->elem_size * index, value, map->value_size);
+	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
+		memcpy(this_cpu_ptr(array->pptrs[index]),
+		       value, map->value_size);
+	else
+		memcpy(array->value + array->elem_size * index,
+		       value, map->value_size);
 	return 0;
 }

@@ -133,6 +195,9 @@ static void array_map_free(struct bpf_map *map)
 	 */
 	synchronize_rcu();
 
+	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
+		bpf_array_free_percpu(array);
+
 	kvfree(array);
 }

@@ -150,9 +215,24 @@ static struct bpf_map_type_list array_type __read_mostly = {
 	.type = BPF_MAP_TYPE_ARRAY,
 };
 
+static const struct bpf_map_ops percpu_array_ops = {
+	.map_alloc = array_map_alloc,
+	.map_free = array_map_free,
+	.map_get_next_key = array_map_get_next_key,
+	.map_lookup_elem = percpu_array_map_lookup_elem,
+	.map_update_elem = array_map_update_elem,
+	.map_delete_elem = array_map_delete_elem,
+};
+
+static struct bpf_map_type_list percpu_array_type __read_mostly = {
+	.ops = &percpu_array_ops,
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+};
+
 static int __init register_array_map(void)
 {
 	bpf_register_map_type(&array_type);
+	bpf_register_map_type(&percpu_array_type);
 	return 0;
 }
 late_initcall(register_array_map);
