Skip to content

Commit a43eec3

Browse files
Alexei Starovoitov, davem330
Alexei Starovoitov
authored and davem330 committed
bpf: introduce bpf_perf_event_output() helper
This helper is used to send raw data from an eBPF program into a special PERF_TYPE_SOFTWARE/PERF_COUNT_SW_BPF_OUTPUT perf_event. User space needs to perf_event_open() it (for either one or all CPUs) and store the FD into a perf_event_array (similar to the bpf_perf_event_read() helper) before the eBPF program can send data into it.

Today the programs triggered by kprobes collect the data and either store it into maps or print it via bpf_trace_printk(), where the latter is a debug facility and not suitable for streaming data. This new helper replaces such bpf_trace_printk() usage and allows programs to have a dedicated channel into user space for post-processing of the raw data collected.

Signed-off-by: Alexei Starovoitov <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent fa128e6 commit a43eec3

File tree

5 files changed

+62
-1
lines changed

5 files changed

+62
-1
lines changed

Diff for: include/uapi/linux/bpf.h

+11
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,17 @@ enum bpf_func_id {
287287
* Return: realm if != 0
288288
*/
289289
BPF_FUNC_get_route_realm,
290+
291+
/**
292+
* bpf_perf_event_output(ctx, map, index, data, size) - output perf raw sample
293+
* @ctx: struct pt_regs*
294+
* @map: pointer to perf_event_array map
295+
* @index: index of event in the map
296+
* @data: data on stack to be output as raw data
297+
* @size: size of data
298+
* Return: 0 on success
299+
*/
300+
BPF_FUNC_perf_event_output,
290301
__BPF_FUNC_MAX_ID,
291302
};
292303

Diff for: include/uapi/linux/perf_event.h

+1
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ enum perf_sw_ids {
110110
PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
111111
PERF_COUNT_SW_EMULATION_FAULTS = 8,
112112
PERF_COUNT_SW_DUMMY = 9,
113+
PERF_COUNT_SW_BPF_OUTPUT = 10,
113114

114115
PERF_COUNT_SW_MAX, /* non-ABI */
115116
};

Diff for: kernel/bpf/arraymap.c

+2
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,8 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)
295295
return (void *)attr;
296296

297297
if (attr->type != PERF_TYPE_RAW &&
298+
!(attr->type == PERF_TYPE_SOFTWARE &&
299+
attr->config == PERF_COUNT_SW_BPF_OUTPUT) &&
298300
attr->type != PERF_TYPE_HARDWARE) {
299301
perf_event_release_kernel(event);
300302
return ERR_PTR(-EINVAL);

Diff for: kernel/bpf/verifier.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,7 @@ static const struct {
245245
} func_limit[] = {
246246
{BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call},
247247
{BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read},
248+
{BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_output},
248249
};
249250

250251
static void print_verifier_state(struct verifier_env *env)
@@ -910,7 +911,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
910911
* don't allow any other map type to be passed into
911912
* the special func;
912913
*/
913-
if (bool_map != bool_func)
914+
if (bool_func && bool_map != bool_func)
914915
return -EINVAL;
915916
}
916917

Diff for: kernel/trace/bpf_trace.c

+46
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,50 @@ const struct bpf_func_proto bpf_perf_event_read_proto = {
215215
.arg2_type = ARG_ANYTHING,
216216
};
217217

218+
/*
 * bpf_perf_event_output - emit a raw sample into a perf event stored in a map.
 *
 * All arguments arrive as u64 values; r1, r2 and r4 are cast to pointers:
 * @r1:    cast to struct pt_regs * and passed on to perf_event_output()
 * @r2:    cast to struct bpf_map *; the enclosing struct bpf_array is derived
 *         from it with container_of()
 * @index: slot in array->ptrs[] holding the target perf event
 * @r4:    cast to void *; becomes the .data of the raw record
 * @size:  becomes the .size of the raw record
 *
 * Return: 0 after calling perf_event_output(), or
 *   -E2BIG      if @index >= array->map.max_entries
 *   -ENOENT     if the slot at @index is empty (NULL)
 *   -EINVAL     if the event is not PERF_TYPE_SOFTWARE with
 *               config PERF_COUNT_SW_BPF_OUTPUT
 *   -EOPNOTSUPP if event->oncpu differs from smp_processor_id()
 */
static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, u64 r4, u64 size)
219+
{
220+
struct pt_regs *regs = (struct pt_regs *) (long) r1;
221+
struct bpf_map *map = (struct bpf_map *) (long) r2;
222+
struct bpf_array *array = container_of(map, struct bpf_array, map);
223+
void *data = (void *) (long) r4;
224+
struct perf_sample_data sample_data;
225+
struct perf_event *event;
226+
/* Raw record describing the caller-supplied buffer and its length. */
struct perf_raw_record raw = {
227+
.size = size,
228+
.data = data,
229+
};
230+
231+
/* Reject indices outside the map before touching array->ptrs[]. */
if (unlikely(index >= array->map.max_entries))
232+
return -E2BIG;
233+
234+
event = (struct perf_event *)array->ptrs[index];
235+
if (unlikely(!event))
236+
return -ENOENT;
237+
238+
/* Only the software BPF-output event type is accepted here. */
if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
239+
event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
240+
return -EINVAL;
241+
242+
/* The event's oncpu must match the processor this code runs on. */
if (unlikely(event->oncpu != smp_processor_id()))
243+
return -EOPNOTSUPP;
244+
245+
/* Initialise the sample, attach the raw record, and emit it. */
perf_sample_data_init(&sample_data, 0, 0);
246+
sample_data.raw = &raw;
247+
perf_event_output(event, &sample_data, regs);
248+
return 0;
249+
}
250+
251+
/*
 * Prototype descriptor for bpf_perf_event_output(): not GPL-only, returns an
 * integer, and declares its five arguments as context pointer, constant map
 * pointer, unconstrained value (the index), pointer to stack memory (the
 * data), and constant stack size (the data length).
 */
static const struct bpf_func_proto bpf_perf_event_output_proto = {
252+
.func = bpf_perf_event_output,
253+
.gpl_only = false,
254+
.ret_type = RET_INTEGER,
255+
.arg1_type = ARG_PTR_TO_CTX,
256+
.arg2_type = ARG_CONST_MAP_PTR,
257+
.arg3_type = ARG_ANYTHING,
258+
.arg4_type = ARG_PTR_TO_STACK,
259+
.arg5_type = ARG_CONST_STACK_SIZE,
260+
};
261+
218262
static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
219263
{
220264
switch (func_id) {
@@ -242,6 +286,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
242286
return &bpf_get_smp_processor_id_proto;
243287
case BPF_FUNC_perf_event_read:
244288
return &bpf_perf_event_read_proto;
289+
case BPF_FUNC_perf_event_output:
290+
return &bpf_perf_event_output_proto;
245291
default:
246292
return NULL;
247293
}

0 commit comments

Comments
 (0)