Skip to content

Commit 3745dc2

Browse files
author
Alexei Starovoitov
committed
Merge branch 'writeable-bpf-tracepoints'
Matt Mullins says:

====================
This adds an opt-in interface for tracepoints to expose a writable context to
BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE programs that are attached, while
supporting read-only access from existing BPF_PROG_TYPE_RAW_TRACEPOINT
programs, as well as from non-BPF-based tracepoints.

The initial motivation is to support tracing that can be observed from the
remote end of an NBD socket, e.g. by adding flags to the struct nbd_request
header. Earlier attempts included adding an NBD-specific tracepoint fd, but in
code review, I was recommended to implement it more generically -- as a result,
this patchset is far simpler than my initial try.

v4->v5:
  * rebased onto bpf-next/master and fixed merge conflicts
  * "tools: sync bpf.h" also syncs comments that have previously changed
    in bpf-next

v3->v4:
  * fixed a silly copy/paste typo in include/trace/events/bpf_test_run.h
    (_TRACE_NBD_H -> _TRACE_BPF_TEST_RUN_H)
  * fixed incorrect/misleading wording in patch 1's commit message, since the
    pointer cannot be directly dereferenced in a BPF_PROG_TYPE_RAW_TRACEPOINT
  * cleaned up the error message wording if the prog_tests fail
  * Addressed feedback from Yonghong
  * reject non-pointer-sized accesses to the buffer pointer
  * use sizeof(struct nbd_request) as one-byte-past-the-end in
    raw_tp_writable_reject_nbd_invalid.c
  * use BPF_MOV64_IMM instead of BPF_LD_IMM64

v2->v3:
  * Andrew addressed Josef's comments:
    * C-style commenting in nbd.c
    * Collapsed identical events into a single DECLARE_EVENT_CLASS.
      This saves about 2kB of kernel text

v1->v2:
  * add selftests
  * sync tools/include/uapi/linux/bpf.h
  * reject variable offset into the buffer
  * add string representation of PTR_TO_TP_BUFFER to reg_type_str
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2 parents 34b8ab0 + e950e84 commit 3745dc2

File tree

19 files changed

+433
-5
lines changed

19 files changed

+433
-5
lines changed

Diff for: MAINTAINERS

+1
Original file line numberDiff line numberDiff line change
@@ -10741,6 +10741,7 @@ L: [email protected]
1074110741
1074210742
F: Documentation/blockdev/nbd.txt
1074310743
F: drivers/block/nbd.c
10744+
F: include/trace/events/nbd.h
1074410745
F: include/uapi/linux/nbd.h
1074510746

1074610747
NETWORK DROP MONITOR

Diff for: drivers/block/nbd.c

+13
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@
4444
#include <linux/nbd-netlink.h>
4545
#include <net/genetlink.h>
4646

47+
#define CREATE_TRACE_POINTS
48+
#include <trace/events/nbd.h>
49+
4750
static DEFINE_IDR(nbd_index_idr);
4851
static DEFINE_MUTEX(nbd_index_mutex);
4952
static int nbd_total_devices = 0;
@@ -510,6 +513,10 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
510513
if (sent) {
511514
if (sent >= sizeof(request)) {
512515
skip = sent - sizeof(request);
516+
517+
/* initialize handle for tracing purposes */
518+
handle = nbd_cmd_handle(cmd);
519+
513520
goto send_pages;
514521
}
515522
iov_iter_advance(&from, sent);
@@ -526,11 +533,14 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
526533
handle = nbd_cmd_handle(cmd);
527534
memcpy(request.handle, &handle, sizeof(handle));
528535

536+
trace_nbd_send_request(&request, nbd->index, blk_mq_rq_from_pdu(cmd));
537+
529538
dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
530539
req, nbdcmd_to_ascii(type),
531540
(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
532541
result = sock_xmit(nbd, index, 1, &from,
533542
(type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent);
543+
trace_nbd_header_sent(req, handle);
534544
if (result <= 0) {
535545
if (was_interrupted(result)) {
536546
/* If we haven't sent anything we can just return BUSY,
@@ -603,6 +613,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
603613
bio = next;
604614
}
605615
out:
616+
trace_nbd_payload_sent(req, handle);
606617
nsock->pending = NULL;
607618
nsock->sent = 0;
608619
return 0;
@@ -650,6 +661,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
650661
tag, req);
651662
return ERR_PTR(-ENOENT);
652663
}
664+
trace_nbd_header_received(req, handle);
653665
cmd = blk_mq_rq_to_pdu(req);
654666

655667
mutex_lock(&cmd->lock);
@@ -703,6 +715,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
703715
}
704716
}
705717
out:
718+
trace_nbd_payload_received(req, handle);
706719
mutex_unlock(&cmd->lock);
707720
return ret ? ERR_PTR(ret) : cmd;
708721
}

Diff for: include/linux/bpf.h

+2
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,7 @@ enum bpf_reg_type {
272272
PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
273273
PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */
274274
PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
275+
PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */
275276
};
276277

277278
/* The information passed from prog-specific *_is_valid_access
@@ -361,6 +362,7 @@ struct bpf_prog_aux {
361362
u32 used_map_cnt;
362363
u32 max_ctx_offset;
363364
u32 max_pkt_offset;
365+
u32 max_tp_access;
364366
u32 stack_depth;
365367
u32 id;
366368
u32 func_cnt; /* used by non-func prog as the number of func progs */

Diff for: include/linux/bpf_types.h

+1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
2525
BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
2626
BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
2727
BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
28+
BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable)
2829
#endif
2930
#ifdef CONFIG_CGROUP_BPF
3031
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)

Diff for: include/linux/tracepoint-defs.h

+1
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ struct bpf_raw_event_map {
4545
struct tracepoint *tp;
4646
void *bpf_func;
4747
u32 num_args;
48+
u32 writable_size;
4849
} __aligned(32);
4950

5051
#endif

Diff for: include/trace/bpf_probe.h

+25-2
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,7 @@ __bpf_trace_##call(void *__data, proto) \
6969
* to make sure that if the tracepoint handling changes, the
7070
* bpf probe will fail to compile unless it too is updated.
7171
*/
72-
#undef DEFINE_EVENT
73-
#define DEFINE_EVENT(template, call, proto, args) \
72+
#define __DEFINE_EVENT(template, call, proto, args, size) \
7473
static inline void bpf_test_probe_##call(void) \
7574
{ \
7675
check_trace_callback_type_##call(__bpf_trace_##template); \
@@ -81,12 +80,36 @@ __bpf_trace_tp_map_##call = { \
8180
.tp = &__tracepoint_##call, \
8281
.bpf_func = (void *)__bpf_trace_##template, \
8382
.num_args = COUNT_ARGS(args), \
83+
.writable_size = size, \
8484
};
8585

86+
#define FIRST(x, ...) x
87+
88+
#undef DEFINE_EVENT_WRITABLE
89+
#define DEFINE_EVENT_WRITABLE(template, call, proto, args, size) \
90+
static inline void bpf_test_buffer_##call(void) \
91+
{ \
92+
/* BUILD_BUG_ON() is ignored if the code is completely eliminated, but \
93+
* BUILD_BUG_ON_ZERO() uses a different mechanism that is not \
94+
* dead-code-eliminated. \
95+
*/ \
96+
FIRST(proto); \
97+
(void)BUILD_BUG_ON_ZERO(size != sizeof(*FIRST(args))); \
98+
} \
99+
__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), size)
100+
101+
#undef DEFINE_EVENT
102+
#define DEFINE_EVENT(template, call, proto, args) \
103+
__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), 0)
86104

87105
#undef DEFINE_EVENT_PRINT
88106
#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
89107
DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
90108

91109
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
110+
111+
#undef DEFINE_EVENT_WRITABLE
112+
#undef __DEFINE_EVENT
113+
#undef FIRST
114+
92115
#endif /* CONFIG_BPF_EVENTS */

Diff for: include/trace/events/bpf_test_run.h

+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
#undef TRACE_SYSTEM
3+
#define TRACE_SYSTEM bpf_test_run
4+
5+
#if !defined(_TRACE_BPF_TEST_RUN_H) || defined(TRACE_HEADER_MULTI_READ)
6+
#define _TRACE_BPF_TEST_RUN_H
7+
8+
#include <linux/tracepoint.h>
9+
10+
DECLARE_EVENT_CLASS(bpf_test_finish,
11+
12+
TP_PROTO(int *err),
13+
14+
TP_ARGS(err),
15+
16+
TP_STRUCT__entry(
17+
__field(int, err)
18+
),
19+
20+
TP_fast_assign(
21+
__entry->err = *err;
22+
),
23+
24+
TP_printk("bpf_test_finish with err=%d", __entry->err)
25+
);
26+
27+
#ifdef DEFINE_EVENT_WRITABLE
28+
#undef BPF_TEST_RUN_DEFINE_EVENT
29+
#define BPF_TEST_RUN_DEFINE_EVENT(template, call, proto, args, size) \
30+
DEFINE_EVENT_WRITABLE(template, call, PARAMS(proto), \
31+
PARAMS(args), size)
32+
#else
33+
#undef BPF_TEST_RUN_DEFINE_EVENT
34+
#define BPF_TEST_RUN_DEFINE_EVENT(template, call, proto, args, size) \
35+
DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args))
36+
#endif
37+
38+
BPF_TEST_RUN_DEFINE_EVENT(bpf_test_finish, bpf_test_finish,
39+
40+
TP_PROTO(int *err),
41+
42+
TP_ARGS(err),
43+
44+
sizeof(int)
45+
);
46+
47+
#endif
48+
49+
/* This part must be outside protection */
50+
#include <trace/define_trace.h>

Diff for: include/trace/events/nbd.h

+107
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
#undef TRACE_SYSTEM
3+
#define TRACE_SYSTEM nbd
4+
5+
#if !defined(_TRACE_NBD_H) || defined(TRACE_HEADER_MULTI_READ)
6+
#define _TRACE_NBD_H
7+
8+
#include <linux/tracepoint.h>
9+
10+
DECLARE_EVENT_CLASS(nbd_transport_event,
11+
12+
TP_PROTO(struct request *req, u64 handle),
13+
14+
TP_ARGS(req, handle),
15+
16+
TP_STRUCT__entry(
17+
__field(struct request *, req)
18+
__field(u64, handle)
19+
),
20+
21+
TP_fast_assign(
22+
__entry->req = req;
23+
__entry->handle = handle;
24+
),
25+
26+
TP_printk(
27+
"nbd transport event: request %p, handle 0x%016llx",
28+
__entry->req,
29+
__entry->handle
30+
)
31+
);
32+
33+
DEFINE_EVENT(nbd_transport_event, nbd_header_sent,
34+
35+
TP_PROTO(struct request *req, u64 handle),
36+
37+
TP_ARGS(req, handle)
38+
);
39+
40+
DEFINE_EVENT(nbd_transport_event, nbd_payload_sent,
41+
42+
TP_PROTO(struct request *req, u64 handle),
43+
44+
TP_ARGS(req, handle)
45+
);
46+
47+
DEFINE_EVENT(nbd_transport_event, nbd_header_received,
48+
49+
TP_PROTO(struct request *req, u64 handle),
50+
51+
TP_ARGS(req, handle)
52+
);
53+
54+
DEFINE_EVENT(nbd_transport_event, nbd_payload_received,
55+
56+
TP_PROTO(struct request *req, u64 handle),
57+
58+
TP_ARGS(req, handle)
59+
);
60+
61+
DECLARE_EVENT_CLASS(nbd_send_request,
62+
63+
TP_PROTO(struct nbd_request *nbd_request, int index,
64+
struct request *rq),
65+
66+
TP_ARGS(nbd_request, index, rq),
67+
68+
TP_STRUCT__entry(
69+
__field(struct nbd_request *, nbd_request)
70+
__field(u64, dev_index)
71+
__field(struct request *, request)
72+
),
73+
74+
TP_fast_assign(
75+
__entry->nbd_request = 0;
76+
__entry->dev_index = index;
77+
__entry->request = rq;
78+
),
79+
80+
TP_printk("nbd%lld: request %p", __entry->dev_index, __entry->request)
81+
);
82+
83+
#ifdef DEFINE_EVENT_WRITABLE
84+
#undef NBD_DEFINE_EVENT
85+
#define NBD_DEFINE_EVENT(template, call, proto, args, size) \
86+
DEFINE_EVENT_WRITABLE(template, call, PARAMS(proto), \
87+
PARAMS(args), size)
88+
#else
89+
#undef NBD_DEFINE_EVENT
90+
#define NBD_DEFINE_EVENT(template, call, proto, args, size) \
91+
DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args))
92+
#endif
93+
94+
NBD_DEFINE_EVENT(nbd_send_request, nbd_send_request,
95+
96+
TP_PROTO(struct nbd_request *nbd_request, int index,
97+
struct request *rq),
98+
99+
TP_ARGS(nbd_request, index, rq),
100+
101+
sizeof(struct nbd_request)
102+
);
103+
104+
#endif
105+
106+
/* This part must be outside protection */
107+
#include <trace/define_trace.h>

Diff for: include/uapi/linux/bpf.h

+1
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ enum bpf_prog_type {
168168
BPF_PROG_TYPE_SK_REUSEPORT,
169169
BPF_PROG_TYPE_FLOW_DISSECTOR,
170170
BPF_PROG_TYPE_CGROUP_SYSCTL,
171+
BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
171172
};
172173

173174
enum bpf_attach_type {

Diff for: kernel/bpf/syscall.c

+6-2
Original file line numberDiff line numberDiff line change
@@ -1789,12 +1789,16 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
17891789
}
17901790
raw_tp->btp = btp;
17911791

1792-
prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd,
1793-
BPF_PROG_TYPE_RAW_TRACEPOINT);
1792+
prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
17941793
if (IS_ERR(prog)) {
17951794
err = PTR_ERR(prog);
17961795
goto out_free_tp;
17971796
}
1797+
if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT &&
1798+
prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) {
1799+
err = -EINVAL;
1800+
goto out_put_prog;
1801+
}
17981802

17991803
err = bpf_probe_register(raw_tp->btp, prog);
18001804
if (err)

Diff for: kernel/bpf/verifier.c

+31
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,7 @@ static const char * const reg_type_str[] = {
405405
[PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
406406
[PTR_TO_TCP_SOCK] = "tcp_sock",
407407
[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
408+
[PTR_TO_TP_BUFFER] = "tp_buffer",
408409
};
409410

410411
static char slot_type_char[] = {
@@ -1993,6 +1994,32 @@ static int check_ctx_reg(struct bpf_verifier_env *env,
19931994
return 0;
19941995
}
19951996

1997+
static int check_tp_buffer_access(struct bpf_verifier_env *env,
1998+
const struct bpf_reg_state *reg,
1999+
int regno, int off, int size)
2000+
{
2001+
if (off < 0) {
2002+
verbose(env,
2003+
"R%d invalid tracepoint buffer access: off=%d, size=%d",
2004+
regno, off, size);
2005+
return -EACCES;
2006+
}
2007+
if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
2008+
char tn_buf[48];
2009+
2010+
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2011+
verbose(env,
2012+
"R%d invalid variable buffer offset: off=%d, var_off=%s",
2013+
regno, off, tn_buf);
2014+
return -EACCES;
2015+
}
2016+
if (off + size > env->prog->aux->max_tp_access)
2017+
env->prog->aux->max_tp_access = off + size;
2018+
2019+
return 0;
2020+
}
2021+
2022+
19962023
/* truncate register to smaller size (in bytes)
19972024
* must be called with size < BPF_REG_SIZE
19982025
*/
@@ -2137,6 +2164,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
21372164
err = check_sock_access(env, insn_idx, regno, off, size, t);
21382165
if (!err && value_regno >= 0)
21392166
mark_reg_unknown(env, regs, value_regno);
2167+
} else if (reg->type == PTR_TO_TP_BUFFER) {
2168+
err = check_tp_buffer_access(env, reg, regno, off, size);
2169+
if (!err && t == BPF_READ && value_regno >= 0)
2170+
mark_reg_unknown(env, regs, value_regno);
21402171
} else {
21412172
verbose(env, "R%d invalid mem access '%s'\n", regno,
21422173
reg_type_str[reg->type]);

0 commit comments

Comments
 (0)