Skip to content

Commit 9802d86

Browse files
Josef Bacik, Alexei Starovoitov
Josef Bacik
authored and
Alexei Starovoitov
committed
bpf: add a bpf_override_function helper
Error injection is sloppy and very ad-hoc. BPF could fill this niche perfectly with its kprobe functionality. We could make sure errors are only triggered in specific call chains that we care about with very specific situations. Accomplish this with the bpf_override_function helper. This will modify the probed caller's return value to the specified value and set the PC to an override function that simply returns, bypassing the originally probed function. This gives us a nice clean way to implement systematic error injection for all of our code paths. Acked-by: Alexei Starovoitov <[email protected]> Acked-by: Ingo Molnar <[email protected]> Signed-off-by: Josef Bacik <[email protected]> Signed-off-by: Alexei Starovoitov <[email protected]>
1 parent 8556e50 commit 9802d86

File tree

15 files changed

+154
-9
lines changed

15 files changed

+154
-9
lines changed

Diff for: arch/Kconfig

+3
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,9 @@ config HAVE_OPTPROBES
196196
config HAVE_KPROBES_ON_FTRACE
197197
bool
198198

199+
config HAVE_KPROBE_OVERRIDE
200+
bool
201+
199202
config HAVE_NMI
200203
bool
201204

Diff for: arch/x86/Kconfig

+1
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ config X86
154154
select HAVE_KERNEL_XZ
155155
select HAVE_KPROBES
156156
select HAVE_KPROBES_ON_FTRACE
157+
select HAVE_KPROBE_OVERRIDE
157158
select HAVE_KRETPROBES
158159
select HAVE_KVM
159160
select HAVE_LIVEPATCH if X86_64

Diff for: arch/x86/include/asm/kprobes.h

+4
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@ extern const int kretprobe_blacklist_size;
6767
void arch_remove_kprobe(struct kprobe *p);
6868
asmlinkage void kretprobe_trampoline(void);
6969

70+
#ifdef CONFIG_KPROBES_ON_FTRACE
71+
extern void arch_ftrace_kprobe_override_function(struct pt_regs *regs);
72+
#endif
73+
7074
/* Architecture specific copy of original instruction*/
7175
struct arch_specific_insn {
7276
/* copy of the original instruction */

Diff for: arch/x86/include/asm/ptrace.h

+5
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,11 @@ static inline unsigned long regs_return_value(struct pt_regs *regs)
109109
return regs->ax;
110110
}
111111

112+
static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
113+
{
114+
regs->ax = rc;
115+
}
116+
112117
/*
113118
* user_mode(regs) determines whether a register set came from user
114119
* mode. On x86_32, this is true if V8086 mode was enabled OR if the

Diff for: arch/x86/kernel/kprobes/ftrace.c

+14
Original file line numberDiff line numberDiff line change
@@ -97,3 +97,17 @@ int arch_prepare_kprobe_ftrace(struct kprobe *p)
9797
p->ainsn.boostable = false;
9898
return 0;
9999
}
100+
101+
asmlinkage void override_func(void);
102+
asm(
103+
".type override_func, @function\n"
104+
"override_func:\n"
105+
" ret\n"
106+
".size override_func, .-override_func\n"
107+
);
108+
109+
void arch_ftrace_kprobe_override_function(struct pt_regs *regs)
110+
{
111+
regs->ip = (unsigned long)&override_func;
112+
}
113+
NOKPROBE_SYMBOL(arch_ftrace_kprobe_override_function);

Diff for: include/linux/filter.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -458,7 +458,8 @@ struct bpf_prog {
458458
locked:1, /* Program image locked? */
459459
gpl_compatible:1, /* Is filter GPL compatible? */
460460
cb_access:1, /* Is control block accessed? */
461-
dst_needed:1; /* Do we need dst entry? */
461+
dst_needed:1, /* Do we need dst entry? */
462+
kprobe_override:1; /* Do we override a kprobe? */
462463
enum bpf_prog_type type; /* Type of BPF program */
463464
u32 len; /* Number of filter blocks */
464465
u32 jited_len; /* Size of jited insns in bytes */

Diff for: include/linux/trace_events.h

+1
Original file line numberDiff line numberDiff line change
@@ -528,6 +528,7 @@ do { \
528528
struct perf_event;
529529

530530
DECLARE_PER_CPU(struct pt_regs, perf_trace_regs);
531+
DECLARE_PER_CPU(int, bpf_kprobe_override);
531532

532533
extern int perf_trace_init(struct perf_event *event);
533534
extern void perf_trace_destroy(struct perf_event *event);

Diff for: include/uapi/linux/bpf.h

+6-1
Original file line numberDiff line numberDiff line change
@@ -677,6 +677,10 @@ union bpf_attr {
677677
* @buf: buf to fill
678678
* @buf_size: size of the buf
679679
* Return : 0 on success or negative error code
680+
*
681+
* int bpf_override_return(pt_regs, rc)
682+
* @pt_regs: pointer to struct pt_regs
683+
* @rc: the return value to set
680684
*/
681685
#define __BPF_FUNC_MAPPER(FN) \
682686
FN(unspec), \
@@ -736,7 +740,8 @@ union bpf_attr {
736740
FN(xdp_adjust_meta), \
737741
FN(perf_event_read_value), \
738742
FN(perf_prog_read_value), \
739-
FN(getsockopt),
743+
FN(getsockopt), \
744+
FN(override_return),
740745

741746
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
742747
* function eBPF program intends to call

Diff for: kernel/bpf/core.c

+3
Original file line numberDiff line numberDiff line change
@@ -1320,6 +1320,9 @@ EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
13201320
bool bpf_prog_array_compatible(struct bpf_array *array,
13211321
const struct bpf_prog *fp)
13221322
{
1323+
if (fp->kprobe_override)
1324+
return false;
1325+
13231326
if (!array->owner_prog_type) {
13241327
/* There's no owner yet where we could check for
13251328
* compatibility.

Diff for: kernel/bpf/verifier.c

+2
Original file line numberDiff line numberDiff line change
@@ -4413,6 +4413,8 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
44134413
prog->dst_needed = 1;
44144414
if (insn->imm == BPF_FUNC_get_prandom_u32)
44154415
bpf_user_rnd_init_once();
4416+
if (insn->imm == BPF_FUNC_override_return)
4417+
prog->kprobe_override = 1;
44164418
if (insn->imm == BPF_FUNC_tail_call) {
44174419
/* If we tail call into other programs, we
44184420
* cannot make any assumptions since they can

Diff for: kernel/events/core.c

+7
Original file line numberDiff line numberDiff line change
@@ -8080,6 +8080,13 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
80808080
return -EINVAL;
80818081
}
80828082

8083+
/* Kprobe override only works for kprobes, not uprobes. */
8084+
if (prog->kprobe_override &&
8085+
!(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) {
8086+
bpf_prog_put(prog);
8087+
return -EINVAL;
8088+
}
8089+
80838090
if (is_tracepoint || is_syscall_tp) {
80848091
int off = trace_event_get_offsets(event->tp_event);
80858092

Diff for: kernel/trace/Kconfig

+11
Original file line numberDiff line numberDiff line change
@@ -529,6 +529,17 @@ config FUNCTION_PROFILER
529529

530530
If in doubt, say N.
531531

532+
config BPF_KPROBE_OVERRIDE
533+
bool "Enable BPF programs to override a kprobed function"
534+
depends on BPF_EVENTS
535+
depends on KPROBES_ON_FTRACE
536+
depends on HAVE_KPROBE_OVERRIDE
537+
depends on DYNAMIC_FTRACE_WITH_REGS
538+
default n
539+
help
540+
Allows BPF to override the execution of a probed function and
541+
set a different return value. This is used for error injection.
542+
532543
config FTRACE_MCOUNT_RECORD
533544
def_bool y
534545
depends on DYNAMIC_FTRACE

Diff for: kernel/trace/bpf_trace.c

+35
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@
1313
#include <linux/filter.h>
1414
#include <linux/uaccess.h>
1515
#include <linux/ctype.h>
16+
#include <linux/kprobes.h>
17+
#include <asm/kprobes.h>
18+
19+
#include "trace_probe.h"
1620
#include "trace.h"
1721

1822
u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
@@ -76,6 +80,24 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
7680
}
7781
EXPORT_SYMBOL_GPL(trace_call_bpf);
7882

83+
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
84+
BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
85+
{
86+
__this_cpu_write(bpf_kprobe_override, 1);
87+
regs_set_return_value(regs, rc);
88+
arch_ftrace_kprobe_override_function(regs);
89+
return 0;
90+
}
91+
92+
static const struct bpf_func_proto bpf_override_return_proto = {
93+
.func = bpf_override_return,
94+
.gpl_only = true,
95+
.ret_type = RET_INTEGER,
96+
.arg1_type = ARG_PTR_TO_CTX,
97+
.arg2_type = ARG_ANYTHING,
98+
};
99+
#endif
100+
79101
BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
80102
{
81103
int ret;
@@ -551,6 +573,10 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
551573
return &bpf_get_stackid_proto;
552574
case BPF_FUNC_perf_event_read_value:
553575
return &bpf_perf_event_read_value_proto;
576+
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
577+
case BPF_FUNC_override_return:
578+
return &bpf_override_return_proto;
579+
#endif
554580
default:
555581
return tracing_func_proto(func_id);
556582
}
@@ -768,6 +794,15 @@ int perf_event_attach_bpf_prog(struct perf_event *event,
768794
struct bpf_prog_array *new_array;
769795
int ret = -EEXIST;
770796

797+
/*
798+
* Kprobe override only works for ftrace based kprobes, and only if they
799+
* are on the opt-in list.
800+
*/
801+
if (prog->kprobe_override &&
802+
(!trace_kprobe_ftrace(event->tp_event) ||
803+
!trace_kprobe_error_injectable(event->tp_event)))
804+
return -EINVAL;
805+
771806
mutex_lock(&bpf_event_mutex);
772807

773808
if (event->prog)

Diff for: kernel/trace/trace_kprobe.c

+48-7
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ struct trace_kprobe {
4242
(offsetof(struct trace_kprobe, tp.args) + \
4343
(sizeof(struct probe_arg) * (n)))
4444

45+
DEFINE_PER_CPU(int, bpf_kprobe_override);
4546

4647
static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)
4748
{
@@ -87,6 +88,27 @@ static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
8788
return nhit;
8889
}
8990

91+
int trace_kprobe_ftrace(struct trace_event_call *call)
92+
{
93+
struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
94+
return kprobe_ftrace(&tk->rp.kp);
95+
}
96+
97+
int trace_kprobe_error_injectable(struct trace_event_call *call)
98+
{
99+
struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
100+
unsigned long addr;
101+
102+
if (tk->symbol) {
103+
addr = (unsigned long)
104+
kallsyms_lookup_name(trace_kprobe_symbol(tk));
105+
addr += tk->rp.kp.offset;
106+
} else {
107+
addr = (unsigned long)tk->rp.kp.addr;
108+
}
109+
return within_kprobe_error_injection_list(addr);
110+
}
111+
90112
static int register_kprobe_event(struct trace_kprobe *tk);
91113
static int unregister_kprobe_event(struct trace_kprobe *tk);
92114

@@ -1170,7 +1192,7 @@ static int kretprobe_event_define_fields(struct trace_event_call *event_call)
11701192
#ifdef CONFIG_PERF_EVENTS
11711193

11721194
/* Kprobe profile handler */
1173-
static void
1195+
static int
11741196
kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
11751197
{
11761198
struct trace_event_call *call = &tk->tp.call;
@@ -1179,12 +1201,29 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
11791201
int size, __size, dsize;
11801202
int rctx;
11811203

1182-
if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs))
1183-
return;
1204+
if (bpf_prog_array_valid(call)) {
1205+
int ret;
1206+
1207+
ret = trace_call_bpf(call, regs);
1208+
1209+
/*
1210+
* We need to check and see if we modified the pc of the
1211+
* pt_regs, and if so clear the kprobe and return 1 so that we
1212+
* don't do the instruction skipping. Also reset our state so
1213+
* we are clean the next pass through.
1214+
*/
1215+
if (__this_cpu_read(bpf_kprobe_override)) {
1216+
__this_cpu_write(bpf_kprobe_override, 0);
1217+
reset_current_kprobe();
1218+
return 1;
1219+
}
1220+
if (!ret)
1221+
return 0;
1222+
}
11841223

11851224
head = this_cpu_ptr(call->perf_events);
11861225
if (hlist_empty(head))
1187-
return;
1226+
return 0;
11881227

11891228
dsize = __get_data_size(&tk->tp, regs);
11901229
__size = sizeof(*entry) + tk->tp.size + dsize;
@@ -1193,13 +1232,14 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
11931232

11941233
entry = perf_trace_buf_alloc(size, NULL, &rctx);
11951234
if (!entry)
1196-
return;
1235+
return 0;
11971236

11981237
entry->ip = (unsigned long)tk->rp.kp.addr;
11991238
memset(&entry[1], 0, dsize);
12001239
store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
12011240
perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
12021241
head, NULL);
1242+
return 0;
12031243
}
12041244
NOKPROBE_SYMBOL(kprobe_perf_func);
12051245

@@ -1275,16 +1315,17 @@ static int kprobe_register(struct trace_event_call *event,
12751315
static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
12761316
{
12771317
struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
1318+
int ret = 0;
12781319

12791320
raw_cpu_inc(*tk->nhit);
12801321

12811322
if (tk->tp.flags & TP_FLAG_TRACE)
12821323
kprobe_trace_func(tk, regs);
12831324
#ifdef CONFIG_PERF_EVENTS
12841325
if (tk->tp.flags & TP_FLAG_PROFILE)
1285-
kprobe_perf_func(tk, regs);
1326+
ret = kprobe_perf_func(tk, regs);
12861327
#endif
1287-
return 0; /* We don't tweek kernel, so just return 0 */
1328+
return ret;
12881329
}
12891330
NOKPROBE_SYMBOL(kprobe_dispatcher);
12901331

Diff for: kernel/trace/trace_probe.h

+12
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,8 @@ struct symbol_cache;
252252
unsigned long update_symbol_cache(struct symbol_cache *sc);
253253
void free_symbol_cache(struct symbol_cache *sc);
254254
struct symbol_cache *alloc_symbol_cache(const char *sym, long offset);
255+
int trace_kprobe_ftrace(struct trace_event_call *call);
256+
int trace_kprobe_error_injectable(struct trace_event_call *call);
255257
#else
256258
/* uprobes do not support symbol fetch methods */
257259
#define fetch_symbol_u8 NULL
@@ -277,6 +279,16 @@ alloc_symbol_cache(const char *sym, long offset)
277279
{
278280
return NULL;
279281
}
282+
283+
static inline int trace_kprobe_ftrace(struct trace_event_call *call)
284+
{
285+
return 0;
286+
}
287+
288+
static inline int trace_kprobe_error_injectable(struct trace_event_call *call)
289+
{
290+
return 0;
291+
}
280292
#endif /* CONFIG_KPROBE_EVENTS */
281293

282294
struct probe_arg {

0 commit comments

Comments
 (0)