Commit c6287f1

Author/Committer: Alexei Starovoitov

Merge branch 'introduce-load-acquire-and-store-release-bpf-instructions'
Peilin Ye says:

====================
Introduce load-acquire and store-release BPF instructions

This patchset adds kernel support for BPF load-acquire and store-release
instructions (for background, please see [1]), including core/verifier and
arm64/x86-64 JIT compiler changes, as well as selftests. riscv64 is also
planned to be supported.

The corresponding LLVM changes can be found at:
llvm/llvm-project#108636

The first 3 patches from v4 have already been applied:

  - [bpf-next,v4,01/10] bpf/verifier: Factor out atomic_ptr_type_ok()
    https://git.kernel.org/bpf/bpf-next/c/b2d9ef71d4c9
  - [bpf-next,v4,02/10] bpf/verifier: Factor out check_atomic_rmw()
    https://git.kernel.org/bpf/bpf-next/c/d430c46c7580
  - [bpf-next,v4,03/10] bpf/verifier: Factor out check_load_mem() and check_store_reg()
    https://git.kernel.org/bpf/bpf-next/c/d38ad248fb7a

Please refer to the LLVM PR and individual kernel patches for details.
Thanks!

v5: https://lore.kernel.org/all/[email protected]/
v5..v6 change:
  o (Alexei) avoid using #ifndef in verifier.c

v4: https://lore.kernel.org/bpf/[email protected]/
v4..v5 notable changes:
  o (kernel test robot) for 32-bit arches: make the verifier reject 64-bit
    load-acquires/store-releases, and fix build error in interpreter changes
    * tested ARCH=arc build following instructions from kernel test robot
  o (Alexei) drop Documentation/ patch (v4 10/10) for now

v3: https://lore.kernel.org/bpf/[email protected]/
v3..v4 notable changes:
  o (Alexei) add x86-64 JIT support (including arena)
  o add Acked-by: tags from Xu

v2: https://lore.kernel.org/bpf/[email protected]/
v2..v3 notable changes:
  o (Alexei) change encoding to BPF_LOAD_ACQ=0x100, BPF_STORE_REL=0x110
  o add Acked-by: tags from Ilya and Eduard
  o make new selftests depend on:
    * __clang_major__ >= 18, and
    * ENABLE_ATOMICS_TESTS is defined (currently this means -mcpu=v3 or v4), and
    * JIT supports load_acq/store_rel (currently only arm64)
  o work around llvm-17 CI job failure by conditionally defining __arena_global
    variables as 64-bit if __clang_major__ < 18, to make sure .addr_space.1
    has no holes
  o add Google copyright notice in new files

v1: https://lore.kernel.org/all/[email protected]/
v1..v2 notable changes:
  o (Eduard) for x86 and s390, make bpf_jit_supports_insn(..., /*in_arena=*/true)
    return false for load_acq/store_rel
  o add Eduard's Acked-by: tag
  o (Eduard) extract LDX and non-ATOMIC STX handling into helpers, see PATCH v2 3/9
  o allow unpriv programs to store-release pointers to stack
  o (Alexei) make it clearer in the interpreter code (PATCH v2 4/9) that only W
    and DW are supported for atomic RMW
  o test misaligned load_acq/store_rel
  o (Eduard) other selftests/ changes:
    * test load_acq/store_rel with !atomic_ptr_type_ok() pointers:
      - PTR_TO_CTX, for is_ctx_reg()
      - PTR_TO_PACKET, for is_pkt_reg()
      - PTR_TO_FLOW_KEYS, for is_flow_key_reg()
      - PTR_TO_SOCKET, for is_sk_reg()
    * drop atomics/ tests
    * delete unnecessary 'pid' checks from arena_atomics/ tests
    * avoid depending on __BPF_FEATURE_LOAD_ACQ_STORE_REL, use __imm_insn()
      and inline asm macros instead

RFC v1: https://lore.kernel.org/all/[email protected]
RFC v1..v1 notable changes:
  o 1-2/8: minor verifier.c refactoring patches
  o 3/8: core/verifier changes
    * (Eduard) handle load-acquire properly in backtrack_insn()
    * (Eduard) avoid skipping checks (e.g., bpf_jit_supports_insn()) for
      load-acquires
    * track the value stored by store-releases, just like how non-atomic STX
      instructions are handled
    * (Eduard) add missing link in commit message
    * (Eduard) always print 'r' for disasm.c changes
  o 4/8: arm64/insn: avoid treating load_acq/store_rel as load_ex/store_ex
  o 5/8: arm64/insn: add load_acq/store_rel
    * (Xu) include Should-Be-One (SBO) bits in "mask" and "value", to avoid
      setting fixed bits during runtime (JIT-compile time)
  o 6/8: arm64 JIT compiler changes
    * (Xu) use emit_a64_add_i() for "pointer + offset" to optimize code emission
  o 7/8: selftests
    * (Eduard) avoid adding new tests to the 'test_verifier' runner
    * add more tests, e.g., checking mark_precise logic
  o 8/8: instruction-set.rst changes

[1] https://lore.kernel.org/all/[email protected]/

Thanks,
====================

Link: https://patch.msgid.link/[email protected]
Signed-off-by: Alexei Starovoitov <[email protected]>
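For orientation, the two new instructions reuse the existing BPF_STX | BPF_ATOMIC opcode class, with the operation selected by the imm field (BPF_LOAD_ACQ = 0x100, BPF_STORE_REL = 0x110, per the v2..v3 note above). A minimal sketch of the resulting struct bpf_insn values, assuming the in-kernel BPF_ATOMIC_OP() helper from include/linux/filter.h; the register choices are illustrative only:

    /* r0 = load_acquire((u32 *)(r1 + 0)): dst_reg receives the value,
     * src_reg holds the address */
    struct bpf_insn load_acq  = BPF_ATOMIC_OP(BPF_W,  BPF_LOAD_ACQ,  BPF_REG_0, BPF_REG_1, 0);

    /* store_release((u64 *)(r2 + 8), r3): dst_reg holds the address,
     * src_reg supplies the value */
    struct bpf_insn store_rel = BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_2, BPF_REG_3, 8);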
2 parents ad55432 + 953df09 commit c6287f1

File tree: 19 files changed (+1081, −33 lines)

arch/arm64/include/asm/insn.h (10 additions, 2 deletions)

@@ -188,8 +188,10 @@ enum aarch64_insn_ldst_type {
 	AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX,
 	AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX,
 	AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX,
+	AARCH64_INSN_LDST_LOAD_ACQ,
 	AARCH64_INSN_LDST_LOAD_EX,
 	AARCH64_INSN_LDST_LOAD_ACQ_EX,
+	AARCH64_INSN_LDST_STORE_REL,
 	AARCH64_INSN_LDST_STORE_EX,
 	AARCH64_INSN_LDST_STORE_REL_EX,
 	AARCH64_INSN_LDST_SIGNED_LOAD_IMM_OFFSET,
@@ -351,8 +353,10 @@ __AARCH64_INSN_FUNCS(ldr_imm, 0x3FC00000, 0x39400000)
 __AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000)
 __AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000)
 __AARCH64_INSN_FUNCS(exclusive, 0x3F800000, 0x08000000)
-__AARCH64_INSN_FUNCS(load_ex, 0x3F400000, 0x08400000)
-__AARCH64_INSN_FUNCS(store_ex, 0x3F400000, 0x08000000)
+__AARCH64_INSN_FUNCS(load_acq, 0x3FDFFC00, 0x08DFFC00)
+__AARCH64_INSN_FUNCS(store_rel, 0x3FDFFC00, 0x089FFC00)
+__AARCH64_INSN_FUNCS(load_ex, 0x3FC00000, 0x08400000)
+__AARCH64_INSN_FUNCS(store_ex, 0x3FC00000, 0x08000000)
 __AARCH64_INSN_FUNCS(mops, 0x3B200C00, 0x19000400)
 __AARCH64_INSN_FUNCS(stp, 0x7FC00000, 0x29000000)
 __AARCH64_INSN_FUNCS(ldp, 0x7FC00000, 0x29400000)
@@ -602,6 +606,10 @@ u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
 				     int offset,
 				     enum aarch64_insn_variant variant,
 				     enum aarch64_insn_ldst_type type);
+u32 aarch64_insn_gen_load_acq_store_rel(enum aarch64_insn_register reg,
+					enum aarch64_insn_register base,
+					enum aarch64_insn_size_type size,
+					enum aarch64_insn_ldst_type type);
 u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
 				   enum aarch64_insn_register base,
 				   enum aarch64_insn_register state,
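Two separate changes are visible in the __AARCH64_INSN_FUNCS() hunk: new load_acq/store_rel match patterns are added, and the load_ex/store_ex masks are widened from 0x3F400000 to 0x3FC00000 so that LDAR/STLR words no longer false-match as exclusives (the "4/8: arm64/insn: avoid treating load_acq/store_rel as load_ex/store_ex" item in the commit message). A standalone sketch of the (insn & mask) == value test that __AARCH64_INSN_FUNCS() generates, using the mask/value pairs copied from this diff; 0x88dffc20 is the word for LDAR W0, [X1]:

    #include <stdint.h>
    #include <stdio.h>

    /* (insn & mask) == value, as generated by __AARCH64_INSN_FUNCS() */
    static int matches(uint32_t insn, uint32_t mask, uint32_t val)
    {
            return (insn & mask) == val;
    }

    int main(void)
    {
            uint32_t ldar = 0x88dffc20;     /* LDAR W0, [X1] */

            printf("load_acq:    %d\n", matches(ldar, 0x3FDFFC00, 0x08DFFC00)); /* 1 */
            printf("old load_ex: %d\n", matches(ldar, 0x3F400000, 0x08400000)); /* 1: false match */
            printf("new load_ex: %d\n", matches(ldar, 0x3FC00000, 0x08400000)); /* 0 */
            return 0;
    }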

arch/arm64/lib/insn.c (29 additions, 0 deletions)

@@ -540,6 +540,35 @@ u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
 					     offset >> shift);
 }
 
+u32 aarch64_insn_gen_load_acq_store_rel(enum aarch64_insn_register reg,
+					enum aarch64_insn_register base,
+					enum aarch64_insn_size_type size,
+					enum aarch64_insn_ldst_type type)
+{
+	u32 insn;
+
+	switch (type) {
+	case AARCH64_INSN_LDST_LOAD_ACQ:
+		insn = aarch64_insn_get_load_acq_value();
+		break;
+	case AARCH64_INSN_LDST_STORE_REL:
+		insn = aarch64_insn_get_store_rel_value();
+		break;
+	default:
+		pr_err("%s: unknown load-acquire/store-release encoding %d\n",
+		       __func__, type);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	insn = aarch64_insn_encode_ldst_size(size, insn);
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn,
+					    reg);
+
+	return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
+					    base);
+}
+
 u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
 				   enum aarch64_insn_register base,
 				   enum aarch64_insn_register state,
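A minimal userspace re-creation of what the three encode steps above produce, assuming the architectural field positions (size in bits [31:30], Rn in bits [9:5], Rt in bits [4:0]); the kernel's encode helpers themselves are not reproduced here:

    #include <stdint.h>
    #include <stdio.h>

    #define LOAD_ACQ_VALUE 0x08DFFC00u  /* aarch64_insn_get_load_acq_value() */

    /* size goes in bits [31:30], Rn in [9:5], Rt in [4:0] */
    static uint32_t gen_ldar(uint32_t rt, uint32_t rn, uint32_t sz)
    {
            return LOAD_ACQ_VALUE | (sz << 30) | (rn << 5) | rt;
    }

    int main(void)
    {
            /* sz == 2 selects the 32-bit variant: LDAR W0, [X1] */
            printf("0x%08x\n", gen_ldar(0, 1, 2));  /* prints 0x88dffc20 */
            return 0;
    }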

arch/arm64/net/bpf_jit.h (20 additions, 0 deletions)

@@ -119,6 +119,26 @@
 	aarch64_insn_gen_load_store_ex(Rt, Rn, Rs, A64_SIZE(sf), \
 				       AARCH64_INSN_LDST_STORE_REL_EX)
 
+/* Load-acquire & store-release */
+#define A64_LDAR(Rt, Rn, size) \
+	aarch64_insn_gen_load_acq_store_rel(Rt, Rn, AARCH64_INSN_SIZE_##size, \
+					    AARCH64_INSN_LDST_LOAD_ACQ)
+#define A64_STLR(Rt, Rn, size) \
+	aarch64_insn_gen_load_acq_store_rel(Rt, Rn, AARCH64_INSN_SIZE_##size, \
+					    AARCH64_INSN_LDST_STORE_REL)
+
+/* Rt = [Rn] (load acquire) */
+#define A64_LDARB(Wt, Xn)	A64_LDAR(Wt, Xn, 8)
+#define A64_LDARH(Wt, Xn)	A64_LDAR(Wt, Xn, 16)
+#define A64_LDAR32(Wt, Xn)	A64_LDAR(Wt, Xn, 32)
+#define A64_LDAR64(Xt, Xn)	A64_LDAR(Xt, Xn, 64)
+
+/* [Rn] = Rt (store release) */
+#define A64_STLRB(Wt, Xn)	A64_STLR(Wt, Xn, 8)
+#define A64_STLRH(Wt, Xn)	A64_STLR(Wt, Xn, 16)
+#define A64_STLR32(Wt, Xn)	A64_STLR(Wt, Xn, 32)
+#define A64_STLR64(Xt, Xn)	A64_STLR(Xt, Xn, 64)
+
 /*
  * LSE atomics
  *
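Each size-specific macro simply fixes the size argument via token pasting; for example:

    /* A64_LDAR32(dst, reg) expands to:
     *
     *     aarch64_insn_gen_load_acq_store_rel(dst, reg, AARCH64_INSN_SIZE_32,
     *                                         AARCH64_INSN_LDST_LOAD_ACQ)
     *
     * i.e. the instruction word for "ldar w<dst>, [x<reg>]".
     */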

arch/arm64/net/bpf_jit_comp.c (84 additions, 2 deletions)

@@ -647,6 +647,81 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
 	return 0;
 }
 
+static int emit_atomic_ld_st(const struct bpf_insn *insn, struct jit_ctx *ctx)
+{
+	const s32 imm = insn->imm;
+	const s16 off = insn->off;
+	const u8 code = insn->code;
+	const bool arena = BPF_MODE(code) == BPF_PROBE_ATOMIC;
+	const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
+	const u8 dst = bpf2a64[insn->dst_reg];
+	const u8 src = bpf2a64[insn->src_reg];
+	const u8 tmp = bpf2a64[TMP_REG_1];
+	u8 reg;
+
+	switch (imm) {
+	case BPF_LOAD_ACQ:
+		reg = src;
+		break;
+	case BPF_STORE_REL:
+		reg = dst;
+		break;
+	default:
+		pr_err_once("unknown atomic load/store op code %02x\n", imm);
+		return -EINVAL;
+	}
+
+	if (off) {
+		emit_a64_add_i(1, tmp, reg, tmp, off, ctx);
+		reg = tmp;
+	}
+	if (arena) {
+		emit(A64_ADD(1, tmp, reg, arena_vm_base), ctx);
+		reg = tmp;
+	}
+
+	switch (imm) {
+	case BPF_LOAD_ACQ:
+		switch (BPF_SIZE(code)) {
+		case BPF_B:
+			emit(A64_LDARB(dst, reg), ctx);
+			break;
+		case BPF_H:
+			emit(A64_LDARH(dst, reg), ctx);
+			break;
+		case BPF_W:
+			emit(A64_LDAR32(dst, reg), ctx);
+			break;
+		case BPF_DW:
+			emit(A64_LDAR64(dst, reg), ctx);
+			break;
+		}
+		break;
+	case BPF_STORE_REL:
+		switch (BPF_SIZE(code)) {
+		case BPF_B:
+			emit(A64_STLRB(src, reg), ctx);
+			break;
+		case BPF_H:
+			emit(A64_STLRH(src, reg), ctx);
+			break;
+		case BPF_W:
+			emit(A64_STLR32(src, reg), ctx);
+			break;
+		case BPF_DW:
+			emit(A64_STLR64(src, reg), ctx);
+			break;
+		}
+		break;
+	default:
+		pr_err_once("unexpected atomic load/store op code %02x\n",
+			    imm);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 #ifdef CONFIG_ARM64_LSE_ATOMICS
 static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
 {
@@ -1641,11 +1716,17 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 			return ret;
 		break;
 
+	case BPF_STX | BPF_ATOMIC | BPF_B:
+	case BPF_STX | BPF_ATOMIC | BPF_H:
 	case BPF_STX | BPF_ATOMIC | BPF_W:
 	case BPF_STX | BPF_ATOMIC | BPF_DW:
+	case BPF_STX | BPF_PROBE_ATOMIC | BPF_B:
+	case BPF_STX | BPF_PROBE_ATOMIC | BPF_H:
 	case BPF_STX | BPF_PROBE_ATOMIC | BPF_W:
 	case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW:
-		if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
+		if (bpf_atomic_is_load_store(insn))
+			ret = emit_atomic_ld_st(insn, ctx);
+		else if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
 			ret = emit_lse_atomic(insn, ctx);
 		else
 			ret = emit_ll_sc_atomic(insn, ctx);
@@ -2669,7 +2750,8 @@ bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
 	switch (insn->code) {
 	case BPF_STX | BPF_ATOMIC | BPF_W:
 	case BPF_STX | BPF_ATOMIC | BPF_DW:
-		if (!cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
+		if (!bpf_atomic_is_load_store(insn) &&
+		    !cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
 			return false;
 	}
 	return true;
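To make the address-setup logic concrete: for a BPF_W load-acquire with a non-zero offset in BPF_PROBE_ATOMIC (arena) mode, emit_atomic_ld_st() above boils down to the following emit sequence. This is a sketch; the concrete registers come from the bpf2a64[] mapping:

    /* BPF: r0 = load_acquire((u32 *)(r1 + 8)), arena mode */
    emit_a64_add_i(1, tmp, src, tmp, 8, ctx);        /* off != 0: tmp = src + 8 */
    emit(A64_ADD(1, tmp, tmp, arena_vm_base), ctx);  /* arena: tmp += arena base */
    emit(A64_LDAR32(dst, tmp), ctx);                 /* dst = load-acquire of [tmp] */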

arch/s390/net/bpf_jit_comp.c (10 additions, 4 deletions)

@@ -2919,10 +2919,16 @@ bool bpf_jit_supports_arena(void)
 
 bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
 {
-	/*
-	 * Currently the verifier uses this function only to check which
-	 * atomic stores to arena are supported, and they all are.
-	 */
+	if (!in_arena)
+		return true;
+	switch (insn->code) {
+	case BPF_STX | BPF_ATOMIC | BPF_B:
+	case BPF_STX | BPF_ATOMIC | BPF_H:
+	case BPF_STX | BPF_ATOMIC | BPF_W:
+	case BPF_STX | BPF_ATOMIC | BPF_DW:
+		if (bpf_atomic_is_load_store(insn))
+			return false;
+	}
 	return true;
 }
arch/x86/net/bpf_jit_comp.c (82 additions, 13 deletions)

@@ -1242,8 +1242,8 @@ static void emit_st_r12(u8 **pprog, u32 size, u32 dst_reg, int off, int imm)
 	emit_st_index(pprog, size, dst_reg, X86_REG_R12, off, imm);
 }
 
-static int emit_atomic(u8 **pprog, u8 atomic_op,
-		       u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size)
+static int emit_atomic_rmw(u8 **pprog, u32 atomic_op,
+			   u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size)
 {
 	u8 *prog = *pprog;
 
@@ -1283,8 +1283,9 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
 	return 0;
 }
 
-static int emit_atomic_index(u8 **pprog, u8 atomic_op, u32 size,
-			     u32 dst_reg, u32 src_reg, u32 index_reg, int off)
+static int emit_atomic_rmw_index(u8 **pprog, u32 atomic_op, u32 size,
+				 u32 dst_reg, u32 src_reg, u32 index_reg,
+				 int off)
 {
 	u8 *prog = *pprog;
 
@@ -1297,7 +1298,7 @@ static int emit_atomic_index(u8 **pprog, u8 atomic_op, u32 size,
 		EMIT1(add_3mod(0x48, dst_reg, src_reg, index_reg));
 		break;
 	default:
-		pr_err("bpf_jit: 1 and 2 byte atomics are not supported\n");
+		pr_err("bpf_jit: 1- and 2-byte RMW atomics are not supported\n");
 		return -EFAULT;
 	}
 
@@ -1331,6 +1332,49 @@ static int emit_atomic_index(u8 **pprog, u8 atomic_op, u32 size,
 	return 0;
 }
 
+static int emit_atomic_ld_st(u8 **pprog, u32 atomic_op, u32 dst_reg,
+			     u32 src_reg, s16 off, u8 bpf_size)
+{
+	switch (atomic_op) {
+	case BPF_LOAD_ACQ:
+		/* dst_reg = smp_load_acquire(src_reg + off16) */
+		emit_ldx(pprog, bpf_size, dst_reg, src_reg, off);
+		break;
+	case BPF_STORE_REL:
+		/* smp_store_release(dst_reg + off16, src_reg) */
+		emit_stx(pprog, bpf_size, dst_reg, src_reg, off);
+		break;
+	default:
+		pr_err("bpf_jit: unknown atomic load/store opcode %02x\n",
+		       atomic_op);
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+static int emit_atomic_ld_st_index(u8 **pprog, u32 atomic_op, u32 size,
+				   u32 dst_reg, u32 src_reg, u32 index_reg,
+				   int off)
+{
+	switch (atomic_op) {
+	case BPF_LOAD_ACQ:
+		/* dst_reg = smp_load_acquire(src_reg + idx_reg + off16) */
+		emit_ldx_index(pprog, size, dst_reg, src_reg, index_reg, off);
+		break;
+	case BPF_STORE_REL:
+		/* smp_store_release(dst_reg + idx_reg + off16, src_reg) */
+		emit_stx_index(pprog, size, dst_reg, src_reg, index_reg, off);
+		break;
+	default:
+		pr_err("bpf_jit: unknown atomic load/store opcode %02x\n",
+		       atomic_op);
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
 #define DONT_CLEAR 1
 
 bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
@@ -2113,6 +2157,13 @@ st: if (is_imm8(insn->off))
 			}
 			break;
 
+		case BPF_STX | BPF_ATOMIC | BPF_B:
+		case BPF_STX | BPF_ATOMIC | BPF_H:
+			if (!bpf_atomic_is_load_store(insn)) {
+				pr_err("bpf_jit: 1- and 2-byte RMW atomics are not supported\n");
+				return -EFAULT;
+			}
+			fallthrough;
 		case BPF_STX | BPF_ATOMIC | BPF_W:
 		case BPF_STX | BPF_ATOMIC | BPF_DW:
 			if (insn->imm == (BPF_AND | BPF_FETCH) ||
@@ -2148,10 +2199,10 @@ st: if (is_imm8(insn->off))
 				EMIT2(simple_alu_opcodes[BPF_OP(insn->imm)],
 				      add_2reg(0xC0, AUX_REG, real_src_reg));
 				/* Attempt to swap in new value */
-				err = emit_atomic(&prog, BPF_CMPXCHG,
-						  real_dst_reg, AUX_REG,
-						  insn->off,
-						  BPF_SIZE(insn->code));
+				err = emit_atomic_rmw(&prog, BPF_CMPXCHG,
+						      real_dst_reg, AUX_REG,
+						      insn->off,
+						      BPF_SIZE(insn->code));
 				if (WARN_ON(err))
 					return err;
 				/*
@@ -2166,17 +2217,35 @@ st: if (is_imm8(insn->off))
 				break;
 			}
 
-			err = emit_atomic(&prog, insn->imm, dst_reg, src_reg,
-					  insn->off, BPF_SIZE(insn->code));
+			if (bpf_atomic_is_load_store(insn))
+				err = emit_atomic_ld_st(&prog, insn->imm, dst_reg, src_reg,
+							insn->off, BPF_SIZE(insn->code));
+			else
+				err = emit_atomic_rmw(&prog, insn->imm, dst_reg, src_reg,
+						      insn->off, BPF_SIZE(insn->code));
 			if (err)
 				return err;
 			break;
 
+		case BPF_STX | BPF_PROBE_ATOMIC | BPF_B:
+		case BPF_STX | BPF_PROBE_ATOMIC | BPF_H:
+			if (!bpf_atomic_is_load_store(insn)) {
+				pr_err("bpf_jit: 1- and 2-byte RMW atomics are not supported\n");
+				return -EFAULT;
+			}
+			fallthrough;
 		case BPF_STX | BPF_PROBE_ATOMIC | BPF_W:
 		case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW:
 			start_of_ldx = prog;
-			err = emit_atomic_index(&prog, insn->imm, BPF_SIZE(insn->code),
-						dst_reg, src_reg, X86_REG_R12, insn->off);
+
+			if (bpf_atomic_is_load_store(insn))
+				err = emit_atomic_ld_st_index(&prog, insn->imm,
+							      BPF_SIZE(insn->code), dst_reg,
+							      src_reg, X86_REG_R12, insn->off);
+			else
+				err = emit_atomic_rmw_index(&prog, insn->imm, BPF_SIZE(insn->code),
							    dst_reg, src_reg, X86_REG_R12,
							    insn->off);
 			if (err)
 				return err;
 			goto populate_extable;
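Note why emit_atomic_ld_st() can reuse the plain emit_ldx()/emit_stx() MOVs here: under x86-64's TSO memory model, ordinary loads already have acquire semantics and ordinary stores already have release semantics, so no fence instruction is needed for either operation. The kernel's own x86 definitions reflect this; roughly (paraphrasing arch/x86/include/asm/barrier.h, not a verbatim copy):

    #define __smp_load_acquire(p)                                   \
    ({                                                              \
            typeof(*p) ___p1 = READ_ONCE(*p);                       \
            barrier();      /* compiler-only barrier, no fence */   \
            ___p1;                                                  \
    })

    #define __smp_store_release(p, v)                               \
    do {                                                            \
            barrier();      /* compiler-only barrier, no fence */   \
            WRITE_ONCE(*p, v);                                      \
    } while (0)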
