Skip to content

Commit 4b3b71f

Browse files
committed
[BPF] Add load-acquire and store-release instructions under -mcpu=v4
As discussed in [1], introduce BPF instructions with load-acquire and store-release semantics under -mcpu=v4. The following new flags are defined: BPF_ATOMIC_LOAD: 0x10 BPF_ATOMIC_STORE: 0x20 BPF_RELAXED: 0x0 BPF_CONSUME: 0x1 BPF_ACQUIRE: 0x2 BPF_RELEASE: 0x3 BPF_ACQ_REL: 0x4 BPF_SEQ_CST: 0x5 A "load-acquire" is a BPF_STX | BPF_ATOMIC instruction with the 'imm' field set to BPF_ATOMIC_LOAD | BPF_ACQUIRE (0x12). Similarly, a "store-release" is a BPF_STX | BPF_ATOMIC instruction with the 'imm' field set to BPF_ATOMIC_STORE | BPF_RELEASE (0x23). Unlike existing atomic operations that only support BPF_W (32-bit) and BPF_DW (64-bit) size modifiers, load-acquires and store-releases also support BPF_B (8-bit) and BPF_H (16-bit). An 8- or 16-bit load-acquire zero-extends the value before writing it to a 32-bit register, just like ARM64 instruction LDAPRH and friends. As an example, for -march=bpfel (little-endian): long foo(long *ptr) { return __atomic_load_n(ptr, __ATOMIC_ACQUIRE); } foo() can be compiled to: db 10 00 00 12 00 00 00 r0 = load_acquire((u64 *)(r1 + 0x0)) 95 00 00 00 00 00 00 00 exit opcode (0xdb): BPF_ATOMIC | BPF_DW | BPF_STX imm (0x00000012): BPF_ATOMIC_LOAD | BPF_ACQUIRE Similarly: void bar(short *ptr, short val) { __atomic_store_n(ptr, val, __ATOMIC_RELEASE); } bar() can be compiled to: cb 21 00 00 23 00 00 00 store_release((u16 *)(r1 + 0x0), w2) 95 00 00 00 00 00 00 00 exit opcode (0xcb): BPF_ATOMIC | BPF_H | BPF_STX imm (0x00000023): BPF_ATOMIC_STORE | BPF_RELEASE Inline assembly is also supported. For example: asm volatile("%0 = load_acquire((u64 *)(%1 + 0x0))" : "=r"(ret) : "r"(ptr) : "memory"); Add two macros, __BPF_FEATURE_LOAD_ACQUIRE and __BPF_FEATURE_STORE_RELEASE, to let developers detect these new features in source code. They can also be disabled using two new llc options, -disable-load-acquire and -disable-store-release, respectively. 
Also use ACQUIRE or RELEASE if the user requests a weaker memory order (RELAXED or CONSUME) until we actually support them. Requesting a stronger memory order (i.e. SEQ_CST) will cause an error. [1] https://lore.kernel.org/all/[email protected]/
1 parent 8ab5d1c commit 4b3b71f

File tree

10 files changed

+281
-5
lines changed

10 files changed

+281
-5
lines changed

clang/lib/Basic/Targets/BPF.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ void BPFTargetInfo::getTargetDefines(const LangOptions &Opts,
6767
Builder.defineMacro("__BPF_FEATURE_SDIV_SMOD");
6868
Builder.defineMacro("__BPF_FEATURE_GOTOL");
6969
Builder.defineMacro("__BPF_FEATURE_ST");
70+
Builder.defineMacro("__BPF_FEATURE_LOAD_ACQUIRE");
71+
Builder.defineMacro("__BPF_FEATURE_STORE_RELEASE");
7072
}
7173
}
7274

clang/test/Preprocessor/bpf-predefined-macros.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,12 @@ int t;
6767
#ifdef __BPF_FEATURE_MAY_GOTO
6868
int u;
6969
#endif
70+
#ifdef __BPF_FEATURE_LOAD_ACQUIRE
71+
int v;
72+
#endif
73+
#ifdef __BPF_FEATURE_STORE_RELEASE
74+
int w;
75+
#endif
7076

7177
// CHECK: int b;
7278
// CHECK: int c;
@@ -106,6 +112,9 @@ int u;
106112
// CPU_V3: int u;
107113
// CPU_V4: int u;
108114

115+
// CPU_V4: int v;
116+
// CPU_V4: int w;
117+
109118
// CPU_GENERIC: int g;
110119

111120
// CPU_PROBE: int f;

llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,7 @@ struct BPFOperand : public MCParsedAsmOperand {
237237
.Case("exit", true)
238238
.Case("lock", true)
239239
.Case("ld_pseudo", true)
240+
.Case("store_release", true)
240241
.Default(false);
241242
}
242243

@@ -273,6 +274,7 @@ struct BPFOperand : public MCParsedAsmOperand {
273274
.Case("cmpxchg_64", true)
274275
.Case("cmpxchg32_32", true)
275276
.Case("addr_space_cast", true)
277+
.Case("load_acquire", true)
276278
.Default(false);
277279
}
278280
};

llvm/lib/Target/BPF/BPFInstrFormats.td

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,24 @@ def BPF_END : BPFArithOp<0xd>;
4848
def BPF_XCHG : BPFArithOp<0xe>;
4949
def BPF_CMPXCHG : BPFArithOp<0xf>;
5050

51+
class BPFAtomicLoadStoreOp<bits<4> val> {
52+
bits<4> Value = val;
53+
}
54+
55+
def BPF_ATOMIC_LOAD : BPFAtomicLoadStoreOp<0x1>;
56+
def BPF_ATOMIC_STORE : BPFAtomicLoadStoreOp<0x2>;
57+
58+
class BPFAtomicOrdering<bits<4> val> {
59+
bits<4> Value = val;
60+
}
61+
62+
def BPF_RELAXED : BPFAtomicOrdering<0x0>;
63+
def BPF_CONSUME : BPFAtomicOrdering<0x1>;
64+
def BPF_ACQUIRE : BPFAtomicOrdering<0x2>;
65+
def BPF_RELEASE : BPFAtomicOrdering<0x3>;
66+
def BPF_ACQ_REL : BPFAtomicOrdering<0x4>;
67+
def BPF_SEQ_CST : BPFAtomicOrdering<0x5>;
68+
5169
class BPFEndDir<bits<1> val> {
5270
bits<1> Value = val;
5371
}

llvm/lib/Target/BPF/BPFInstrInfo.td

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ def BPFHasSdivSmod : Predicate<"Subtarget->hasSdivSmod()">;
6060
def BPFNoMovsx : Predicate<"!Subtarget->hasMovsx()">;
6161
def BPFNoBswap : Predicate<"!Subtarget->hasBswap()">;
6262
def BPFHasStoreImm : Predicate<"Subtarget->hasStoreImm()">;
63+
def BPFHasLoadAcquire : Predicate<"Subtarget->hasLoadAcquire()">;
64+
def BPFHasStoreRelease : Predicate<"Subtarget->hasStoreRelease()">;
6365

6466
class ImmediateAsmOperand<string name> : AsmOperandClass {
6567
let Name = name;
@@ -566,6 +568,48 @@ let Predicates = [BPFHasALU32, BPFHasStoreImm] in {
566568
(STB_imm (imm_to_i64 imm:$src), ADDRri:$dst)>;
567569
}
568570

571+
class STORE_RELEASE<BPFWidthModifer SizeOp, string OpcodeStr, RegisterClass RegTp>
572+
: TYPE_LD_ST<BPF_ATOMIC.Value, SizeOp.Value,
573+
(outs),
574+
(ins RegTp:$src, MEMri:$addr),
575+
"store_release(("#OpcodeStr#" *)($addr), $src)",
576+
[]> {
577+
bits<4> src;
578+
bits<20> addr;
579+
580+
let Inst{51-48} = addr{19-16}; // base reg
581+
let Inst{55-52} = src;
582+
let Inst{47-32} = addr{15-0}; // offset
583+
let Inst{7-4} = BPF_ATOMIC_STORE.Value;
584+
let Inst{3-0} = BPF_RELEASE.Value;
585+
let BPFClass = BPF_STX;
586+
}
587+
588+
class STORE_RELEASEi64<BPFWidthModifer Opc, string OpcodeStr>
589+
: STORE_RELEASE<Opc, OpcodeStr, GPR>;
590+
591+
class relaxed_store<PatFrag base>
592+
: PatFrag<(ops node:$val, node:$ptr), (base node:$val, node:$ptr)> {
593+
let IsAtomic = 1;
594+
let IsAtomicOrderingReleaseOrStronger = 0;
595+
}
596+
597+
class releasing_store<PatFrag base>
598+
: PatFrag<(ops node:$val, node:$ptr), (base node:$val, node:$ptr)> {
599+
let IsAtomic = 1;
600+
let IsAtomicOrderingRelease = 1;
601+
}
602+
603+
let Predicates = [BPFHasStoreRelease] in {
604+
def STDREL : STORE_RELEASEi64<BPF_DW, "u64">;
605+
606+
foreach P = [[relaxed_store<atomic_store_64>, STDREL],
607+
[releasing_store<atomic_store_64>, STDREL],
608+
] in {
609+
def : Pat<(P[0] GPR:$val, ADDRri:$addr), (P[1] GPR:$val, ADDRri:$addr)>;
610+
}
611+
}
612+
569613
// LOAD instructions
570614
class LOAD<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, list<dag> Pattern>
571615
: TYPE_LD_ST<ModOp.Value, SizeOp.Value,
@@ -622,6 +666,48 @@ let Predicates = [BPFHasLdsx] in {
622666

623667
def LDD : LOADi64<BPF_DW, BPF_MEM, "u64", load>;
624668

669+
class LOAD_ACQUIRE<BPFWidthModifer SizeOp, string OpcodeStr, RegisterClass RegTp>
670+
: TYPE_LD_ST<BPF_ATOMIC.Value, SizeOp.Value,
671+
(outs RegTp:$dst),
672+
(ins MEMri:$addr),
673+
"$dst = load_acquire(("#OpcodeStr#" *)($addr))",
674+
[]> {
675+
bits<4> dst;
676+
bits<20> addr;
677+
678+
let Inst{51-48} = dst;
679+
let Inst{55-52} = addr{19-16}; // base reg
680+
let Inst{47-32} = addr{15-0}; // offset
681+
let Inst{7-4} = BPF_ATOMIC_LOAD.Value;
682+
let Inst{3-0} = BPF_ACQUIRE.Value;
683+
let BPFClass = BPF_STX;
684+
}
685+
686+
class LOAD_ACQUIREi64<BPFWidthModifer SizeOp, string OpcodeStr>
687+
: LOAD_ACQUIRE<SizeOp, OpcodeStr, GPR>;
688+
689+
class relaxed_load<PatFrags base>
690+
: PatFrag<(ops node:$ptr), (base node:$ptr)> {
691+
let IsAtomic = 1;
692+
let IsAtomicOrderingAcquireOrStronger = 0;
693+
}
694+
695+
class acquiring_load<PatFrags base>
696+
: PatFrag<(ops node:$ptr), (base node:$ptr)> {
697+
let IsAtomic = 1;
698+
let IsAtomicOrderingAcquire = 1;
699+
}
700+
701+
let Predicates = [BPFHasLoadAcquire] in {
702+
def LDDACQ : LOAD_ACQUIREi64<BPF_DW, "u64">;
703+
704+
foreach P = [[relaxed_load<atomic_load_64>, LDDACQ],
705+
[acquiring_load<atomic_load_64>, LDDACQ],
706+
] in {
707+
def : Pat<(P[0] ADDRri:$addr), (P[1] ADDRri:$addr)>;
708+
}
709+
}
710+
625711
class BRANCH<BPFJumpOp Opc, string OpcodeStr, list<dag> Pattern>
626712
: TYPE_ALU_JMP<Opc.Value, BPF_K.Value,
627713
(outs),
@@ -1181,10 +1267,19 @@ class STORE32<BPFWidthModifer SizeOp, string OpcodeStr, list<dag> Pattern>
11811267
class STOREi32<BPFWidthModifer Opc, string OpcodeStr, PatFrag OpNode>
11821268
: STORE32<Opc, OpcodeStr, [(OpNode GPR32:$src, ADDRri:$addr)]>;
11831269

1270+
class STORE_RELEASEi32<BPFWidthModifer Opc, string OpcodeStr>
1271+
: STORE_RELEASE<Opc, OpcodeStr, GPR32>;
1272+
11841273
let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
11851274
def STW32 : STOREi32<BPF_W, "u32", store>;
11861275
def STH32 : STOREi32<BPF_H, "u16", truncstorei16>;
11871276
def STB32 : STOREi32<BPF_B, "u8", truncstorei8>;
1277+
1278+
let Predicates = [BPFHasStoreRelease] in {
1279+
def STWREL32 : STORE_RELEASEi32<BPF_W, "u32">;
1280+
def STHREL32 : STORE_RELEASEi32<BPF_H, "u16">;
1281+
def STBREL32 : STORE_RELEASEi32<BPF_B, "u8">;
1282+
}
11881283
}
11891284

11901285
class LOAD32<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, list<dag> Pattern>
@@ -1205,10 +1300,19 @@ class LOAD32<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, lis
12051300
class LOADi32<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, PatFrag OpNode>
12061301
: LOAD32<SizeOp, ModOp, OpcodeStr, [(set i32:$dst, (OpNode ADDRri:$addr))]>;
12071302

1303+
class LOAD_ACQUIREi32<BPFWidthModifer SizeOp, string OpcodeStr>
1304+
: LOAD_ACQUIRE<SizeOp, OpcodeStr, GPR32>;
1305+
12081306
let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
12091307
def LDW32 : LOADi32<BPF_W, BPF_MEM, "u32", load>;
12101308
def LDH32 : LOADi32<BPF_H, BPF_MEM, "u16", zextloadi16>;
12111309
def LDB32 : LOADi32<BPF_B, BPF_MEM, "u8", zextloadi8>;
1310+
1311+
let Predicates = [BPFHasLoadAcquire] in {
1312+
def LDWACQ32 : LOAD_ACQUIREi32<BPF_W, "u32">;
1313+
def LDHACQ32 : LOAD_ACQUIREi32<BPF_H, "u16">;
1314+
def LDBACQ32 : LOAD_ACQUIREi32<BPF_B, "u8">;
1315+
}
12121316
}
12131317

12141318
let Predicates = [BPFHasALU32] in {
@@ -1238,6 +1342,30 @@ let Predicates = [BPFHasALU32] in {
12381342
(SUBREG_TO_REG (i64 0), (LDH32 ADDRri:$src), sub_32)>;
12391343
def : Pat<(i64 (extloadi32 ADDRri:$src)),
12401344
(SUBREG_TO_REG (i64 0), (LDW32 ADDRri:$src), sub_32)>;
1345+
1346+
let Predicates = [BPFHasLoadAcquire] in {
1347+
foreach P = [[relaxed_load<atomic_load_32>, LDWACQ32],
1348+
[relaxed_load<atomic_load_az_16>, LDHACQ32],
1349+
[relaxed_load<atomic_load_az_8>, LDBACQ32],
1350+
[acquiring_load<atomic_load_32>, LDWACQ32],
1351+
[acquiring_load<atomic_load_az_16>, LDHACQ32],
1352+
[acquiring_load<atomic_load_az_8>, LDBACQ32],
1353+
] in {
1354+
def : Pat<(P[0] ADDRri:$addr), (P[1] ADDRri:$addr)>;
1355+
}
1356+
}
1357+
1358+
let Predicates = [BPFHasStoreRelease] in {
1359+
foreach P = [[relaxed_store<atomic_store_32>, STWREL32],
1360+
[relaxed_store<atomic_store_16>, STHREL32],
1361+
[relaxed_store<atomic_store_8>, STBREL32],
1362+
[releasing_store<atomic_store_32>, STWREL32],
1363+
[releasing_store<atomic_store_16>, STHREL32],
1364+
[releasing_store<atomic_store_8>, STBREL32],
1365+
] in {
1366+
def : Pat<(P[0] GPR32:$val, ADDRri:$addr), (P[1] GPR32:$val, ADDRri:$addr)>;
1367+
}
1368+
}
12411369
}
12421370

12431371
let usesCustomInserter = 1, isCodeGenOnly = 1 in {

llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -100,21 +100,25 @@ static bool isStoreImm(unsigned Opcode) {
100100
}
101101

102102
static bool isStore32(unsigned Opcode) {
103-
return Opcode == BPF::STB32 || Opcode == BPF::STH32 || Opcode == BPF::STW32;
103+
return Opcode == BPF::STB32 || Opcode == BPF::STH32 || Opcode == BPF::STW32 ||
104+
Opcode == BPF::STBREL32 || Opcode == BPF::STHREL32 ||
105+
Opcode == BPF::STWREL32;
104106
}
105107

106108
static bool isStore64(unsigned Opcode) {
107109
return Opcode == BPF::STB || Opcode == BPF::STH || Opcode == BPF::STW ||
108-
Opcode == BPF::STD;
110+
Opcode == BPF::STD || Opcode == BPF::STDREL;
109111
}
110112

111113
static bool isLoad32(unsigned Opcode) {
112-
return Opcode == BPF::LDB32 || Opcode == BPF::LDH32 || Opcode == BPF::LDW32;
114+
return Opcode == BPF::LDB32 || Opcode == BPF::LDH32 || Opcode == BPF::LDW32 ||
115+
Opcode == BPF::LDBACQ32 || Opcode == BPF::LDHACQ32 ||
116+
Opcode == BPF::LDWACQ32;
113117
}
114118

115119
static bool isLoad64(unsigned Opcode) {
116120
return Opcode == BPF::LDB || Opcode == BPF::LDH || Opcode == BPF::LDW ||
117-
Opcode == BPF::LDD;
121+
Opcode == BPF::LDD || Opcode == BPF::LDDACQ;
118122
}
119123

120124
static bool isLoadSext(unsigned Opcode) {

llvm/lib/Target/BPF/BPFSubtarget.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,12 @@ static cl::opt<bool> Disable_gotol("disable-gotol", cl::Hidden, cl::init(false),
4040
static cl::opt<bool>
4141
Disable_StoreImm("disable-storeimm", cl::Hidden, cl::init(false),
4242
cl::desc("Disable BPF_ST (immediate store) insn"));
43+
static cl::opt<bool>
44+
Disable_load_acquire("disable-load-acquire", cl::Hidden, cl::init(false),
45+
cl::desc("Disable load-acquire insns"));
46+
static cl::opt<bool>
47+
Disable_store_release("disable-store-release", cl::Hidden, cl::init(false),
48+
cl::desc("Disable store-release insns"));
4349

4450
void BPFSubtarget::anchor() {}
4551

@@ -62,6 +68,8 @@ void BPFSubtarget::initializeEnvironment() {
6268
HasSdivSmod = false;
6369
HasGotol = false;
6470
HasStoreImm = false;
71+
HasLoadAcquire = false;
72+
HasStoreRelease = false;
6573
}
6674

6775
void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
@@ -91,6 +99,8 @@ void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
9199
HasSdivSmod = !Disable_sdiv_smod;
92100
HasGotol = !Disable_gotol;
93101
HasStoreImm = !Disable_StoreImm;
102+
HasLoadAcquire = !Disable_load_acquire;
103+
HasStoreRelease = !Disable_store_release;
94104
return;
95105
}
96106
}

llvm/lib/Target/BPF/BPFSubtarget.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ class BPFSubtarget : public BPFGenSubtargetInfo {
6464
bool UseDwarfRIS;
6565

6666
// whether cpu v4 insns are enabled.
67-
bool HasLdsx, HasMovsx, HasBswap, HasSdivSmod, HasGotol, HasStoreImm;
67+
bool HasLdsx, HasMovsx, HasBswap, HasSdivSmod, HasGotol, HasStoreImm,
68+
HasLoadAcquire, HasStoreRelease;
6869

6970
std::unique_ptr<CallLowering> CallLoweringInfo;
7071
std::unique_ptr<InstructionSelector> InstSelector;
@@ -92,6 +93,8 @@ class BPFSubtarget : public BPFGenSubtargetInfo {
9293
bool hasSdivSmod() const { return HasSdivSmod; }
9394
bool hasGotol() const { return HasGotol; }
9495
bool hasStoreImm() const { return HasStoreImm; }
96+
bool hasLoadAcquire() const { return HasLoadAcquire; }
97+
bool hasStoreRelease() const { return HasStoreRelease; }
9598

9699
bool isLittleEndian() const { return IsLittleEndian; }
97100

0 commit comments

Comments
 (0)