Skip to content

Commit c4fa68c

Browse files
committed
[AArch64][PAC] Eliminate excessive MOVs when computing blend
As function calls do not generally preserve X16 and X17, it is beneficial to allow the AddrDisc operand of the B(L)RA instruction to reside in these registers and to make use of this condition when computing the discriminator.

This can save up to two MOVs in cases such as loading a (signed) virtual function pointer via a (signed) pointer to vtable. For example,

    ldr   x9, [x16]
    mov   x8, x16
    mov   x17, x8
    movk  x17, #34646, lsl #48
    blraa x9, x17

can be simplified to

    ldr   x8, [x16]
    movk  x16, #34646, lsl #48
    blraa x8, x16
1 parent db6037e commit c4fa68c

File tree

3 files changed

+89
-50
lines changed

3 files changed

+89
-50
lines changed

llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp

+53-41
Original file line numberDiff line numberDiff line change
@@ -163,8 +163,15 @@ class AArch64AsmPrinter : public AsmPrinter {
163163
// Emit the sequence for AUT or AUTPAC.
164164
void emitPtrauthAuthResign(const MachineInstr *MI);
165165

166-
// Emit the sequence to compute a discriminator into x17, or reuse AddrDisc.
167-
unsigned emitPtrauthDiscriminator(uint16_t Disc, unsigned AddrDisc);
166+
// Emit the sequence to compute the discriminator.
167+
// ScratchReg should be x16/x17.
168+
// The returned register is either unmodified AddrDisc or x16/x17.
169+
// If the expanded pseudo is allowed to clobber AddrDisc register, setting
170+
// MayUseAddrAsScratch may save one MOV instruction, provided the address
171+
// is already in x16/x17.
172+
Register emitPtrauthDiscriminator(uint16_t Disc, Register AddrDisc,
173+
Register ScratchReg,
174+
bool MayUseAddrAsScratch = false);
168175

169176
// Emit the sequence for LOADauthptrstatic
170177
void LowerLOADauthptrstatic(const MachineInstr &MI);
@@ -1727,8 +1734,10 @@ void AArch64AsmPrinter::emitFMov0(const MachineInstr &MI) {
17271734
}
17281735
}
17291736

1730-
unsigned AArch64AsmPrinter::emitPtrauthDiscriminator(uint16_t Disc,
1731-
unsigned AddrDisc) {
1737+
Register AArch64AsmPrinter::emitPtrauthDiscriminator(uint16_t Disc,
1738+
Register AddrDisc,
1739+
Register ScratchReg,
1740+
bool MayUseAddrAsScratch) {
17321741
// So far we've used NoRegister in pseudos. Now we need real encodings.
17331742
if (AddrDisc == AArch64::NoRegister)
17341743
AddrDisc = AArch64::XZR;
@@ -1738,16 +1747,24 @@ unsigned AArch64AsmPrinter::emitPtrauthDiscriminator(uint16_t Disc,
17381747
if (!Disc)
17391748
return AddrDisc;
17401749

1741-
// If there's only a constant discriminator, MOV it into x17.
1750+
// If there's only a constant discriminator, MOV it into the scratch register.
17421751
if (AddrDisc == AArch64::XZR) {
1743-
emitMOVZ(AArch64::X17, Disc, 0);
1744-
return AArch64::X17;
1752+
emitMOVZ(ScratchReg, Disc, 0);
1753+
return ScratchReg;
17451754
}
17461755

1747-
// If there are both, emit a blend into x17.
1748-
emitMovXReg(AArch64::X17, AddrDisc);
1749-
emitMOVK(AArch64::X17, Disc, 48);
1750-
return AArch64::X17;
1756+
// If there are both, emit a blend into the scratch register.
1757+
1758+
// Check if we can save one MOV instruction.
1759+
assert(MayUseAddrAsScratch || ScratchReg != AddrDisc);
1760+
bool AddrDiscIsSafe = AddrDisc == AArch64::X16 || AddrDisc == AArch64::X17;
1761+
if (MayUseAddrAsScratch && AddrDiscIsSafe)
1762+
ScratchReg = AddrDisc;
1763+
else
1764+
emitMovXReg(ScratchReg, AddrDisc);
1765+
1766+
emitMOVK(ScratchReg, Disc, 48);
1767+
return ScratchReg;
17511768
}
17521769

17531770
/// Emits a code sequence to check an authenticated pointer value.
@@ -1964,7 +1981,8 @@ void AArch64AsmPrinter::emitPtrauthAuthResign(const MachineInstr *MI) {
19641981

19651982
// Compute aut discriminator into x17
19661983
assert(isUInt<16>(AUTDisc));
1967-
unsigned AUTDiscReg = emitPtrauthDiscriminator(AUTDisc, AUTAddrDisc);
1984+
Register AUTDiscReg =
1985+
emitPtrauthDiscriminator(AUTDisc, AUTAddrDisc, AArch64::X17);
19681986
bool AUTZero = AUTDiscReg == AArch64::XZR;
19691987
unsigned AUTOpc = getAUTOpcodeForKey(AUTKey, AUTZero);
19701988

@@ -2005,7 +2023,8 @@ void AArch64AsmPrinter::emitPtrauthAuthResign(const MachineInstr *MI) {
20052023

20062024
// Compute pac discriminator into x17
20072025
assert(isUInt<16>(PACDisc));
2008-
unsigned PACDiscReg = emitPtrauthDiscriminator(PACDisc, PACAddrDisc);
2026+
Register PACDiscReg =
2027+
emitPtrauthDiscriminator(PACDisc, PACAddrDisc, AArch64::X17);
20092028
bool PACZero = PACDiscReg == AArch64::XZR;
20102029
unsigned PACOpc = getPACOpcodeForKey(PACKey, PACZero);
20112030

@@ -2037,8 +2056,17 @@ void AArch64AsmPrinter::emitPtrauthBranch(const MachineInstr *MI) {
20372056

20382057
unsigned AddrDisc = MI->getOperand(3).getReg();
20392058

2040-
// Compute discriminator into x17
2041-
unsigned DiscReg = emitPtrauthDiscriminator(Disc, AddrDisc);
2059+
// Make sure AddrDisc is solely used to compute the discriminator.
2060+
// While hardly meaningful, it is still possible to describe an authentication
2061+
// of a pointer against its own value (instead of storage address) with
2062+
// intrinsics, so use report_fatal_error instead of assert.
2063+
if (BrTarget == AddrDisc)
2064+
report_fatal_error("Branch target is signed with its own value");
2065+
2066+
// x16 and x17 are implicit-def'ed by MI, and AddrDisc is not used as any
2067+
// other input, so try to save one MOV by setting MayUseAddrAsScratch.
2068+
Register DiscReg = emitPtrauthDiscriminator(Disc, AddrDisc, AArch64::X17,
2069+
/*MayUseAddrAsScratch=*/true);
20422070
bool IsZeroDisc = DiscReg == AArch64::XZR;
20432071

20442072
unsigned Opc;
@@ -2332,16 +2360,7 @@ void AArch64AsmPrinter::LowerMOVaddrPAC(const MachineInstr &MI) {
23322360
}
23332361
}
23342362

2335-
unsigned DiscReg = AddrDisc;
2336-
if (Disc != 0) {
2337-
if (AddrDisc != AArch64::XZR) {
2338-
emitMovXReg(AArch64::X17, AddrDisc);
2339-
emitMOVK(AArch64::X17, Disc, 48);
2340-
} else {
2341-
emitMOVZ(AArch64::X17, Disc, 0);
2342-
}
2343-
DiscReg = AArch64::X17;
2344-
}
2363+
Register DiscReg = emitPtrauthDiscriminator(Disc, AddrDisc, AArch64::X17);
23452364

23462365
auto MIB = MCInstBuilder(getPACOpcodeForKey(Key, DiscReg == AArch64::XZR))
23472366
.addReg(AArch64::X16)
@@ -2609,6 +2628,7 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
26092628
// instruction here.
26102629
case AArch64::AUTH_TCRETURN:
26112630
case AArch64::AUTH_TCRETURN_BTI: {
2631+
Register Callee = MI->getOperand(0).getReg();
26122632
const uint64_t Key = MI->getOperand(2).getImm();
26132633
assert((Key == AArch64PACKey::IA || Key == AArch64PACKey::IB) &&
26142634
"Invalid auth key for tail-call return");
@@ -2618,31 +2638,23 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
26182638

26192639
Register AddrDisc = MI->getOperand(4).getReg();
26202640

2621-
Register ScratchReg = MI->getOperand(0).getReg() == AArch64::X16
2622-
? AArch64::X17
2623-
: AArch64::X16;
2641+
Register ScratchReg = Callee == AArch64::X16 ? AArch64::X17 : AArch64::X16;
26242642

26252643
emitPtrauthTailCallHardening(MI);
26262644

2627-
unsigned DiscReg = AddrDisc;
2628-
if (Disc) {
2629-
if (AddrDisc != AArch64::NoRegister) {
2630-
if (ScratchReg != AddrDisc)
2631-
emitMovXReg(ScratchReg, AddrDisc);
2632-
emitMOVK(ScratchReg, Disc, 48);
2633-
} else {
2634-
emitMOVZ(ScratchReg, Disc, 0);
2635-
}
2636-
DiscReg = ScratchReg;
2637-
}
2645+
// See the comments in emitPtrauthBranch.
2646+
if (Callee == AddrDisc)
2647+
report_fatal_error("Call target is signed with its own value");
2648+
Register DiscReg = emitPtrauthDiscriminator(Disc, AddrDisc, ScratchReg,
2649+
/*MayUseAddrAsScratch=*/true);
26382650

2639-
const bool IsZero = DiscReg == AArch64::NoRegister;
2651+
const bool IsZero = DiscReg == AArch64::XZR;
26402652
const unsigned Opcodes[2][2] = {{AArch64::BRAA, AArch64::BRAAZ},
26412653
{AArch64::BRAB, AArch64::BRABZ}};
26422654

26432655
MCInst TmpInst;
26442656
TmpInst.setOpcode(Opcodes[Key][IsZero]);
2645-
TmpInst.addOperand(MCOperand::createReg(MI->getOperand(0).getReg()));
2657+
TmpInst.addOperand(MCOperand::createReg(Callee));
26462658
if (!IsZero)
26472659
TmpInst.addOperand(MCOperand::createReg(DiscReg));
26482660
EmitToStreamer(*OutStreamer, TmpInst);

llvm/lib/Target/AArch64/AArch64InstrInfo.td

+9-9
Original file line numberDiff line numberDiff line change
@@ -1840,36 +1840,36 @@ let Predicates = [HasPAuth] in {
18401840
// materialization here), in part because they're handled in a safer way by
18411841
// the kernel, notably on Darwin.
18421842
def BLRA : Pseudo<(outs), (ins GPR64noip:$Rn, i32imm:$Key, i64imm:$Disc,
1843-
GPR64noip:$AddrDisc),
1843+
GPR64:$AddrDisc),
18441844
[(AArch64authcall GPR64noip:$Rn, timm:$Key, timm:$Disc,
1845-
GPR64noip:$AddrDisc)]>, Sched<[]> {
1845+
GPR64:$AddrDisc)]>, Sched<[]> {
18461846
let isCodeGenOnly = 1;
18471847
let hasSideEffects = 1;
18481848
let mayStore = 0;
18491849
let mayLoad = 0;
18501850
let isCall = 1;
18511851
let Size = 12; // 4 fixed + 8 variable, to compute discriminator.
1852-
let Defs = [X17,LR];
1852+
let Defs = [X16,X17,LR];
18531853
let Uses = [SP];
18541854
}
18551855

18561856
def BLRA_RVMARKER : Pseudo<
18571857
(outs), (ins i64imm:$rvfunc, GPR64noip:$Rn, i32imm:$Key, i64imm:$Disc,
1858-
GPR64noip:$AddrDisc),
1858+
GPR64:$AddrDisc),
18591859
[(AArch64authcall_rvmarker tglobaladdr:$rvfunc,
18601860
GPR64noip:$Rn, timm:$Key, timm:$Disc,
1861-
GPR64noip:$AddrDisc)]>, Sched<[]> {
1861+
GPR64:$AddrDisc)]>, Sched<[]> {
18621862
let isCodeGenOnly = 1;
18631863
let isCall = 1;
1864-
let Defs = [X17,LR];
1864+
let Defs = [X16,X17,LR];
18651865
let Uses = [SP];
18661866
}
18671867

18681868
// BRA pseudo, generalized version of BRAA/BRAB/Z.
18691869
// This directly manipulates x16/x17, which are the only registers the OS
18701870
// guarantees are safe to use for sensitive operations.
18711871
def BRA : Pseudo<(outs), (ins GPR64noip:$Rn, i32imm:$Key, i64imm:$Disc,
1872-
GPR64noip:$AddrDisc), []>, Sched<[]> {
1872+
GPR64:$AddrDisc), []>, Sched<[]> {
18731873
let isCodeGenOnly = 1;
18741874
let hasNoSchedulingInfo = 1;
18751875
let hasSideEffects = 1;
@@ -1880,7 +1880,7 @@ let Predicates = [HasPAuth] in {
18801880
let isBarrier = 1;
18811881
let isIndirectBranch = 1;
18821882
let Size = 12; // 4 fixed + 8 variable, to compute discriminator.
1883-
let Defs = [X17];
1883+
let Defs = [X16,X17];
18841884
}
18851885

18861886
let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
@@ -1971,7 +1971,7 @@ let Predicates = [HasPAuth] in {
19711971
// make sure at least one register is usable as a scratch one - for that
19721972
// purpose, use tcGPRnotx16x17 register class for the second operand.
19731973
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Size = 16,
1974-
Uses = [SP] in {
1974+
Defs = [X16,X17], Uses = [SP] in {
19751975
def AUTH_TCRETURN
19761976
: Pseudo<(outs), (ins tcGPRnotx16x17:$dst, i32imm:$FPDiff, i32imm:$Key,
19771977
i64imm:$Disc, tcGPR64:$AddrDisc),

llvm/test/CodeGen/AArch64/ptrauth-call.ll

+27
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,33 @@ define void @test_tailcall_omit_mov_x16_x16(ptr %objptr) #0 {
188188
ret void
189189
}
190190

191+
define i32 @test_call_omit_extra_moves(ptr %objptr) #0 {
192+
; CHECK-LABEL: test_call_omit_extra_moves:
193+
; DARWIN-NEXT: stp x29, x30, [sp, #-16]!
194+
; ELF-NEXT: str x30, [sp, #-16]!
195+
; CHECK-NEXT: ldr x16, [x0]
196+
; CHECK-NEXT: mov x17, x0
197+
; CHECK-NEXT: movk x17, #6503, lsl #48
198+
; CHECK-NEXT: autda x16, x17
199+
; CHECK-NEXT: ldr x8, [x16]
200+
; CHECK-NEXT: movk x16, #34646, lsl #48
201+
; CHECK-NEXT: blraa x8, x16
202+
; CHECK-NEXT: mov w0, #42
203+
; DARWIN-NEXT: ldp x29, x30, [sp], #16
204+
; ELF-NEXT: ldr x30, [sp], #16
205+
; CHECK-NEXT: ret
206+
%vtable.signed = load ptr, ptr %objptr
207+
%objptr.int = ptrtoint ptr %objptr to i64
208+
%vtable.discr = tail call i64 @llvm.ptrauth.blend(i64 %objptr.int, i64 6503)
209+
%vtable.signed.int = ptrtoint ptr %vtable.signed to i64
210+
%vtable.int = tail call i64 @llvm.ptrauth.auth(i64 %vtable.signed.int, i32 2, i64 %vtable.discr)
211+
%vtable = inttoptr i64 %vtable.int to ptr
212+
%callee.signed = load ptr, ptr %vtable
213+
%callee.discr = tail call i64 @llvm.ptrauth.blend(i64 %vtable.int, i64 34646)
214+
%call.result = tail call i32 %callee.signed(ptr %objptr) [ "ptrauth"(i32 0, i64 %callee.discr) ]
215+
ret i32 42
216+
}
217+
191218
define i32 @test_call_ia_arg(ptr %arg0, i64 %arg1) #0 {
192219
; DARWIN-LABEL: test_call_ia_arg:
193220
; DARWIN-NEXT: stp x29, x30, [sp, #-16]!

0 commit comments

Comments
 (0)