Skip to content

Commit 68b5f0e

Browse files
committed
[AArch64][SME] Allow spills of ZT0 around SME ABI routines again
In #132722 spills of ZT0 were disabled around all SME ABI routines to avoid a case where ZT0 is spilled before ZA is enabled (resulting in a crash). It turns out that the ABI does not promise that routines will preserve ZT0 (however in practice they do), so generally disabling ZT0 spills for ABI routines is not correct. The case where a crash was possible was "aarch64_new_zt0" functions with ZA disabled on entry and a ZT0 spill around __arm_tpidr2_save. In this case, ZT0 will be undefined at the call to __arm_tpidr2_save, so we can mark the call as preserving ZT0 (whether it does or not) to avoid the ZT0 spills.
1 parent d8b0e61 commit 68b5f0e

File tree

4 files changed

+64
-10
lines changed

4 files changed

+64
-10
lines changed

llvm/lib/Target/AArch64/SMEABIPass.cpp

+12-4
Original file line numberDiff line numberDiff line change
@@ -54,14 +54,22 @@ FunctionPass *llvm::createSMEABIPass() { return new SMEABI(); }
5454
//===----------------------------------------------------------------------===//
5555

5656
// Utility function to emit a call to __arm_tpidr2_save and clear TPIDR2_EL0.
57-
void emitTPIDR2Save(Module *M, IRBuilder<> &Builder) {
57+
void emitTPIDR2Save(Module *M, IRBuilder<> &Builder, bool ZT0IsUndef = false) {
58+
auto &Ctx = M->getContext();
5859
auto *TPIDR2SaveTy =
5960
FunctionType::get(Builder.getVoidTy(), {}, /*IsVarArgs=*/false);
60-
auto Attrs = AttributeList().addFnAttribute(M->getContext(),
61-
"aarch64_pstate_sm_compatible");
61+
auto Attrs =
62+
AttributeList().addFnAttribute(Ctx, "aarch64_pstate_sm_compatible");
6263
FunctionCallee Callee =
6364
M->getOrInsertFunction("__arm_tpidr2_save", TPIDR2SaveTy, Attrs);
6465
CallInst *Call = Builder.CreateCall(Callee);
66+
67+
// If ZT0 is undefined (i.e. we're at the entry of a "new_zt0" function), mark
68+
// __arm_tpidr2_save as preserving ZT0. This prevents an unnecessary spill of
69+
// ZT0 that can occur before ZA is enabled.
70+
if (ZT0IsUndef)
71+
Call->addFnAttr(Attribute::get(Ctx, "aarch64_preserves_zt0"));
72+
6573
Call->setCallingConv(
6674
CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0);
6775

@@ -119,7 +127,7 @@ bool SMEABI::updateNewStateFunctions(Module *M, Function *F,
119127

120128
// Create a call __arm_tpidr2_save, which commits the lazy save.
121129
Builder.SetInsertPoint(&SaveBB->back());
122-
emitTPIDR2Save(M, Builder);
130+
emitTPIDR2Save(M, Builder, /*ZT0IsUndef=*/FnAttrs.isNewZT0());
123131

124132
// Enable pstate.za at the start of the function.
125133
Builder.SetInsertPoint(&OrigBB->front());

llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h

+1-2
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,7 @@ class SMEAttrs {
133133
bool hasZT0State() const { return isNewZT0() || sharesZT0(); }
134134
bool requiresPreservingZT0(const SMEAttrs &Callee) const {
135135
return hasZT0State() && !Callee.sharesZT0() &&
136-
!Callee.hasAgnosticZAInterface() &&
137-
!(Callee.Bitmask & SME_ABI_Routine);
136+
!Callee.hasAgnosticZAInterface();
138137
}
139138
bool requiresDisablingZABeforeCall(const SMEAttrs &Callee) const {
140139
return hasZT0State() && !hasZAState() && Callee.hasPrivateZAInterface() &&
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
; RUN: opt -S -mtriple=aarch64-linux-gnu -aarch64-sme-abi %s | FileCheck %s
2+
3+
declare void @callee();
4+
5+
define void @private_za() "aarch64_new_zt0" {
6+
call void @callee()
7+
ret void
8+
}
9+
10+
; CHECK: call aarch64_sme_preservemost_from_x0 void @__arm_tpidr2_save() #[[TPIDR2_SAVE_CALL_ATTR:[0-9]+]]
11+
; CHECK: declare void @__arm_tpidr2_save() #[[TPIDR2_SAVE_DECL_ATTR:[0-9]+]]
12+
13+
; CHECK: attributes #[[TPIDR2_SAVE_DECL_ATTR]] = { "aarch64_pstate_sm_compatible" }
14+
; CHECK: attributes #[[TPIDR2_SAVE_CALL_ATTR]] = { "aarch64_preserves_zt0" }

llvm/test/CodeGen/AArch64/sme-zt0-state.ll

+37-4
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,39 @@ define void @zt0_new_caller_zt0_new_callee() "aarch64_new_zt0" nounwind {
167167
ret void;
168168
}
169169

170+
; Expect commit of lazy-save if ZA is dormant
171+
; Expect smstart ZA & clear ZT0
172+
; No spill & fill of ZT0 around __arm_tpidr2_save
173+
; Expect spill & fill of ZT0 around __arm_sme_state call
174+
; Before return, expect smstop ZA
175+
define i64 @zt0_new_caller_abi_routine_callee() "aarch64_new_zt0" nounwind {
176+
; CHECK-LABEL: zt0_new_caller_abi_routine_callee:
177+
; CHECK: // %bb.0: // %prelude
178+
; CHECK-NEXT: sub sp, sp, #80
179+
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
180+
; CHECK-NEXT: mrs x8, TPIDR2_EL0
181+
; CHECK-NEXT: cbz x8, .LBB7_2
182+
; CHECK-NEXT: // %bb.1: // %save.za
183+
; CHECK-NEXT: bl __arm_tpidr2_save
184+
; CHECK-NEXT: msr TPIDR2_EL0, xzr
185+
; CHECK-NEXT: .LBB7_2:
186+
; CHECK-NEXT: smstart za
187+
; CHECK-NEXT: zero { zt0 }
188+
; CHECK-NEXT: mov x19, sp
189+
; CHECK-NEXT: str zt0, [x19]
190+
; CHECK-NEXT: bl __arm_sme_state
191+
; CHECK-NEXT: ldr zt0, [x19]
192+
; CHECK-NEXT: smstop za
193+
; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
194+
; CHECK-NEXT: add sp, sp, #80
195+
; CHECK-NEXT: ret
196+
%res = call {i64, i64} @__arm_sme_state()
197+
%res.0 = extractvalue {i64, i64} %res, 0
198+
ret i64 %res.0
199+
}
200+
201+
declare {i64, i64} @__arm_sme_state()
202+
170203
;
171204
; New-ZA Caller
172205
;
@@ -179,11 +212,11 @@ define void @zt0_new_caller() "aarch64_new_zt0" nounwind {
179212
; CHECK: // %bb.0: // %prelude
180213
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
181214
; CHECK-NEXT: mrs x8, TPIDR2_EL0
182-
; CHECK-NEXT: cbz x8, .LBB7_2
215+
; CHECK-NEXT: cbz x8, .LBB8_2
183216
; CHECK-NEXT: // %bb.1: // %save.za
184217
; CHECK-NEXT: bl __arm_tpidr2_save
185218
; CHECK-NEXT: msr TPIDR2_EL0, xzr
186-
; CHECK-NEXT: .LBB7_2:
219+
; CHECK-NEXT: .LBB8_2:
187220
; CHECK-NEXT: smstart za
188221
; CHECK-NEXT: zero { zt0 }
189222
; CHECK-NEXT: bl callee
@@ -202,11 +235,11 @@ define void @new_za_zt0_caller() "aarch64_new_za" "aarch64_new_zt0" nounwind {
202235
; CHECK: // %bb.0: // %prelude
203236
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
204237
; CHECK-NEXT: mrs x8, TPIDR2_EL0
205-
; CHECK-NEXT: cbz x8, .LBB8_2
238+
; CHECK-NEXT: cbz x8, .LBB9_2
206239
; CHECK-NEXT: // %bb.1: // %save.za
207240
; CHECK-NEXT: bl __arm_tpidr2_save
208241
; CHECK-NEXT: msr TPIDR2_EL0, xzr
209-
; CHECK-NEXT: .LBB8_2:
242+
; CHECK-NEXT: .LBB9_2:
210243
; CHECK-NEXT: smstart za
211244
; CHECK-NEXT: zero {za}
212245
; CHECK-NEXT: zero { zt0 }

0 commit comments

Comments
 (0)