Skip to content

Commit 8c7a2ce

Browse files
authored
[AArch64][SME] Allow spills of ZT0 around SME ABI routines again (#136726)
In #132722 spills of ZT0 were disabled around all SME ABI routines to avoid a case where ZT0 is spilled before ZA is enabled (resulting in a crash). It turns out that the ABI does not promise that routines will preserve ZT0 (however in practice they do), so generally disabling ZT0 spills for ABI routines is not correct. The case where a crash was possible was "aarch64_new_zt0" functions with ZA disabled on entry and a ZT0 spill around __arm_tpidr2_save. In this case, ZT0 will be undefined at the call to __arm_tpidr2_save, so this patch avoids the ZT0 spill by marking the callsite with "aarch64_zt0_undef". This attribute only applies to callsites and marks that at the point the call is made ZT0 is not defined, so does not need preserving.
1 parent 576161c commit 8c7a2ce

File tree

8 files changed

+107
-13
lines changed

8 files changed

+107
-13
lines changed

llvm/lib/IR/Verifier.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -2859,6 +2859,9 @@ void Verifier::visitFunction(const Function &F) {
28592859
Check(!Attrs.hasAttrSomewhere(Attribute::ElementType),
28602860
"Attribute 'elementtype' can only be applied to a callsite.", &F);
28612861

2862+
Check(!Attrs.hasFnAttr("aarch64_zt0_undef"),
2863+
"Attribute 'aarch64_zt0_undef' can only be applied to a callsite.");
2864+
28622865
if (Attrs.hasFnAttr(Attribute::Naked))
28632866
for (const Argument &Arg : F.args())
28642867
Check(Arg.use_empty(), "cannot use argument of naked function", &Arg);

llvm/lib/Target/AArch64/SMEABIPass.cpp

+12-4
Original file line numberDiff line numberDiff line change
@@ -54,14 +54,22 @@ FunctionPass *llvm::createSMEABIPass() { return new SMEABI(); }
5454
//===----------------------------------------------------------------------===//
5555

5656
// Utility function to emit a call to __arm_tpidr2_save and clear TPIDR2_EL0.
57-
void emitTPIDR2Save(Module *M, IRBuilder<> &Builder) {
57+
void emitTPIDR2Save(Module *M, IRBuilder<> &Builder, bool ZT0IsUndef = false) {
58+
auto &Ctx = M->getContext();
5859
auto *TPIDR2SaveTy =
5960
FunctionType::get(Builder.getVoidTy(), {}, /*IsVarArgs=*/false);
60-
auto Attrs = AttributeList().addFnAttribute(M->getContext(),
61-
"aarch64_pstate_sm_compatible");
61+
auto Attrs =
62+
AttributeList().addFnAttribute(Ctx, "aarch64_pstate_sm_compatible");
6263
FunctionCallee Callee =
6364
M->getOrInsertFunction("__arm_tpidr2_save", TPIDR2SaveTy, Attrs);
6465
CallInst *Call = Builder.CreateCall(Callee);
66+
67+
// If ZT0 is undefined (i.e. we're at the entry of a "new_zt0" function), mark
68+
// that on the __arm_tpidr2_save call. This prevents an unnecessary spill of
69+
// ZT0 that can occur before ZA is enabled.
70+
if (ZT0IsUndef)
71+
Call->addFnAttr(Attribute::get(Ctx, "aarch64_zt0_undef"));
72+
6573
Call->setCallingConv(
6674
CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0);
6775

@@ -119,7 +127,7 @@ bool SMEABI::updateNewStateFunctions(Module *M, Function *F,
119127

120128
// Create a call __arm_tpidr2_save, which commits the lazy save.
121129
Builder.SetInsertPoint(&SaveBB->back());
122-
emitTPIDR2Save(M, Builder);
130+
emitTPIDR2Save(M, Builder, /*ZT0IsUndef=*/FnAttrs.isNewZT0());
123131

124132
// Enable pstate.za at the start of the function.
125133
Builder.SetInsertPoint(&OrigBB->front());

llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ SMEAttrs::SMEAttrs(const AttributeList &Attrs) {
7575
Bitmask |= SM_Body;
7676
if (Attrs.hasFnAttr("aarch64_za_state_agnostic"))
7777
Bitmask |= ZA_State_Agnostic;
78+
if (Attrs.hasFnAttr("aarch64_zt0_undef"))
79+
Bitmask |= ZT0_Undef;
7880
if (Attrs.hasFnAttr("aarch64_in_za"))
7981
Bitmask |= encodeZAState(StateValue::In);
8082
if (Attrs.hasFnAttr("aarch64_out_za"))

llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h

+6-5
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,10 @@ class SMEAttrs {
4343
SM_Body = 1 << 2, // aarch64_pstate_sm_body
4444
SME_ABI_Routine = 1 << 3, // Used for SME ABI routines to avoid lazy saves
4545
ZA_State_Agnostic = 1 << 4,
46-
ZA_Shift = 5,
46+
ZT0_Undef = 1 << 5, // Use to mark ZT0 as undef to avoid spills
47+
ZA_Shift = 6,
4748
ZA_Mask = 0b111 << ZA_Shift,
48-
ZT0_Shift = 8,
49+
ZT0_Shift = 9,
4950
ZT0_Mask = 0b111 << ZT0_Shift
5051
};
5152

@@ -125,16 +126,16 @@ class SMEAttrs {
125126
bool isPreservesZT0() const {
126127
return decodeZT0State(Bitmask) == StateValue::Preserved;
127128
}
129+
bool isUndefZT0() const { return Bitmask & ZT0_Undef; }
128130
bool sharesZT0() const {
129131
StateValue State = decodeZT0State(Bitmask);
130132
return State == StateValue::In || State == StateValue::Out ||
131133
State == StateValue::InOut || State == StateValue::Preserved;
132134
}
133135
bool hasZT0State() const { return isNewZT0() || sharesZT0(); }
134136
bool requiresPreservingZT0(const SMEAttrs &Callee) const {
135-
return hasZT0State() && !Callee.sharesZT0() &&
136-
!Callee.hasAgnosticZAInterface() &&
137-
!(Callee.Bitmask & SME_ABI_Routine);
137+
return hasZT0State() && !Callee.isUndefZT0() && !Callee.sharesZT0() &&
138+
!Callee.hasAgnosticZAInterface();
138139
}
139140
bool requiresDisablingZABeforeCall(const SMEAttrs &Callee) const {
140141
return hasZT0State() && !hasZAState() && Callee.hasPrivateZAInterface() &&
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
; RUN: opt -S -mtriple=aarch64-linux-gnu -aarch64-sme-abi %s | FileCheck %s
2+
3+
declare void @callee();
4+
5+
define void @private_za() "aarch64_new_zt0" {
6+
call void @callee()
7+
ret void
8+
}
9+
10+
; CHECK: call aarch64_sme_preservemost_from_x0 void @__arm_tpidr2_save() #[[TPIDR2_SAVE_CALL_ATTR:[0-9]+]]
11+
; CHECK: declare void @__arm_tpidr2_save() #[[TPIDR2_SAVE_DECL_ATTR:[0-9]+]]
12+
13+
; CHECK: attributes #[[TPIDR2_SAVE_DECL_ATTR]] = { "aarch64_pstate_sm_compatible" }
14+
; CHECK: attributes #[[TPIDR2_SAVE_CALL_ATTR]] = { "aarch64_zt0_undef" }

llvm/test/CodeGen/AArch64/sme-zt0-state.ll

+37-4
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,39 @@ define void @zt0_new_caller_zt0_new_callee() "aarch64_new_zt0" nounwind {
167167
ret void;
168168
}
169169

170+
; Expect commit of lazy-save if ZA is dormant
171+
; Expect smstart ZA & clear ZT0
172+
; No spill & fill of ZT0 around __arm_tpidr2_save
173+
; Expect spill & fill of ZT0 around __arm_sme_state call
174+
; Before return, expect smstop ZA
175+
define i64 @zt0_new_caller_abi_routine_callee() "aarch64_new_zt0" nounwind {
176+
; CHECK-LABEL: zt0_new_caller_abi_routine_callee:
177+
; CHECK: // %bb.0: // %prelude
178+
; CHECK-NEXT: sub sp, sp, #80
179+
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
180+
; CHECK-NEXT: mrs x8, TPIDR2_EL0
181+
; CHECK-NEXT: cbz x8, .LBB7_2
182+
; CHECK-NEXT: // %bb.1: // %save.za
183+
; CHECK-NEXT: bl __arm_tpidr2_save
184+
; CHECK-NEXT: msr TPIDR2_EL0, xzr
185+
; CHECK-NEXT: .LBB7_2:
186+
; CHECK-NEXT: smstart za
187+
; CHECK-NEXT: zero { zt0 }
188+
; CHECK-NEXT: mov x19, sp
189+
; CHECK-NEXT: str zt0, [x19]
190+
; CHECK-NEXT: bl __arm_sme_state
191+
; CHECK-NEXT: ldr zt0, [x19]
192+
; CHECK-NEXT: smstop za
193+
; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
194+
; CHECK-NEXT: add sp, sp, #80
195+
; CHECK-NEXT: ret
196+
%res = call {i64, i64} @__arm_sme_state()
197+
%res.0 = extractvalue {i64, i64} %res, 0
198+
ret i64 %res.0
199+
}
200+
201+
declare {i64, i64} @__arm_sme_state()
202+
170203
;
171204
; New-ZA Caller
172205
;
@@ -179,11 +212,11 @@ define void @zt0_new_caller() "aarch64_new_zt0" nounwind {
179212
; CHECK: // %bb.0: // %prelude
180213
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
181214
; CHECK-NEXT: mrs x8, TPIDR2_EL0
182-
; CHECK-NEXT: cbz x8, .LBB7_2
215+
; CHECK-NEXT: cbz x8, .LBB8_2
183216
; CHECK-NEXT: // %bb.1: // %save.za
184217
; CHECK-NEXT: bl __arm_tpidr2_save
185218
; CHECK-NEXT: msr TPIDR2_EL0, xzr
186-
; CHECK-NEXT: .LBB7_2:
219+
; CHECK-NEXT: .LBB8_2:
187220
; CHECK-NEXT: smstart za
188221
; CHECK-NEXT: zero { zt0 }
189222
; CHECK-NEXT: bl callee
@@ -202,11 +235,11 @@ define void @new_za_zt0_caller() "aarch64_new_za" "aarch64_new_zt0" nounwind {
202235
; CHECK: // %bb.0: // %prelude
203236
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
204237
; CHECK-NEXT: mrs x8, TPIDR2_EL0
205-
; CHECK-NEXT: cbz x8, .LBB8_2
238+
; CHECK-NEXT: cbz x8, .LBB9_2
206239
; CHECK-NEXT: // %bb.1: // %save.za
207240
; CHECK-NEXT: bl __arm_tpidr2_save
208241
; CHECK-NEXT: msr TPIDR2_EL0, xzr
209-
; CHECK-NEXT: .LBB8_2:
242+
; CHECK-NEXT: .LBB9_2:
210243
; CHECK-NEXT: smstart za
211244
; CHECK-NEXT: zero {za}
212245
; CHECK-NEXT: zero { zt0 }

llvm/test/Verifier/sme-attributes.ll

+3
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,6 @@ declare void @zt0_inout_out() "aarch64_inout_zt0" "aarch64_out_zt0";
6868

6969
declare void @zt0_inout_agnostic() "aarch64_inout_zt0" "aarch64_za_state_agnostic";
7070
; CHECK: Attributes 'aarch64_new_zt0', 'aarch64_in_zt0', 'aarch64_out_zt0', 'aarch64_inout_zt0', 'aarch64_preserves_zt0' and 'aarch64_za_state_agnostic' are mutually exclusive
71+
72+
declare void @zt0_undef_function() "aarch64_zt0_undef";
73+
; CHECK: Attribute 'aarch64_zt0_undef' can only be applied to a callsite.

llvm/unittests/Target/AArch64/SMEAttributesTest.cpp

+30
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "Utils/AArch64SMEAttributes.h"
22
#include "llvm/AsmParser/Parser.h"
33
#include "llvm/IR/Function.h"
4+
#include "llvm/IR/InstrTypes.h"
45
#include "llvm/IR/Module.h"
56
#include "llvm/Support/SourceMgr.h"
67

@@ -69,6 +70,15 @@ TEST(SMEAttributes, Constructors) {
6970
ASSERT_TRUE(SA(*parseIR("declare void @foo() \"aarch64_new_zt0\"")
7071
->getFunction("foo"))
7172
.isNewZT0());
73+
ASSERT_TRUE(
74+
SA(cast<CallBase>((parseIR("declare void @callee()\n"
75+
"define void @foo() {"
76+
"call void @callee() \"aarch64_zt0_undef\"\n"
77+
"ret void\n}")
78+
->getFunction("foo")
79+
->begin()
80+
->front())))
81+
.isUndefZT0());
7282

7383
// Invalid combinations.
7484
EXPECT_DEBUG_DEATH(SA(SA::SM_Enabled | SA::SM_Compatible),
@@ -215,6 +225,18 @@ TEST(SMEAttributes, Basics) {
215225
ASSERT_FALSE(ZT0_New.hasSharedZAInterface());
216226
ASSERT_TRUE(ZT0_New.hasPrivateZAInterface());
217227

228+
SA ZT0_Undef = SA(SA::ZT0_Undef | SA::encodeZT0State(SA::StateValue::New));
229+
ASSERT_TRUE(ZT0_Undef.isNewZT0());
230+
ASSERT_FALSE(ZT0_Undef.isInZT0());
231+
ASSERT_FALSE(ZT0_Undef.isOutZT0());
232+
ASSERT_FALSE(ZT0_Undef.isInOutZT0());
233+
ASSERT_FALSE(ZT0_Undef.isPreservesZT0());
234+
ASSERT_FALSE(ZT0_Undef.sharesZT0());
235+
ASSERT_TRUE(ZT0_Undef.hasZT0State());
236+
ASSERT_FALSE(ZT0_Undef.hasSharedZAInterface());
237+
ASSERT_TRUE(ZT0_Undef.hasPrivateZAInterface());
238+
ASSERT_TRUE(ZT0_Undef.isUndefZT0());
239+
218240
ASSERT_FALSE(SA(SA::Normal).isInZT0());
219241
ASSERT_FALSE(SA(SA::Normal).isOutZT0());
220242
ASSERT_FALSE(SA(SA::Normal).isInOutZT0());
@@ -285,6 +307,7 @@ TEST(SMEAttributes, Transitions) {
285307
SA ZT0_Shared = SA(SA::encodeZT0State(SA::StateValue::In));
286308
SA ZA_ZT0_Shared = SA(SA::encodeZAState(SA::StateValue::In) |
287309
SA::encodeZT0State(SA::StateValue::In));
310+
SA Undef_ZT0 = SA(SA::ZT0_Undef);
288311

289312
// Shared ZA -> Private ZA Interface
290313
ASSERT_FALSE(ZA_Shared.requiresDisablingZABeforeCall(Private_ZA));
@@ -295,6 +318,13 @@ TEST(SMEAttributes, Transitions) {
295318
ASSERT_TRUE(ZT0_Shared.requiresPreservingZT0(Private_ZA));
296319
ASSERT_TRUE(ZT0_Shared.requiresEnablingZAAfterCall(Private_ZA));
297320

321+
// Shared Undef ZT0 -> Private ZA Interface
322+
// Note: "Undef ZT0" is a callsite attribute that means ZT0 is undefined at
323+
// point the of the call.
324+
ASSERT_TRUE(ZT0_Shared.requiresDisablingZABeforeCall(Undef_ZT0));
325+
ASSERT_FALSE(ZT0_Shared.requiresPreservingZT0(Undef_ZT0));
326+
ASSERT_TRUE(ZT0_Shared.requiresEnablingZAAfterCall(Undef_ZT0));
327+
298328
// Shared ZA & ZT0 -> Private ZA Interface
299329
ASSERT_FALSE(ZA_ZT0_Shared.requiresDisablingZABeforeCall(Private_ZA));
300330
ASSERT_TRUE(ZA_ZT0_Shared.requiresPreservingZT0(Private_ZA));

0 commit comments

Comments
 (0)