Skip to content

Commit f045f2c

Browse files
authored
[AArch64][SME] Fix generating incorrect TBZ when lowering lazy save. (#68429)
After a call to __arm_sme_state, the assembly emitted with -S showed clang generating “tbz xN, #0, Lbb”. However, disassembling the resulting object code showed that the instruction was actually encoded as “tbz xN, #32, Lbb”. The issue is that for TBZ/TBNZ, testing a bit position below 32 requires the W-register variant, because the top bit of the bit-number immediate also selects the register width in the instruction encoding.
1 parent 7050ff4 commit f045f2c

5 files changed

+27
-25
lines changed

llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp

+4-2
Original file line numberDiff line numberDiff line change
@@ -1005,10 +1005,12 @@ AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB,
10051005
// expected value for the callee (0 for a normal callee and 1 for a streaming
10061006
// callee).
10071007
auto PStateSM = MI.getOperand(2).getReg();
1008+
auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
1009+
unsigned SMReg32 = TRI->getSubReg(PStateSM, AArch64::sub_32);
10081010
bool IsStreamingCallee = MI.getOperand(3).getImm();
1009-
unsigned Opc = IsStreamingCallee ? AArch64::TBZX : AArch64::TBNZX;
1011+
unsigned Opc = IsStreamingCallee ? AArch64::TBZW : AArch64::TBNZW;
10101012
MachineInstrBuilder Tbx =
1011-
BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(PStateSM).addImm(0);
1013+
BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(SMReg32).addImm(0);
10121014

10131015
// Split MBB and create two new blocks:
10141016
// - MBB now contains all instructions before MSRcond_pstatesvcrImm1.

llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@ define void @streaming_compatible() #0 {
1717
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
1818
; CHECK-NEXT: bl __arm_sme_state
1919
; CHECK-NEXT: and x19, x0, #0x1
20-
; CHECK-NEXT: tbz x19, #0, .LBB0_2
20+
; CHECK-NEXT: tbz w19, #0, .LBB0_2
2121
; CHECK-NEXT: // %bb.1:
2222
; CHECK-NEXT: smstop sm
2323
; CHECK-NEXT: .LBB0_2:
2424
; CHECK-NEXT: bl non_streaming
25-
; CHECK-NEXT: tbz x19, #0, .LBB0_4
25+
; CHECK-NEXT: tbz w19, #0, .LBB0_4
2626
; CHECK-NEXT: // %bb.3:
2727
; CHECK-NEXT: smstart sm
2828
; CHECK-NEXT: .LBB0_4:

llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -413,14 +413,14 @@ define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compati
413413
; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
414414
; CHECK-COMMON-NEXT: bl __arm_sme_state
415415
; CHECK-COMMON-NEXT: and x19, x0, #0x1
416-
; CHECK-COMMON-NEXT: tbz x19, #0, .LBB12_2
416+
; CHECK-COMMON-NEXT: tbz w19, #0, .LBB12_2
417417
; CHECK-COMMON-NEXT: // %bb.1:
418418
; CHECK-COMMON-NEXT: smstop sm
419419
; CHECK-COMMON-NEXT: .LBB12_2:
420420
; CHECK-COMMON-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload
421421
; CHECK-COMMON-NEXT: bl fmodf
422422
; CHECK-COMMON-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
423-
; CHECK-COMMON-NEXT: tbz x19, #0, .LBB12_4
423+
; CHECK-COMMON-NEXT: tbz w19, #0, .LBB12_4
424424
; CHECK-COMMON-NEXT: // %bb.3:
425425
; CHECK-COMMON-NEXT: smstart sm
426426
; CHECK-COMMON-NEXT: .LBB12_4:

llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll

+4-4
Original file line numberDiff line numberDiff line change
@@ -134,12 +134,12 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_pstate_z
134134
; CHECK-NEXT: msr TPIDR2_EL0, x9
135135
; CHECK-NEXT: bl __arm_sme_state
136136
; CHECK-NEXT: and x19, x0, #0x1
137-
; CHECK-NEXT: tbz x19, #0, .LBB3_2
137+
; CHECK-NEXT: tbz w19, #0, .LBB3_2
138138
; CHECK-NEXT: // %bb.1:
139139
; CHECK-NEXT: smstop sm
140140
; CHECK-NEXT: .LBB3_2:
141141
; CHECK-NEXT: bl private_za_callee
142-
; CHECK-NEXT: tbz x19, #0, .LBB3_4
142+
; CHECK-NEXT: tbz w19, #0, .LBB3_4
143143
; CHECK-NEXT: // %bb.3:
144144
; CHECK-NEXT: smstart sm
145145
; CHECK-NEXT: .LBB3_4:
@@ -187,12 +187,12 @@ define void @za_shared_caller_za_preserved_callee() nounwind "aarch64_pstate_za_
187187
; CHECK-NEXT: msr TPIDR2_EL0, x8
188188
; CHECK-NEXT: bl __arm_sme_state
189189
; CHECK-NEXT: and x19, x0, #0x1
190-
; CHECK-NEXT: tbz x19, #0, .LBB4_2
190+
; CHECK-NEXT: tbz w19, #0, .LBB4_2
191191
; CHECK-NEXT: // %bb.1:
192192
; CHECK-NEXT: smstop sm
193193
; CHECK-NEXT: .LBB4_2:
194194
; CHECK-NEXT: bl private_za_preserved_callee
195-
; CHECK-NEXT: tbz x19, #0, .LBB4_4
195+
; CHECK-NEXT: tbz w19, #0, .LBB4_4
196196
; CHECK-NEXT: // %bb.3:
197197
; CHECK-NEXT: smstart sm
198198
; CHECK-NEXT: .LBB4_4:

llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll

+15-15
Original file line numberDiff line numberDiff line change
@@ -43,12 +43,12 @@ define void @streaming_compatible_caller_normal_callee() "aarch64_pstate_sm_comp
4343
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
4444
; CHECK-NEXT: bl __arm_sme_state
4545
; CHECK-NEXT: and x19, x0, #0x1
46-
; CHECK-NEXT: tbz x19, #0, .LBB1_2
46+
; CHECK-NEXT: tbz w19, #0, .LBB1_2
4747
; CHECK-NEXT: // %bb.1:
4848
; CHECK-NEXT: smstop sm
4949
; CHECK-NEXT: .LBB1_2:
5050
; CHECK-NEXT: bl normal_callee
51-
; CHECK-NEXT: tbz x19, #0, .LBB1_4
51+
; CHECK-NEXT: tbz w19, #0, .LBB1_4
5252
; CHECK-NEXT: // %bb.3:
5353
; CHECK-NEXT: smstart sm
5454
; CHECK-NEXT: .LBB1_4:
@@ -79,12 +79,12 @@ define void @streaming_compatible_caller_streaming_callee() "aarch64_pstate_sm_c
7979
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
8080
; CHECK-NEXT: bl __arm_sme_state
8181
; CHECK-NEXT: and x19, x0, #0x1
82-
; CHECK-NEXT: tbnz x19, #0, .LBB2_2
82+
; CHECK-NEXT: tbnz w19, #0, .LBB2_2
8383
; CHECK-NEXT: // %bb.1:
8484
; CHECK-NEXT: smstart sm
8585
; CHECK-NEXT: .LBB2_2:
8686
; CHECK-NEXT: bl streaming_callee
87-
; CHECK-NEXT: tbnz x19, #0, .LBB2_4
87+
; CHECK-NEXT: tbnz w19, #0, .LBB2_4
8888
; CHECK-NEXT: // %bb.3:
8989
; CHECK-NEXT: smstop sm
9090
; CHECK-NEXT: .LBB2_4:
@@ -134,7 +134,7 @@ define <2 x double> @streaming_compatible_with_neon_vectors(<2 x double> %arg) "
134134
; CHECK-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill
135135
; CHECK-NEXT: bl __arm_sme_state
136136
; CHECK-NEXT: and x19, x0, #0x1
137-
; CHECK-NEXT: tbz x19, #0, .LBB4_2
137+
; CHECK-NEXT: tbz w19, #0, .LBB4_2
138138
; CHECK-NEXT: // %bb.1:
139139
; CHECK-NEXT: smstop sm
140140
; CHECK-NEXT: .LBB4_2:
@@ -143,7 +143,7 @@ define <2 x double> @streaming_compatible_with_neon_vectors(<2 x double> %arg) "
143143
; CHECK-NEXT: bl normal_callee_vec_arg
144144
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
145145
; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
146-
; CHECK-NEXT: tbz x19, #0, .LBB4_4
146+
; CHECK-NEXT: tbz w19, #0, .LBB4_4
147147
; CHECK-NEXT: // %bb.3:
148148
; CHECK-NEXT: smstart sm
149149
; CHECK-NEXT: .LBB4_4:
@@ -204,14 +204,14 @@ define <vscale x 2 x double> @streaming_compatible_with_scalable_vectors(<vscale
204204
; CHECK-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill
205205
; CHECK-NEXT: bl __arm_sme_state
206206
; CHECK-NEXT: and x19, x0, #0x1
207-
; CHECK-NEXT: tbz x19, #0, .LBB5_2
207+
; CHECK-NEXT: tbz w19, #0, .LBB5_2
208208
; CHECK-NEXT: // %bb.1:
209209
; CHECK-NEXT: smstop sm
210210
; CHECK-NEXT: .LBB5_2:
211211
; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload
212212
; CHECK-NEXT: bl normal_callee_scalable_vec_arg
213213
; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
214-
; CHECK-NEXT: tbz x19, #0, .LBB5_4
214+
; CHECK-NEXT: tbz w19, #0, .LBB5_4
215215
; CHECK-NEXT: // %bb.3:
216216
; CHECK-NEXT: smstart sm
217217
; CHECK-NEXT: .LBB5_4:
@@ -296,14 +296,14 @@ define <vscale x 2 x i1> @streaming_compatible_with_predicate_vectors(<vscale x
296296
; CHECK-NEXT: str p0, [sp, #7, mul vl] // 2-byte Folded Spill
297297
; CHECK-NEXT: bl __arm_sme_state
298298
; CHECK-NEXT: and x19, x0, #0x1
299-
; CHECK-NEXT: tbz x19, #0, .LBB6_2
299+
; CHECK-NEXT: tbz w19, #0, .LBB6_2
300300
; CHECK-NEXT: // %bb.1:
301301
; CHECK-NEXT: smstop sm
302302
; CHECK-NEXT: .LBB6_2:
303303
; CHECK-NEXT: ldr p0, [sp, #7, mul vl] // 2-byte Folded Reload
304304
; CHECK-NEXT: bl normal_callee_predicate_vec_arg
305305
; CHECK-NEXT: str p0, [sp, #6, mul vl] // 2-byte Folded Spill
306-
; CHECK-NEXT: tbz x19, #0, .LBB6_4
306+
; CHECK-NEXT: tbz w19, #0, .LBB6_4
307307
; CHECK-NEXT: // %bb.3:
308308
; CHECK-NEXT: smstart sm
309309
; CHECK-NEXT: .LBB6_4:
@@ -360,7 +360,7 @@ define i32 @conditional_smstart_unreachable_block() "aarch64_pstate_sm_compatibl
360360
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
361361
; CHECK-NEXT: bl __arm_sme_state
362362
; CHECK-NEXT: and x19, x0, #0x1
363-
; CHECK-NEXT: tbnz x19, #0, .LBB7_2
363+
; CHECK-NEXT: tbnz w19, #0, .LBB7_2
364364
; CHECK-NEXT: // %bb.1:
365365
; CHECK-NEXT: smstart sm
366366
; CHECK-NEXT: .LBB7_2:
@@ -381,12 +381,12 @@ define void @conditional_smstart_no_successor_block(i1 %p) "aarch64_pstate_sm_co
381381
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
382382
; CHECK-NEXT: bl __arm_sme_state
383383
; CHECK-NEXT: and x19, x0, #0x1
384-
; CHECK-NEXT: tbnz x19, #0, .LBB8_3
384+
; CHECK-NEXT: tbnz w19, #0, .LBB8_3
385385
; CHECK-NEXT: // %bb.2: // %if.then
386386
; CHECK-NEXT: smstart sm
387387
; CHECK-NEXT: .LBB8_3: // %if.then
388388
; CHECK-NEXT: bl streaming_callee
389-
; CHECK-NEXT: tbnz x19, #0, .LBB8_5
389+
; CHECK-NEXT: tbnz w19, #0, .LBB8_5
390390
; CHECK-NEXT: // %bb.4: // %if.then
391391
; CHECK-NEXT: smstop sm
392392
; CHECK-NEXT: .LBB8_5: // %if.then
@@ -417,12 +417,12 @@ define void @disable_tailcallopt() "aarch64_pstate_sm_compatible" nounwind {
417417
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
418418
; CHECK-NEXT: bl __arm_sme_state
419419
; CHECK-NEXT: and x19, x0, #0x1
420-
; CHECK-NEXT: tbz x19, #0, .LBB9_2
420+
; CHECK-NEXT: tbz w19, #0, .LBB9_2
421421
; CHECK-NEXT: // %bb.1:
422422
; CHECK-NEXT: smstop sm
423423
; CHECK-NEXT: .LBB9_2:
424424
; CHECK-NEXT: bl normal_callee
425-
; CHECK-NEXT: tbz x19, #0, .LBB9_4
425+
; CHECK-NEXT: tbz w19, #0, .LBB9_4
426426
; CHECK-NEXT: // %bb.3:
427427
; CHECK-NEXT: smstart sm
428428
; CHECK-NEXT: .LBB9_4:

0 commit comments

Comments
 (0)