Skip to content

Commit 569502c

Browse files
sdesmalen-arm authored and DanielCChen committed
[AArch64][SME] Fix iterator to fixupCalleeSaveRestoreStackOffset (llvm#110855)
The iterator passed to `fixupCalleeSaveRestoreStackOffset` may be incorrect when there is an 'rdsvl' instruction straight after the prologue. That's because the code that skips over the instructions that get the current value of 'vg' doesn't check that each instruction it skips is still a 'frame-setup' instruction.
1 parent fe1f600 commit 569502c

File tree

2 files changed

+41
-6
lines changed

2 files changed

+41
-6
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

+3-6
Original file line number | Diff line number | Diff line change
@@ -1953,12 +1953,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
19531953
// pointer bump above.
19541954
while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
19551955
!IsSVECalleeSave(MBBI)) {
1956-
// Move past instructions generated to calculate VG
1957-
if (requiresSaveVG(MF))
1958-
while (isVGInstruction(MBBI))
1959-
++MBBI;
1960-
1961-
if (CombineSPBump)
1956+
if (CombineSPBump &&
1957+
// Only fix-up frame-setup load/store instructions.
1958+
(!requiresSaveVG(MF) || !isVGInstruction(MBBI)))
19621959
fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
19631960
NeedsWinCFI, &HasWinCFI);
19641961
++MBBI;

llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll

+38
Original file line number | Diff line number | Diff line change
@@ -1066,6 +1066,44 @@ define void @streaming_compatible_no_sve(i32 noundef %x) #4 {
10661066
ret void
10671067
}
10681068

1069+
; The algorithm that fixes up the offsets of the callee-save/restore
1070+
; instructions must jump over the instructions that instantiate the current
1071+
; 'VG' value. We must make sure that it doesn't consider any RDSVL in
1072+
; user-code as if it is part of the frame-setup when doing so.
1073+
define void @test_rdsvl_right_after_prologue(i64 %x0) nounwind {
1074+
; NO-SVE-CHECK-LABEL: test_rdsvl_right_after_prologue:
1075+
; NO-SVE-CHECK: // %bb.0:
1076+
; NO-SVE-CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
1077+
; NO-SVE-CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
1078+
; NO-SVE-CHECK-NEXT: mov x9, x0
1079+
; NO-SVE-CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
1080+
; NO-SVE-CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
1081+
; NO-SVE-CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
1082+
; NO-SVE-CHECK-NEXT: bl __arm_get_current_vg
1083+
; NO-SVE-CHECK-NEXT: str x0, [sp, #80] // 8-byte Folded Spill
1084+
; NO-SVE-CHECK-NEXT: mov x0, x9
1085+
; NO-SVE-CHECK-NEXT: rdsvl x8, #1
1086+
; NO-SVE-CHECK-NEXT: add x29, sp, #64
1087+
; NO-SVE-CHECK-NEXT: lsr x8, x8, #3
1088+
; NO-SVE-CHECK-NEXT: mov x1, x0
1089+
; NO-SVE-CHECK-NEXT: smstart sm
1090+
; NO-SVE-CHECK-NEXT: mov x0, x8
1091+
; NO-SVE-CHECK-NEXT: bl bar
1092+
; NO-SVE-CHECK-NEXT: smstop sm
1093+
; NO-SVE-CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
1094+
; NO-SVE-CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
1095+
; NO-SVE-CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
1096+
; NO-SVE-CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
1097+
; NO-SVE-CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
1098+
; NO-SVE-CHECK-NEXT: ret
1099+
%some_alloc = alloca i64, align 8
1100+
%rdsvl = tail call i64 @llvm.aarch64.sme.cntsd()
1101+
call void @bar(i64 %rdsvl, i64 %x0) "aarch64_pstate_sm_enabled"
1102+
ret void
1103+
}
1104+
1105+
declare void @bar(i64, i64)
1106+
10691107
; Ensure we still emit async unwind information with -fno-asynchronous-unwind-tables
10701108
; if the function contains a streaming-mode change.
10711109

0 commit comments

Comments
 (0)