diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index 866f113640047..15d8502c2367e 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -553,6 +553,8 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock( MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset, uint64_t AlignOffset) const { + const bool NeedsDwarfCFI = needsDwarfCFI(MF); + const bool HasFP = hasFP(MF); const X86Subtarget &STI = MF.getSubtarget(); const X86TargetLowering &TLI = *STI.getTargetLowering(); const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset); @@ -570,6 +572,11 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock( .addReg(StackPtr) .addImm(StackProbeSize - AlignOffset) .setMIFlag(MachineInstr::FrameSetup); + if (!HasFP && NeedsDwarfCFI) { + BuildCFI(MBB, MBBI, DL, + MCCFIInstruction::createAdjustCfaOffset( + nullptr, StackProbeSize - AlignOffset)); + } MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc)) @@ -591,7 +598,11 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock( .setMIFlag(MachineInstr::FrameSetup); MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. - + if (!HasFP && NeedsDwarfCFI) { + BuildCFI( + MBB, MBBI, DL, + MCCFIInstruction::createAdjustCfaOffset(nullptr, StackProbeSize)); + } addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc)) .setMIFlag(MachineInstr::FrameSetup), StackPtr, false, 0) @@ -607,6 +618,8 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock( .addReg(StackPtr) .addImm(ChunkSize) .setMIFlag(MachineInstr::FrameSetup); + // No need to adjust the DWARF CFA offset here: the final stack position is + // set by the .cfi_def_cfa_offset that follows this last allocation. MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
} @@ -1200,6 +1213,13 @@ bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const { return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone); } +bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const { + return MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); +} + +bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const { + return !isWin64Prologue(MF) && MF.needsFrameMoves(); +} /// emitPrologue - Push callee-saved registers onto the stack, which /// automatically adjust the stack pointer. Adjust the stack pointer to allocate @@ -1305,13 +1325,13 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR; bool IsClrFunclet = IsFunclet && FnHasClrFunclet; bool HasFP = hasFP(MF); - bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); + bool IsWin64Prologue = isWin64Prologue(MF); bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry(); // FIXME: Emit FPO data for EH funclets. 
bool NeedsWinFPO = !IsFunclet && STI.isTargetWin32() && MMI.getModule()->getCodeViewFlag(); bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO; - bool NeedsDwarfCFI = !IsWin64Prologue && MF.needsFrameMoves(); + bool NeedsDwarfCFI = needsDwarfCFI(MF); Register FramePtr = TRI->getFrameRegister(MF); const Register MachineFramePtr = STI.isTarget64BitILP32() diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h index 26e80811af2e5..322aa6fbbfb8f 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.h +++ b/llvm/lib/Target/X86/X86FrameLowering.h @@ -192,6 +192,10 @@ class X86FrameLowering : public TargetFrameLowering { bool has128ByteRedZone(const MachineFunction& MF) const; private: + bool isWin64Prologue(const MachineFunction &MF) const; + + bool needsDwarfCFI(const MachineFunction &MF) const; + uint64_t calculateMaxStackAlign(const MachineFunction &MF) const; /// Emit target stack probe as a call to a helper function diff --git a/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes-mutliple-objects.ll b/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes-mutliple-objects.ll index 0fe492a93d0e1..ed7cb481bbbec 100644 --- a/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes-mutliple-objects.ll +++ b/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes-mutliple-objects.ll @@ -8,6 +8,7 @@ define i32 @foo() local_unnamed_addr #0 { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: ; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-NEXT: .cfi_adjust_cfa_offset 4096 ; CHECK-NEXT: movq $0, (%rsp) ; CHECK-NEXT: subq $1784, %rsp # imm = 0x6F8 ; CHECK-NEXT: .cfi_def_cfa_offset 5888 diff --git a/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll b/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll index bb2be8846ec2f..0e21bae1483b3 100644 --- a/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll +++ b/llvm/test/CodeGen/X86/stack-clash-medium-natural-probes.ll @@ -7,20 +7,18 @@ target triple = "x86_64-unknown-linux-gnu" 
define i32 @foo() local_unnamed_addr #0 { ; CHECK-LABEL: foo: -; CHECK: # %bb.0: -; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 -; CHECK-NEXT: movq $0, (%rsp) -; CHECK-NEXT: subq $3784, %rsp # imm = 0xEC8 -; CHECK-NEXT: .cfi_def_cfa_offset 7888 -; CHECK-NEXT: movl $1, 264(%rsp) -; CHECK-NEXT: movl $1, 4664(%rsp) -; CHECK-NEXT: movl -128(%rsp), %eax -; CHECK-NEXT: addq $7880, %rsp # imm = 0x1EC8 -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: retq - - - +; CHECK: # %bb.0: +; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-NEXT: .cfi_adjust_cfa_offset 4096 +; CHECK-NEXT: movq $0, (%rsp) +; CHECK-NEXT: subq $3784, %rsp # imm = 0xEC8 +; CHECK-NEXT: .cfi_def_cfa_offset 7888 +; CHECK-NEXT: movl $1, 264(%rsp) +; CHECK-NEXT: movl $1, 4664(%rsp) +; CHECK-NEXT: movl -128(%rsp), %eax +; CHECK-NEXT: addq $7880, %rsp # imm = 0x1EC8 +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq %a = alloca i32, i64 2000, align 16 %b0 = getelementptr inbounds i32, i32* %a, i64 98 %b1 = getelementptr inbounds i32, i32* %a, i64 1198 diff --git a/llvm/test/CodeGen/X86/stack-clash-medium.ll b/llvm/test/CodeGen/X86/stack-clash-medium.ll index 5a97074025f1f..c4e2e86d5e17c 100644 --- a/llvm/test/CodeGen/X86/stack-clash-medium.ll +++ b/llvm/test/CodeGen/X86/stack-clash-medium.ll @@ -1,7 +1,32 @@ -; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X86-64 %s -; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86-32 %s +; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X86-64 %s +; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86-32 %s define i32 @foo() local_unnamed_addr #0 { +; CHECK-X86-64-LABEL: foo: +; CHECK-X86-64: # %bb.0: +; CHECK-X86-64-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-X86-64-NEXT: .cfi_adjust_cfa_offset 4096 +; CHECK-X86-64-NEXT: movq $0, (%rsp) +; CHECK-X86-64-NEXT: subq $3784, %rsp # imm = 0xEC8 +; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 7888 +; 
CHECK-X86-64-NEXT: movl $1, 672(%rsp) +; CHECK-X86-64-NEXT: movl -128(%rsp), %eax +; CHECK-X86-64-NEXT: addq $7880, %rsp # imm = 0x1EC8 +; CHECK-X86-64-NEXT: .cfi_def_cfa_offset 8 +; CHECK-X86-64-NEXT: retq +; +; CHECK-X86-32-LABEL: foo: +; CHECK-X86-32: # %bb.0: +; CHECK-X86-32-NEXT: subl $4096, %esp # imm = 0x1000 +; CHECK-X86-32-NEXT: .cfi_adjust_cfa_offset 4096 +; CHECK-X86-32-NEXT: movl $0, (%esp) +; CHECK-X86-32-NEXT: subl $3916, %esp # imm = 0xF4C +; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 8016 +; CHECK-X86-32-NEXT: movl $1, 800(%esp) +; CHECK-X86-32-NEXT: movl (%esp), %eax +; CHECK-X86-32-NEXT: addl $8012, %esp # imm = 0x1F4C +; CHECK-X86-32-NEXT: .cfi_def_cfa_offset 4 +; CHECK-X86-32-NEXT: retl %a = alloca i32, i64 2000, align 16 %b = getelementptr inbounds i32, i32* %a, i64 200 store volatile i32 1, i32* %b diff --git a/llvm/test/CodeGen/X86/stack-clash-unknown-call.ll b/llvm/test/CodeGen/X86/stack-clash-unknown-call.ll index 9294d70528fa2..aacd5fd1c2ed3 100644 --- a/llvm/test/CodeGen/X86/stack-clash-unknown-call.ll +++ b/llvm/test/CodeGen/X86/stack-clash-unknown-call.ll @@ -7,22 +7,20 @@ target triple = "x86_64-unknown-linux-gnu" declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg); define void @foo() local_unnamed_addr #0 { - -;CHECK-LABEL: foo: -;CHECK: # %bb.0: -;CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 -; it's important that we don't use the call as a probe here -;CHECK-NEXT: movq $0, (%rsp) -;CHECK-NEXT: subq $3912, %rsp # imm = 0xF48 -;CHECK-NEXT: .cfi_def_cfa_offset 8016 -;CHECK-NEXT: movq %rsp, %rdi -;CHECK-NEXT: movl $8000, %edx # imm = 0x1F40 -;CHECK-NEXT: xorl %esi, %esi -;CHECK-NEXT: callq memset -;CHECK-NEXT: addq $8008, %rsp # imm = 0x1F48 -;CHECK-NEXT: .cfi_def_cfa_offset 8 -;CHECK-NEXT: retq - +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: subq $4096, %rsp # imm = 0x1000 +; CHECK-NEXT: .cfi_adjust_cfa_offset 4096 +; CHECK-NEXT: movq $0, (%rsp) +; CHECK-NEXT: subq $3912, %rsp # imm = 0xF48 +; 
CHECK-NEXT: .cfi_def_cfa_offset 8016 +; CHECK-NEXT: movq %rsp, %rdi +; CHECK-NEXT: movl $8000, %edx # imm = 0x1F40 +; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: callq memset@PLT +; CHECK-NEXT: addq $8008, %rsp # imm = 0x1F48 +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq %a = alloca i8, i64 8000, align 16 call void @llvm.memset.p0i8.i64(i8* align 16 %a, i8 0, i64 8000, i1 false) ret void