Skip to content

Commit ff3b085

Browse files
committed
[X86] Use bundle for CALL_RVMARKER expansion.
This patch updates expandCALL_RVMARKER to wrap the call, the marker and the ObjC runtime call in an instruction bundle (on Darwin targets). This ensures that later passes, such as machine block placement, cannot break the sequence up. On AArch64, the instruction sequence is already wrapped in a bundle.

Keeping the whole instruction sequence together is highly desirable for performance and outweighs any other potential benefits of breaking the sequence up.

Reviewed By: ahatanak

Differential Revision: https://reviews.llvm.org/D115230
1 parent 47eec78 commit ff3b085

File tree

4 files changed

+52
-24
lines changed

4 files changed

+52
-24
lines changed

llvm/lib/Target/X86/X86ExpandPseudo.cpp

+16-10
Original file line numberDiff line numberDiff line change
@@ -191,8 +191,6 @@ void X86ExpandPseudo::expandCALL_RVMARKER(MachineBasicBlock &MBB,
191191
MachineBasicBlock::iterator MBBI) {
192192
// Expand CALL_RVMARKER pseudo to call instruction, followed by the special
193193
//"movq %rax, %rdi" marker.
194-
// TODO: Mark the sequence as bundle, to avoid passes moving other code
195-
// in between.
196194
MachineInstr &MI = *MBBI;
197195

198196
MachineInstr *OriginalCall;
@@ -236,15 +234,23 @@ void X86ExpandPseudo::expandCALL_RVMARKER(MachineBasicBlock &MBB,
236234
// Emit call to ObjC runtime.
237235
const uint32_t *RegMask =
238236
TRI->getCallPreservedMask(*MBB.getParent(), CallingConv::C);
239-
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(X86::CALL64pcrel32))
240-
.addGlobalAddress(MI.getOperand(0).getGlobal(), 0, 0)
241-
.addRegMask(RegMask)
242-
.addReg(X86::RAX,
243-
RegState::Implicit |
244-
(RAXImplicitDead ? (RegState::Dead | RegState::Define)
245-
: RegState::Define))
246-
.getInstr();
237+
MachineInstr *RtCall =
238+
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(X86::CALL64pcrel32))
239+
.addGlobalAddress(MI.getOperand(0).getGlobal(), 0, 0)
240+
.addRegMask(RegMask)
241+
.addReg(X86::RAX,
242+
RegState::Implicit |
243+
(RAXImplicitDead ? (RegState::Dead | RegState::Define)
244+
: RegState::Define))
245+
.getInstr();
247246
MI.eraseFromParent();
247+
248+
auto &TM = MBB.getParent()->getTarget();
249+
// On Darwin platforms, wrap the expanded sequence in a bundle to prevent
250+
// later optimizations from breaking up the sequence.
251+
if (TM.getTargetTriple().isOSDarwin())
252+
finalizeBundle(MBB, OriginalCall->getIterator(),
253+
std::next(RtCall->getIterator()));
248254
}
249255

250256
/// If \p MBBI is a pseudo instruction, this method expands

llvm/lib/Target/X86/X86TargetMachine.cpp

+12
Original file line numberDiff line numberDiff line change
@@ -588,6 +588,18 @@ void X86PassConfig::addPreEmitPass2() {
588588

589589
// Insert pseudo probe annotation for callsite profiling
590590
addPass(createPseudoProbeInserter());
591+
592+
// On Darwin platforms, CALL_RVMARKER pseudo instructions are lowered to
593+
// bundles.
594+
if (TT.isOSDarwin())
595+
addPass(createUnpackMachineBundles([](const MachineFunction &MF) {
596+
// Only run bundle expansion if there are relevant ObjC runtime functions
597+
// present in the module.
598+
const Function &F = MF.getFunction();
599+
const Module *M = F.getParent();
600+
return M->getFunction("objc_retainAutoreleasedReturnValue") ||
601+
M->getFunction("objc_unsafeClaimAutoreleasedReturnValue");
602+
}));
591603
}
592604

593605
bool X86PassConfig::addPostFastRegAllocRewrite() {

llvm/test/CodeGen/X86/call-rv-marker.ll

+4-2
Original file line numberDiff line numberDiff line change
@@ -232,14 +232,16 @@ define i8* @rv_marker_block_placement(i1 %c.0) {
232232

233233
; CHECK-NEXT: ## %bb.1:
234234
; CHECK-NEXT: callq _fn1
235+
; CHECK-NEXT: movq %rax, %rdi
236+
; CHECK-NEXT: callq _objc_retainAutoreleasedReturnValue
235237
; CHECK-NEXT: jmp LBB8_3
236238

237239
; CHECK-NEXT: LBB8_2:
238240
; CHECK-NEXT: callq _fn2
239-
240-
; CHECK-NEXT: LBB8_3:
241241
; CHECK-NEXT: movq %rax, %rdi
242242
; CHECK-NEXT: callq _objc_retainAutoreleasedReturnValue
243+
244+
; CHECK-NEXT: LBB8_3:
243245
; CHECK-NEXT: xorl %eax, %eax
244246
; CHECK-NEXT: popq %rcx
245247
; CHECK-NEXT: retq

llvm/test/CodeGen/X86/expand-call-rvmarker.mir

+20-12
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,11 @@
3030
# CHECK: bb.0
3131
# CHECK-NEXT: frame-setup PUSH64r undef $rax, implicit-def $rsp, implicit $rsp
3232
# CHECK-NEXT: CFI_INSTRUCTION def_cfa_offset 16
33-
# CHECK-NEXT: CALL64pcrel32 @fn, csr_64, implicit $rsp, implicit $ssp, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
34-
# CHECK-NEXT: $rdi = MOV64rr $rax
35-
# CHECK-NEXT: CALL64pcrel32 @objc_retainAutoreleasedReturnValue, csr_64, implicit $rsp, implicit $ssp, implicit-def $rax
33+
# CHECK-NEXT: BUNDLE
34+
# CHECK-NEXT: CALL64pcrel32 @fn, csr_64, implicit $rsp, implicit $ssp, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
35+
# CHECK-NEXT: $rdi = MOV64rr internal $rax
36+
# CHECK-NEXT: CALL64pcrel32 @objc_retainAutoreleasedReturnValue, csr_64, implicit internal $rsp, implicit internal $ssp, implicit-def $rax
37+
# CHECK-NEXT: }
3638
# CHECK-NEXT: $rcx = frame-destroy POP64r implicit-def $rsp, implicit $rsp
3739
# CHECK-NEXT: RET64
3840
#
@@ -62,9 +64,11 @@ body: |
6264
# CHECK: bb.0
6365
# CHECK-NEXT: frame-setup PUSH64r undef $rax, implicit-def $rsp, implicit $rsp
6466
# CHECK-NEXT: CFI_INSTRUCTION def_cfa_offset 16
65-
# CHECK-NEXT: CALL64pcrel32 @fn, csr_64, implicit $rsp, implicit $ssp, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
66-
# CHECK-NEXT: $rdi = MOV64rr $rax
67-
# CHECK-NEXT: CALL64pcrel32 @objc_unsafeClaimAutoreleasedReturnValue, csr_64, implicit $rsp, implicit $ssp, implicit-def $rax
67+
# CHECK-NEXT: BUNDLE
68+
# CHECK-NEXT: CALL64pcrel32 @fn, csr_64, implicit $rsp, implicit $ssp, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
69+
# CHECK-NEXT: $rdi = MOV64rr internal $rax
70+
# CHECK-NEXT: CALL64pcrel32 @objc_unsafeClaimAutoreleasedReturnValue, csr_64, implicit internal $rsp, implicit internal $ssp, implicit-def $rax
71+
# CHECK-NEXT: }
6872
# CHECK-NEXT: $rcx = frame-destroy POP64r implicit-def $rsp, implicit $rsp
6973
# CHECK-NEXT: RET64
7074
#
@@ -95,9 +99,11 @@ body: |
9599
# CHECK-NEXT: $rax = MOV64rr $rdi
96100
# CHECK-NEXT: $rdi = MOV64rr killed $rdx
97101
# CHECK-NEXT: $rdx = MOV64rr killed $rax
98-
# CHECK-NEXT: CALL64pcrel32 @fn, csr_64, implicit $rsp, implicit $ssp, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
99-
# CHECK-NEXT: $rdi = MOV64rr $rax
100-
# CHECK-NEXT: CALL64pcrel32 @objc_retainAutoreleasedReturnValue, csr_64, implicit $rsp, implicit $ssp, implicit-def dead $rax
102+
# CHECK-NEXT: BUNDLE
103+
# CHECK-NEXT: CALL64pcrel32 @fn, csr_64, implicit $rsp, implicit $ssp, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit $rdx, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
104+
# CHECK-NEXT: $rdi = MOV64rr internal $rax
105+
# CHECK-NEXT: CALL64pcrel32 @objc_retainAutoreleasedReturnValue, csr_64, implicit internal $rsp, implicit internal $ssp, implicit-def dead $rax
106+
# CHECK-NEXT: }
101107
# CHECK-NEXT: $rax = frame-destroy POP64r implicit-def $rsp, implicit $rsp
102108
# CHECK-NEXT: RET64
103109
#
@@ -129,9 +135,11 @@ body: |
129135
# CHECK: bb.0
130136
# CHECK-NEXT: frame-setup PUSH64r undef $rax, implicit-def $rsp, implicit $rsp
131137
# CHECK-NEXT: CFI_INSTRUCTION def_cfa_offset 16
132-
# CHECK-NEXT: CALL64pcrel32 @fn, csr_64, implicit $rsp, implicit $ssp, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
133-
# CHECK-NEXT: $rdi = MOV64rr $rax
134-
# CHECK-NEXT: CALL64pcrel32 @objc_retainAutoreleasedReturnValue, csr_64, implicit $rsp, implicit $ssp, implicit-def dead $rax
138+
# CHECK-NEXT: BUNDLE
139+
# CHECK-NEXT: CALL64pcrel32 @fn, csr_64, implicit $rsp, implicit $ssp, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
140+
# CHECK-NEXT: $rdi = MOV64rr internal $rax
141+
# CHECK-NEXT: CALL64pcrel32 @objc_retainAutoreleasedReturnValue, csr_64, implicit internal $rsp, implicit internal $ssp, implicit-def dead $rax
142+
# CHECK-NEXT: }
135143
# CHECK-NEXT: $rax = frame-destroy POP64r implicit-def $rsp, implicit $rsp
136144
# CHECK-NEXT: RET64
137145
#

0 commit comments

Comments
 (0)