@@ -586,29 +586,55 @@ void X86FrameLowering::emitStackProbeInlineGeneric(
586
586
const uint64_t StackProbeSize = TLI.getStackProbeSize (MF);
587
587
uint64_t ProbeChunk = StackProbeSize * 8 ;
588
588
589
+ uint64_t MaxAlign =
590
+ TRI->needsStackRealignment (MF) ? calculateMaxStackAlign (MF) : 0 ;
591
+
589
592
// Synthesize a loop or unroll it, depending on the number of iterations.
593
+ // BuildStackAlignAND ensures that only MaxAlign % StackProbeSize bytes are left
594
+ // between the unaligned rsp and current rsp.
590
595
if (Offset > ProbeChunk) {
591
- emitStackProbeInlineGenericLoop (MF, MBB, MBBI, DL, Offset);
596
+ emitStackProbeInlineGenericLoop (MF, MBB, MBBI, DL, Offset,
597
+ MaxAlign % StackProbeSize);
592
598
} else {
593
- emitStackProbeInlineGenericBlock (MF, MBB, MBBI, DL, Offset);
599
+ emitStackProbeInlineGenericBlock (MF, MBB, MBBI, DL, Offset,
600
+ MaxAlign % StackProbeSize);
594
601
}
595
602
}
596
603
597
604
void X86FrameLowering::emitStackProbeInlineGenericBlock (
598
605
MachineFunction &MF, MachineBasicBlock &MBB,
599
- MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
600
- uint64_t Offset ) const {
606
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
607
+ uint64_t AlignOffset ) const {
601
608
602
609
const X86Subtarget &STI = MF.getSubtarget <X86Subtarget>();
603
610
const X86TargetLowering &TLI = *STI.getTargetLowering ();
604
611
const unsigned Opc = getSUBriOpcode (Uses64BitFramePtr, Offset);
605
612
const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
606
613
const uint64_t StackProbeSize = TLI.getStackProbeSize (MF);
614
+
607
615
uint64_t CurrentOffset = 0 ;
608
- // 0 Thanks to return address being saved on the stack
609
- uint64_t CurrentProbeOffset = 0 ;
610
616
611
- // For the first N - 1 pages, just probe. I tried to take advantage of
617
+ assert (AlignOffset < StackProbeSize);
618
+
619
+ // If Offset + AlignOffset fits within StackProbeSize, there's nothing to do here; otherwise allocate StackProbeSize - AlignOffset bytes and probe.
620
+ if (StackProbeSize < Offset + AlignOffset) {
621
+
622
+ MachineInstr *MI = BuildMI (MBB, MBBI, DL, TII.get (Opc), StackPtr)
623
+ .addReg (StackPtr)
624
+ .addImm (StackProbeSize - AlignOffset)
625
+ .setMIFlag (MachineInstr::FrameSetup);
626
+ MI->getOperand (3 ).setIsDead (); // The EFLAGS implicit def is dead.
627
+
628
+ addRegOffset (BuildMI (MBB, MBBI, DL, TII.get (MovMIOpc))
629
+ .setMIFlag (MachineInstr::FrameSetup),
630
+ StackPtr, false , 0 )
631
+ .addImm (0 )
632
+ .setMIFlag (MachineInstr::FrameSetup);
633
+ NumFrameExtraProbe++;
634
+ CurrentOffset = StackProbeSize - AlignOffset;
635
+ }
636
+
637
+ // For the next N - 1 pages, just probe. I tried to take advantage of
612
638
// natural probes but it implies much more logic and there were very few
613
639
// interesting natural probes to interleave.
614
640
while (CurrentOffset + StackProbeSize < Offset) {
@@ -626,9 +652,9 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock(
626
652
.setMIFlag (MachineInstr::FrameSetup);
627
653
NumFrameExtraProbe++;
628
654
CurrentOffset += StackProbeSize;
629
- CurrentProbeOffset += StackProbeSize;
630
655
}
631
656
657
+ // No need to probe the tail; it is smaller than a page.
632
658
uint64_t ChunkSize = Offset - CurrentOffset;
633
659
MachineInstr *MI = BuildMI (MBB, MBBI, DL, TII.get (Opc), StackPtr)
634
660
.addReg (StackPtr)
@@ -639,15 +665,35 @@ void X86FrameLowering::emitStackProbeInlineGenericBlock(
639
665
640
666
void X86FrameLowering::emitStackProbeInlineGenericLoop (
641
667
MachineFunction &MF, MachineBasicBlock &MBB,
642
- MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
643
- uint64_t Offset ) const {
668
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
669
+ uint64_t AlignOffset ) const {
644
670
assert (Offset && " null offset" );
645
671
646
672
const X86Subtarget &STI = MF.getSubtarget <X86Subtarget>();
647
673
const X86TargetLowering &TLI = *STI.getTargetLowering ();
648
674
const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
649
675
const uint64_t StackProbeSize = TLI.getStackProbeSize (MF);
650
676
677
+ if (AlignOffset) {
678
+ if (AlignOffset < StackProbeSize) {
679
+ // Perform a first smaller allocation followed by a probe.
680
+ const unsigned SUBOpc = getSUBriOpcode (Uses64BitFramePtr, AlignOffset);
681
+ MachineInstr *MI = BuildMI (MBB, MBBI, DL, TII.get (SUBOpc), StackPtr)
682
+ .addReg (StackPtr)
683
+ .addImm (AlignOffset)
684
+ .setMIFlag (MachineInstr::FrameSetup);
685
+ MI->getOperand (3 ).setIsDead (); // The EFLAGS implicit def is dead.
686
+
687
+ addRegOffset (BuildMI (MBB, MBBI, DL, TII.get (MovMIOpc))
688
+ .setMIFlag (MachineInstr::FrameSetup),
689
+ StackPtr, false , 0 )
690
+ .addImm (0 )
691
+ .setMIFlag (MachineInstr::FrameSetup);
692
+ NumFrameExtraProbe++;
693
+ Offset -= AlignOffset;
694
+ }
695
+ }
696
+
651
697
// Synthesize a loop
652
698
NumFrameLoopProbe++;
653
699
const BasicBlock *LLVM_BB = MBB.getBasicBlock ();
@@ -666,17 +712,17 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
666
712
667
713
// save loop bound
668
714
{
669
- const unsigned Opc = getSUBriOpcode (Uses64BitFramePtr, Offset);
670
- BuildMI (MBB, MBBI, DL, TII.get (Opc ), FinalStackProbed)
715
+ const unsigned SUBOpc = getSUBriOpcode (Uses64BitFramePtr, Offset);
716
+ BuildMI (MBB, MBBI, DL, TII.get (SUBOpc ), FinalStackProbed)
671
717
.addReg (FinalStackProbed)
672
718
.addImm (Offset / StackProbeSize * StackProbeSize)
673
719
.setMIFlag (MachineInstr::FrameSetup);
674
720
}
675
721
676
722
// allocate a page
677
723
{
678
- const unsigned Opc = getSUBriOpcode (Uses64BitFramePtr, StackProbeSize);
679
- BuildMI (testMBB, DL, TII.get (Opc ), StackPtr)
724
+ const unsigned SUBOpc = getSUBriOpcode (Uses64BitFramePtr, StackProbeSize);
725
+ BuildMI (testMBB, DL, TII.get (SUBOpc ), StackPtr)
680
726
.addReg (StackPtr)
681
727
.addImm (StackProbeSize)
682
728
.setMIFlag (MachineInstr::FrameSetup);
@@ -1052,13 +1098,149 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
1052
1098
uint64_t MaxAlign) const {
1053
1099
uint64_t Val = -MaxAlign;
1054
1100
unsigned AndOp = getANDriOpcode (Uses64BitFramePtr, Val);
1055
- MachineInstr *MI = BuildMI (MBB, MBBI, DL, TII.get (AndOp), Reg)
1056
- .addReg (Reg)
1057
- .addImm (Val)
1058
- .setMIFlag (MachineInstr::FrameSetup);
1059
1101
1060
- // The EFLAGS implicit def is dead.
1061
- MI->getOperand (3 ).setIsDead ();
1102
+ MachineFunction &MF = *MBB.getParent ();
1103
+ const X86Subtarget &STI = MF.getSubtarget <X86Subtarget>();
1104
+ const X86TargetLowering &TLI = *STI.getTargetLowering ();
1105
+ const uint64_t StackProbeSize = TLI.getStackProbeSize (MF);
1106
+ const bool EmitInlineStackProbe = TLI.hasInlineStackProbe (MF);
1107
+
1108
+ // We want to make sure that (in worst case) less than StackProbeSize bytes
1109
+ // are not probed after the AND. This assumption is used in
1110
+ // emitStackProbeInlineGeneric.
1111
+ if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
1112
+ {
1113
+ NumFrameLoopProbe++;
1114
+ MachineBasicBlock *entryMBB =
1115
+ MF.CreateMachineBasicBlock (MBB.getBasicBlock ());
1116
+ MachineBasicBlock *headMBB =
1117
+ MF.CreateMachineBasicBlock (MBB.getBasicBlock ());
1118
+ MachineBasicBlock *bodyMBB =
1119
+ MF.CreateMachineBasicBlock (MBB.getBasicBlock ());
1120
+ MachineBasicBlock *footMBB =
1121
+ MF.CreateMachineBasicBlock (MBB.getBasicBlock ());
1122
+
1123
+ MachineFunction::iterator MBBIter = MBB.getIterator ();
1124
+ MF.insert (MBBIter, entryMBB);
1125
+ MF.insert (MBBIter, headMBB);
1126
+ MF.insert (MBBIter, bodyMBB);
1127
+ MF.insert (MBBIter, footMBB);
1128
+ const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
1129
+ Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D;
1130
+
1131
+ // Setup entry block
1132
+ {
1133
+
1134
+ entryMBB->splice (entryMBB->end (), &MBB, MBB.begin (), MBBI);
1135
+ BuildMI (entryMBB, DL, TII.get (TargetOpcode::COPY), FinalStackProbed)
1136
+ .addReg (StackPtr)
1137
+ .setMIFlag (MachineInstr::FrameSetup);
1138
+ MachineInstr *MI =
1139
+ BuildMI (entryMBB, DL, TII.get (AndOp), FinalStackProbed)
1140
+ .addReg (FinalStackProbed)
1141
+ .addImm (Val)
1142
+ .setMIFlag (MachineInstr::FrameSetup);
1143
+
1144
+ // The EFLAGS implicit def is dead.
1145
+ MI->getOperand (3 ).setIsDead ();
1146
+
1147
+ BuildMI (entryMBB, DL,
1148
+ TII.get (Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
1149
+ .addReg (FinalStackProbed)
1150
+ .addReg (StackPtr)
1151
+ .setMIFlag (MachineInstr::FrameSetup);
1152
+ BuildMI (entryMBB, DL, TII.get (X86::JCC_1))
1153
+ .addMBB (&MBB)
1154
+ .addImm (X86::COND_E)
1155
+ .setMIFlag (MachineInstr::FrameSetup);
1156
+ entryMBB->addSuccessor (headMBB);
1157
+ entryMBB->addSuccessor (&MBB);
1158
+ }
1159
+
1160
+ // Loop entry block
1161
+
1162
+ {
1163
+ const unsigned SUBOpc =
1164
+ getSUBriOpcode (Uses64BitFramePtr, StackProbeSize);
1165
+ BuildMI (headMBB, DL, TII.get (SUBOpc), StackPtr)
1166
+ .addReg (StackPtr)
1167
+ .addImm (StackProbeSize)
1168
+ .setMIFlag (MachineInstr::FrameSetup);
1169
+
1170
+ BuildMI (headMBB, DL,
1171
+ TII.get (Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
1172
+ .addReg (FinalStackProbed)
1173
+ .addReg (StackPtr)
1174
+ .setMIFlag (MachineInstr::FrameSetup);
1175
+
1176
+ // jump
1177
+ BuildMI (headMBB, DL, TII.get (X86::JCC_1))
1178
+ .addMBB (footMBB)
1179
+ .addImm (X86::COND_B)
1180
+ .setMIFlag (MachineInstr::FrameSetup);
1181
+
1182
+ headMBB->addSuccessor (bodyMBB);
1183
+ headMBB->addSuccessor (footMBB);
1184
+ }
1185
+
1186
+ // setup loop body
1187
+ {
1188
+ addRegOffset (BuildMI (bodyMBB, DL, TII.get (MovMIOpc))
1189
+ .setMIFlag (MachineInstr::FrameSetup),
1190
+ StackPtr, false , 0 )
1191
+ .addImm (0 )
1192
+ .setMIFlag (MachineInstr::FrameSetup);
1193
+
1194
+ const unsigned SUBOpc =
1195
+ getSUBriOpcode (Uses64BitFramePtr, StackProbeSize);
1196
+ BuildMI (bodyMBB, DL, TII.get (SUBOpc), StackPtr)
1197
+ .addReg (StackPtr)
1198
+ .addImm (StackProbeSize)
1199
+ .setMIFlag (MachineInstr::FrameSetup);
1200
+
1201
+ // cmp with stack pointer bound
1202
+ BuildMI (bodyMBB, DL,
1203
+ TII.get (Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
1204
+ .addReg (FinalStackProbed)
1205
+ .addReg (StackPtr)
1206
+ .setMIFlag (MachineInstr::FrameSetup);
1207
+
1208
+ // jump
1209
+ BuildMI (bodyMBB, DL, TII.get (X86::JCC_1))
1210
+ .addMBB (bodyMBB)
1211
+ .addImm (X86::COND_B)
1212
+ .setMIFlag (MachineInstr::FrameSetup);
1213
+ bodyMBB->addSuccessor (bodyMBB);
1214
+ bodyMBB->addSuccessor (footMBB);
1215
+ }
1216
+
1217
+ // setup loop footer
1218
+ {
1219
+ BuildMI (footMBB, DL, TII.get (TargetOpcode::COPY), StackPtr)
1220
+ .addReg (FinalStackProbed)
1221
+ .setMIFlag (MachineInstr::FrameSetup);
1222
+ addRegOffset (BuildMI (footMBB, DL, TII.get (MovMIOpc))
1223
+ .setMIFlag (MachineInstr::FrameSetup),
1224
+ StackPtr, false , 0 )
1225
+ .addImm (0 )
1226
+ .setMIFlag (MachineInstr::FrameSetup);
1227
+ footMBB->addSuccessor (&MBB);
1228
+ }
1229
+
1230
+ recomputeLiveIns (*headMBB);
1231
+ recomputeLiveIns (*bodyMBB);
1232
+ recomputeLiveIns (*footMBB);
1233
+ recomputeLiveIns (MBB);
1234
+ }
1235
+ } else {
1236
+ MachineInstr *MI = BuildMI (MBB, MBBI, DL, TII.get (AndOp), Reg)
1237
+ .addReg (Reg)
1238
+ .addImm (Val)
1239
+ .setMIFlag (MachineInstr::FrameSetup);
1240
+
1241
+ // The EFLAGS implicit def is dead.
1242
+ MI->getOperand (3 ).setIsDead ();
1243
+ }
1062
1244
}
1063
1245
1064
1246
bool X86FrameLowering::has128ByteRedZone (const MachineFunction& MF) const {
0 commit comments