Skip to content

Commit 9cb8f4d

Browse files
committed
[ARM] Add a tail-predication loop predicate register
The semantics of tail predication loops mean that the value of LR as an instruction is executed determines the predicate. In other words:

    mov r3, #3
    DLSTP lr, r3        // Start tail predication, lr==3
    VADD.s32 q0, q1, q2 // Lanes 0, 1 and 2 are updated in q0.
    mov lr, #1
    VADD.s32 q0, q1, q2 // Only first lane is updated.

This means that the value of lr cannot be spilled and re-used in tail predication regions without potentially altering the behaviour of the program. More lanes than required could be stored, for example, and in the case of a gather those lanes might not have been set up, leading to alignment exceptions.

This patch adds a new lr predicate operand to MVE instructions in order to keep a reference to the lr that they use as a tail predicate. It will usually hold the zeroreg meaning not predicated, being set to the LR phi value in the MVETPAndVPTOptimisationsPass. This will prevent it from being spilled anywhere that it needs to be used.

A lot of tests needed updating.

Differential Revision: https://reviews.llvm.org/D107638
1 parent 555a817 commit 9cb8f4d

File tree

105 files changed

+2833
-2753
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

105 files changed

+2833
-2753
lines changed

llvm/lib/CodeGen/MachineVerifier.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -1653,6 +1653,8 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
16531653
report("Unspillable Terminator does not define a reg", MI);
16541654
Register Def = MI->getOperand(0).getReg();
16551655
if (Def.isVirtual() &&
1656+
!MF->getProperties().hasProperty(
1657+
MachineFunctionProperties::Property::NoPHIs) &&
16561658
std::distance(MRI->use_nodbg_begin(Def), MRI->use_nodbg_end()) > 1)
16571659
report("Unspillable Terminator expected to have at most one use!", MI);
16581660
}

llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -867,6 +867,7 @@ void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
867867
void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) {
868868
MIB.addImm(ARMVCC::None);
869869
MIB.addReg(0);
870+
MIB.addReg(0); // tp_reg
870871
}
871872

872873
void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB,
@@ -878,6 +879,7 @@ void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB,
878879
void llvm::addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond) {
879880
MIB.addImm(Cond);
880881
MIB.addReg(ARM::VPR, RegState::Implicit);
882+
MIB.addReg(0); // tp_reg
881883
}
882884

883885
void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB,

llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp

+9-2
Original file line numberDiff line numberDiff line change
@@ -1822,8 +1822,11 @@ bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
18221822
else
18231823
return false;
18241824

1825-
SDValue Ops[] = {Base, NewOffset,
1826-
CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32), PredReg,
1825+
SDValue Ops[] = {Base,
1826+
NewOffset,
1827+
CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32),
1828+
PredReg,
1829+
CurDAG->getRegister(0, MVT::i32), // tp_reg
18271830
Chain};
18281831
SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
18291832
N->getValueType(0), MVT::Other, Ops);
@@ -2529,6 +2532,7 @@ void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
25292532
SDValue PredicateMask) {
25302533
Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
25312534
Ops.push_back(PredicateMask);
2535+
Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
25322536
}
25332537

25342538
template <typename SDValueVector>
@@ -2537,20 +2541,23 @@ void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
25372541
SDValue Inactive) {
25382542
Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
25392543
Ops.push_back(PredicateMask);
2544+
Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
25402545
Ops.push_back(Inactive);
25412546
}
25422547

25432548
template <typename SDValueVector>
25442549
void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
25452550
Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
25462551
Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2552+
Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
25472553
}
25482554

25492555
template <typename SDValueVector>
25502556
void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
25512557
EVT InactiveTy) {
25522558
Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
25532559
Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2560+
Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
25542561
Ops.push_back(SDValue(
25552562
CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
25562563
}

llvm/lib/Target/ARM/ARMISelLowering.cpp

+5-2
Original file line numberDiff line numberDiff line change
@@ -11542,6 +11542,7 @@ static void genTPLoopBody(MachineBasicBlock *TpLoopBody,
1154211542
BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VCTP8), VccrReg)
1154311543
.addUse(PredCounterPhiReg)
1154411544
.addImm(ARMVCC::None)
11545+
.addReg(0)
1154511546
.addReg(0);
1154611547

1154711548
BuildMI(TpLoopBody, Dl, TII->get(ARM::t2SUBri), RemainingElementsReg)
@@ -11560,7 +11561,8 @@ static void genTPLoopBody(MachineBasicBlock *TpLoopBody,
1156011561
.addReg(SrcPhiReg)
1156111562
.addImm(16)
1156211563
.addImm(ARMVCC::Then)
11563-
.addUse(VccrReg);
11564+
.addUse(VccrReg)
11565+
.addReg(0);
1156411566
} else
1156511567
SrcValueReg = OpSrcReg;
1156611568

@@ -11570,7 +11572,8 @@ static void genTPLoopBody(MachineBasicBlock *TpLoopBody,
1157011572
.addReg(DestPhiReg)
1157111573
.addImm(16)
1157211574
.addImm(ARMVCC::Then)
11573-
.addUse(VccrReg);
11575+
.addUse(VccrReg)
11576+
.addReg(0);
1157411577

1157511578
// Add the pseudoInstrs for decrementing the loop counter and marking the
1157611579
// end:t2DoLoopDec and t2DoLoopEnd

llvm/lib/Target/ARM/ARMInstrCDE.td

+6-6
Original file line numberDiff line numberDiff line change
@@ -612,30 +612,30 @@ multiclass VCXPredicatedPat_m<MVEVectorVTInfo VTI> {
612612
(VTI.Vec MQPR:$inactive), timm:$imm,
613613
(VTI.Pred VCCR:$pred))),
614614
(VTI.Vec (CDE_VCX1_vec p_imm:$coproc, imm_12b:$imm, ARMVCCThen,
615-
(VTI.Pred VCCR:$pred),
615+
(VTI.Pred VCCR:$pred), zero_reg,
616616
(VTI.Vec MQPR:$inactive)))>;
617617
def : Pat<(VTI.Vec (int_arm_cde_vcx1qa_predicated timm:$coproc,
618618
(VTI.Vec MQPR:$acc), timm:$imm,
619619
(VTI.Pred VCCR:$pred))),
620620
(VTI.Vec (CDE_VCX1A_vec p_imm:$coproc, (VTI.Vec MQPR:$acc),
621621
imm_12b:$imm, ARMVCCThen,
622-
(VTI.Pred VCCR:$pred)))>;
622+
(VTI.Pred VCCR:$pred), zero_reg))>;
623623

624624
def : Pat<(VTI.Vec (int_arm_cde_vcx2q_predicated timm:$coproc,
625625
(VTI.Vec MQPR:$inactive),
626626
(v16i8 MQPR:$n), timm:$imm,
627627
(VTI.Pred VCCR:$pred))),
628628
(VTI.Vec (CDE_VCX2_vec p_imm:$coproc, (v16i8 MQPR:$n),
629629
imm_7b:$imm, ARMVCCThen,
630-
(VTI.Pred VCCR:$pred),
630+
(VTI.Pred VCCR:$pred), zero_reg,
631631
(VTI.Vec MQPR:$inactive)))>;
632632
def : Pat<(VTI.Vec (int_arm_cde_vcx2qa_predicated timm:$coproc,
633633
(VTI.Vec MQPR:$acc),
634634
(v16i8 MQPR:$n), timm:$imm,
635635
(VTI.Pred VCCR:$pred))),
636636
(VTI.Vec (CDE_VCX2A_vec p_imm:$coproc, (VTI.Vec MQPR:$acc),
637637
(v16i8 MQPR:$n), timm:$imm, ARMVCCThen,
638-
(VTI.Pred VCCR:$pred)))>;
638+
(VTI.Pred VCCR:$pred), zero_reg))>;
639639

640640
def : Pat<(VTI.Vec (int_arm_cde_vcx3q_predicated timm:$coproc,
641641
(VTI.Vec MQPR:$inactive),
@@ -645,7 +645,7 @@ multiclass VCXPredicatedPat_m<MVEVectorVTInfo VTI> {
645645
(VTI.Vec (CDE_VCX3_vec p_imm:$coproc, (v16i8 MQPR:$n),
646646
(v16i8 MQPR:$m),
647647
imm_4b:$imm, ARMVCCThen,
648-
(VTI.Pred VCCR:$pred),
648+
(VTI.Pred VCCR:$pred), zero_reg,
649649
(VTI.Vec MQPR:$inactive)))>;
650650
def : Pat<(VTI.Vec (int_arm_cde_vcx3qa_predicated timm:$coproc,
651651
(VTI.Vec MQPR:$acc),
@@ -654,7 +654,7 @@ multiclass VCXPredicatedPat_m<MVEVectorVTInfo VTI> {
654654
(VTI.Vec (CDE_VCX3A_vec p_imm:$coproc, (VTI.Vec MQPR:$acc),
655655
(v16i8 MQPR:$n), (v16i8 MQPR:$m),
656656
imm_4b:$imm, ARMVCCThen,
657-
(VTI.Pred VCCR:$pred)))>;
657+
(VTI.Pred VCCR:$pred), zero_reg))>;
658658
}
659659

660660
let Predicates = [HasCDE, HasMVEInt] in

llvm/lib/Target/ARM/ARMInstrFormats.td

+2-2
Original file line numberDiff line numberDiff line change
@@ -249,10 +249,10 @@ def VPTPredROperand : AsmOperandClass {
249249

250250
// Base class for both kinds of vpred.
251251
class vpred_ops<dag extra_op, dag extra_mi> : OperandWithDefaultOps<OtherVT,
252-
!con((ops (i32 0), (i32 zero_reg)), extra_op)> {
252+
!con((ops (i32 0), (i32 zero_reg), (i32 zero_reg)), extra_op)> {
253253
let PrintMethod = "printVPTPredicateOperand";
254254
let OperandNamespace = "ARM";
255-
let MIOperandInfo = !con((ops i32imm:$cond, VCCR:$cond_reg), extra_mi);
255+
let MIOperandInfo = !con((ops i32imm:$cond, VCCR:$cond_reg, GPRlr:$tp_reg), extra_mi);
256256

257257
// For convenience, we provide a string value that can be appended
258258
// to the constraints string. It's empty for vpred_n, and for

0 commit comments

Comments
 (0)