Skip to content

Commit 0ab539f

Browse files
authored
[VPlan] Add new VPScalarCastRecipe, use for IV & step trunc. (#78113)
Add a new recipe to model scalar cast instructions. This allows creating scalar casts without relying on an underlying instruction (unlike the current VPReplicateRecipe). The new recipe is used to explicitly model truncating both the induction step and the result of VPDerivedIVRecipe, thus simplifying both VPDerivedIVRecipe and the code needed to introduce it. Truncating VPWidenIntOrFpInductionRecipes should also be modeled using the new recipe, as a follow-up. PR: #78113
1 parent b64c26f commit 0ab539f

File tree

9 files changed

+140
-48
lines changed

9 files changed

+140
-48
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9277,12 +9277,6 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) {
92779277
State.Builder, CanonicalIV, getStartValue()->getLiveInIRValue(), Step,
92789278
Kind, cast_if_present<BinaryOperator>(FPBinOp));
92799279
DerivedIV->setName("offset.idx");
9280-
if (TruncResultTy) {
9281-
assert(TruncResultTy != DerivedIV->getType() &&
9282-
Step->getType()->isIntegerTy() &&
9283-
"Truncation requires an integer step");
9284-
DerivedIV = State.Builder.CreateTrunc(DerivedIV, TruncResultTy);
9285-
}
92869280
assert(DerivedIV != CanonicalIV && "IV didn't need transforming?");
92879281

92889282
State.set(this, DerivedIV, VPIteration(0, 0));

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -859,6 +859,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
859859
case VPRecipeBase::VPWidenIntOrFpInductionSC:
860860
case VPRecipeBase::VPWidenPointerInductionSC:
861861
case VPRecipeBase::VPReductionPHISC:
862+
case VPRecipeBase::VPScalarCastSC:
862863
return true;
863864
case VPRecipeBase::VPInterleaveSC:
864865
case VPRecipeBase::VPBranchOnMaskSC:
@@ -1338,6 +1339,34 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
13381339
Type *getResultType() const { return ResultTy; }
13391340
};
13401341

1342+
/// VPScalarCastRecipe is a recipe to create scalar cast instructions.
1343+
class VPScalarCastRecipe : public VPSingleDefRecipe {
1344+
Instruction::CastOps Opcode;
1345+
1346+
Type *ResultTy;
1347+
1348+
Value *generate(VPTransformState &State, unsigned Part);
1349+
1350+
public:
1351+
VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
1352+
: VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}), Opcode(Opcode),
1353+
ResultTy(ResultTy) {}
1354+
1355+
~VPScalarCastRecipe() override = default;
1356+
1357+
VP_CLASSOF_IMPL(VPDef::VPScalarCastSC)
1358+
1359+
void execute(VPTransformState &State) override;
1360+
1361+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1362+
void print(raw_ostream &O, const Twine &Indent,
1363+
VPSlotTracker &SlotTracker) const override;
1364+
#endif
1365+
1366+
/// Returns the result type of the cast.
1367+
Type *getResultType() const { return ResultTy; }
1368+
};
1369+
13411370
/// A recipe for widening Call instructions.
13421371
class VPWidenCallRecipe : public VPSingleDefRecipe {
13431372
/// ID of the vector intrinsic to call when widening the call. If set the
@@ -2254,10 +2283,9 @@ class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
22542283
}
22552284

22562285
/// Check if the induction described by \p Kind, \p Start and \p Step is
2257-
/// canonical, i.e. has the same start, step (of 1), and type as the
2258-
/// canonical IV.
2286+
/// canonical, i.e. has the same start and step (of 1) as the canonical IV.
22592287
bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start,
2260-
VPValue *Step, Type *Ty) const;
2288+
VPValue *Step) const;
22612289
};
22622290

22632291
/// A recipe for generating the active lane mask for the vector loop that is
@@ -2320,10 +2348,6 @@ class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe {
23202348
/// an IV with different start and step values, using Start + CanonicalIV *
23212349
/// Step.
23222350
class VPDerivedIVRecipe : public VPSingleDefRecipe {
2323-
/// If not nullptr, the result of the induction will get truncated to
2324-
/// TruncResultTy.
2325-
Type *TruncResultTy;
2326-
23272351
/// Kind of the induction.
23282352
const InductionDescriptor::InductionKind Kind;
23292353
/// If not nullptr, the floating point induction binary operator. Must be set
@@ -2332,10 +2356,9 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
23322356

23332357
public:
23342358
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start,
2335-
VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
2336-
Type *TruncResultTy)
2359+
VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step)
23372360
: VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}),
2338-
TruncResultTy(TruncResultTy), Kind(IndDesc.getKind()),
2361+
Kind(IndDesc.getKind()),
23392362
FPBinOp(dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())) {
23402363
}
23412364

@@ -2354,8 +2377,7 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
23542377
#endif
23552378

23562379
Type *getScalarType() const {
2357-
return TruncResultTy ? TruncResultTy
2358-
: getStartValue()->getLiveInIRValue()->getType();
2380+
return getStartValue()->getLiveInIRValue()->getType();
23592381
}
23602382

23612383
VPValue *getStartValue() const { return getOperand(0); }

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,13 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
230230
return V->getUnderlyingValue()->getType();
231231
})
232232
.Case<VPWidenCastRecipe>(
233-
[](const VPWidenCastRecipe *R) { return R->getResultType(); });
233+
[](const VPWidenCastRecipe *R) { return R->getResultType(); })
234+
.Case<VPScalarCastRecipe>(
235+
[](const VPScalarCastRecipe *R) { return R->getResultType(); })
236+
.Case<VPExpandSCEVRecipe>([](const VPExpandSCEVRecipe *R) {
237+
return R->getSCEV()->getType();
238+
});
239+
234240
assert(ResultTy && "could not infer type for the given VPValue");
235241
CachedTypes[V] = ResultTy;
236242
return ResultTy;

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 58 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
117117
switch (getVPDefID()) {
118118
case VPDerivedIVSC:
119119
case VPPredInstPHISC:
120+
case VPScalarCastSC:
120121
return false;
121122
case VPInstructionSC:
122123
switch (cast<VPInstruction>(this)->getOpcode()) {
@@ -1096,9 +1097,6 @@ void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent,
10961097
getCanonicalIV()->printAsOperand(O, SlotTracker);
10971098
O << " * ";
10981099
getStepValue()->printAsOperand(O, SlotTracker);
1099-
1100-
if (TruncResultTy)
1101-
O << " (truncated to " << *TruncResultTy << ")";
11021100
}
11031101
#endif
11041102

@@ -1117,13 +1115,7 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
11171115

11181116
// Ensure step has the same type as that of scalar IV.
11191117
Type *BaseIVTy = BaseIV->getType()->getScalarType();
1120-
if (BaseIVTy != Step->getType()) {
1121-
// TODO: Also use VPDerivedIVRecipe when only the step needs truncating, to
1122-
// avoid separate truncate here.
1123-
assert(Step->getType()->isIntegerTy() &&
1124-
"Truncation requires an integer step");
1125-
Step = State.Builder.CreateTrunc(Step, BaseIVTy);
1126-
}
1118+
assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");
11271119

11281120
// We build scalar steps for both integer and floating-point induction
11291121
// variables. Here, we determine the kind of arithmetic we will perform.
@@ -1469,6 +1461,58 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
14691461
}
14701462
#endif
14711463

1464+
/// Checks if \p C is uniform across all VFs and UFs. It is considered as such
1465+
/// if it is either defined outside the vector region or its operand is known to
1466+
/// be uniform across all VFs and UFs (e.g. VPDerivedIV or VPCanonicalIVPHI).
1467+
/// TODO: Uniformity should be associated with a VPValue and there should be a
1468+
/// generic way to check.
1469+
static bool isUniformAcrossVFsAndUFs(VPScalarCastRecipe *C) {
1470+
return C->isDefinedOutsideVectorRegions() ||
1471+
isa<VPDerivedIVRecipe>(C->getOperand(0)) ||
1472+
isa<VPCanonicalIVPHIRecipe>(C->getOperand(0));
1473+
}
1474+
1475+
Value *VPScalarCastRecipe ::generate(VPTransformState &State, unsigned Part) {
1476+
assert(vputils::onlyFirstLaneUsed(this) &&
1477+
"Codegen only implemented for first lane.");
1478+
switch (Opcode) {
1479+
case Instruction::SExt:
1480+
case Instruction::ZExt:
1481+
case Instruction::Trunc: {
1482+
// Note: SExt/ZExt not used yet.
1483+
Value *Op = State.get(getOperand(0), VPIteration(Part, 0));
1484+
return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy);
1485+
}
1486+
default:
1487+
llvm_unreachable("opcode not implemented yet");
1488+
}
1489+
}
1490+
1491+
void VPScalarCastRecipe ::execute(VPTransformState &State) {
1492+
bool IsUniformAcrossVFsAndUFs = isUniformAcrossVFsAndUFs(this);
1493+
for (unsigned Part = 0; Part != State.UF; ++Part) {
1494+
Value *Res;
1495+
// Only generate a single instance, if the recipe is uniform across UFs and
1496+
// VFs.
1497+
if (Part > 0 && IsUniformAcrossVFsAndUFs)
1498+
Res = State.get(this, VPIteration(0, 0));
1499+
else
1500+
Res = generate(State, Part);
1501+
State.set(this, Res, VPIteration(Part, 0));
1502+
}
1503+
}
1504+
1505+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1506+
void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent,
1507+
VPSlotTracker &SlotTracker) const {
1508+
O << Indent << "SCALAR-CAST ";
1509+
printAsOperand(O, SlotTracker);
1510+
O << " = " << Instruction::getOpcodeName(Opcode) << " ";
1511+
printOperands(O, SlotTracker);
1512+
O << " to " << *ResultTy;
1513+
}
1514+
#endif
1515+
14721516
void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
14731517
assert(State.Instance && "Branch on Mask works only on single instance.");
14741518

@@ -1587,10 +1631,10 @@ void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
15871631
#endif
15881632

15891633
bool VPCanonicalIVPHIRecipe::isCanonical(
1590-
InductionDescriptor::InductionKind Kind, VPValue *Start, VPValue *Step,
1591-
Type *Ty) const {
1592-
// The types must match and it must be an integer induction.
1593-
if (Ty != getScalarType() || Kind != InductionDescriptor::IK_IntInduction)
1634+
InductionDescriptor::InductionKind Kind, VPValue *Start,
1635+
VPValue *Step) const {
1636+
// Must be an integer induction.
1637+
if (Kind != InductionDescriptor::IK_IntInduction)
15941638
return false;
15951639
// Start must match the start value of this canonical induction.
15961640
if (Start != getStartValue())

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -491,17 +491,39 @@ void VPlanTransforms::removeDeadRecipes(VPlan &Plan) {
491491

492492
static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID,
493493
ScalarEvolution &SE, Instruction *TruncI,
494-
Type *IVTy, VPValue *StartV,
495-
VPValue *Step) {
494+
VPValue *StartV, VPValue *Step) {
496495
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
497496
auto IP = HeaderVPBB->getFirstNonPhi();
498497
VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
499-
Type *TruncTy = TruncI ? TruncI->getType() : IVTy;
500-
VPValue *BaseIV = CanonicalIV;
501-
if (!CanonicalIV->isCanonical(ID.getKind(), StartV, Step, TruncTy)) {
502-
BaseIV = new VPDerivedIVRecipe(ID, StartV, CanonicalIV, Step,
503-
TruncI ? TruncI->getType() : nullptr);
504-
HeaderVPBB->insert(BaseIV->getDefiningRecipe(), IP);
498+
VPSingleDefRecipe *BaseIV = CanonicalIV;
499+
if (!CanonicalIV->isCanonical(ID.getKind(), StartV, Step)) {
500+
BaseIV = new VPDerivedIVRecipe(ID, StartV, CanonicalIV, Step);
501+
HeaderVPBB->insert(BaseIV, IP);
502+
}
503+
504+
// Truncate base induction if needed.
505+
VPTypeAnalysis TypeInfo(SE.getContext());
506+
Type *ResultTy = TypeInfo.inferScalarType(BaseIV);
507+
if (TruncI) {
508+
Type *TruncTy = TruncI->getType();
509+
assert(ResultTy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits() &&
510+
"Not truncating.");
511+
assert(ResultTy->isIntegerTy() && "Truncation requires an integer type");
512+
BaseIV = new VPScalarCastRecipe(Instruction::Trunc, BaseIV, TruncTy);
513+
HeaderVPBB->insert(BaseIV, IP);
514+
ResultTy = TruncTy;
515+
}
516+
517+
// Truncate step if needed.
518+
Type *StepTy = TypeInfo.inferScalarType(Step);
519+
if (ResultTy != StepTy) {
520+
assert(StepTy->getScalarSizeInBits() > ResultTy->getScalarSizeInBits() &&
521+
"Not truncating.");
522+
assert(StepTy->isIntegerTy() && "Truncation requires an integer type");
523+
Step = new VPScalarCastRecipe(Instruction::Trunc, Step, ResultTy);
524+
auto *VecPreheader =
525+
cast<VPBasicBlock>(HeaderVPBB->getSingleHierarchicalPredecessor());
526+
VecPreheader->appendRecipe(Step->getDefiningRecipe());
505527
}
506528

507529
VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(ID, BaseIV, Step);
@@ -523,9 +545,9 @@ void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
523545
continue;
524546

525547
const InductionDescriptor &ID = WideIV->getInductionDescriptor();
526-
VPValue *Steps = createScalarIVSteps(
527-
Plan, ID, SE, WideIV->getTruncInst(), WideIV->getPHINode()->getType(),
528-
WideIV->getStartValue(), WideIV->getStepValue());
548+
VPValue *Steps =
549+
createScalarIVSteps(Plan, ID, SE, WideIV->getTruncInst(),
550+
WideIV->getStartValue(), WideIV->getStepValue());
529551

530552
// Update scalar users of IV to use Step instead.
531553
if (!HasOnlyVectorVFs)

llvm/lib/Transforms/Vectorize/VPlanValue.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,7 @@ class VPDef {
350350
VPInterleaveSC,
351351
VPReductionSC,
352352
VPReplicateSC,
353+
VPScalarCastSC,
353354
VPScalarIVStepsSC,
354355
VPVectorPointerSC,
355356
VPWidenCallSC,

llvm/test/Transforms/LoopVectorize/cast-induction.ll

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,12 +83,14 @@ define void @cast_variable_step(i64 %step) {
8383
; VF4: middle.block:
8484
;
8585
; IC2-LABEL: @cast_variable_step(
86+
; IC2: [[TRUNC_STEP:%.+]] = trunc i64 %step to i32
87+
; IC2: br label %vector.body
88+
8689
; IC2-LABEL: vector.body:
8790
; IC2-NEXT: [[INDEX:%.+]] = phi i64 [ 0, %vector.ph ]
8891
; IC2-NEXT: [[MUL:%.+]] = mul i64 %index, %step
8992
; IC2-NEXT: [[OFFSET_IDX:%.+]] = add i64 10, [[MUL]]
9093
; IC2-NEXT: [[TRUNC_OFF:%.+]] = trunc i64 [[OFFSET_IDX]] to i32
91-
; IC2-NEXT: [[TRUNC_STEP:%.+]] = trunc i64 %step to i32
9294
; IC2-NEXT: [[STEP0:%.+]] = mul i32 0, [[TRUNC_STEP]]
9395
; IC2-NEXT: [[T0:%.+]] = add i32 [[TRUNC_OFF]], [[STEP0]]
9496
; IC2-NEXT: [[STEP1:%.+]] = mul i32 1, [[TRUNC_STEP]]

llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,14 +184,15 @@ exit:
184184
; DBG-NEXT: No successors
185185
; DBG-EMPTY:
186186
; DBG-NEXT: vector.ph:
187+
; DBG-NEXT: SCALAR-CAST vp<[[CAST:%.+]]> = trunc ir<1> to i32
187188
; DBG-NEXT: Successor(s): vector loop
188189
; DBG-EMPTY:
189190
; DBG-NEXT: <x1> vector loop: {
190191
; DBG-NEXT: vector.body:
191192
; DBG-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
192193
; DBG-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for> = phi ir<0>, vp<[[SCALAR_STEPS:.+]]>
193-
; DBG-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * ir<1> (truncated to i32)
194-
; DBG-NEXT: vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<1>
194+
; DBG-NEXT: SCALAR-CAST vp<[[TRUNC_IV:%.+]]> = trunc vp<[[CAN_IV]]> to i32
195+
; DBG-NEXT: vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[TRUNC_IV]]>, vp<[[CAST]]>
195196
; DBG-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%for>, vp<[[SCALAR_STEPS]]>
196197
; DBG-NEXT: CLONE store vp<[[SPLICE]]>, ir<%dst>
197198
; DBG-NEXT: EMIT vp<[[IV_INC:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>

llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ define void @test(i16 %x, i64 %y, ptr %ptr) {
4343
; CHECK-NEXT: [[V3:%.*]] = add i8 [[V2]], 1
4444
; CHECK-NEXT: [[CMP15:%.*]] = icmp slt i8 [[V3]], 5
4545
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], [[INC]]
46-
; CHECK-NEXT: br i1 [[CMP15]], label [[LOOP]], label [[LOOP_EXIT]], !llvm.loop [[LOOP2:![0-9]+]]
46+
; CHECK-NEXT: br i1 [[CMP15]], label [[LOOP]], label [[LOOP_EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
4747
; CHECK: loop.exit:
4848
; CHECK-NEXT: [[DIV_1:%.*]] = udiv i64 [[Y]], [[ADD]]
4949
; CHECK-NEXT: [[V1:%.*]] = add i64 [[DIV_1]], 1

0 commit comments

Comments
 (0)