Skip to content

[LV][EVL] Support cast instruction with EVL-vectorization #108351

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Dec 11, 2024

Conversation

LiqinWeng
Copy link
Contributor

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Sep 12, 2024

@llvm/pr-subscribers-llvm-ir

@llvm/pr-subscribers-llvm-transforms

Author: LiqinWeng (LiqinWeng)

Changes

Patch is 25.63 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/108351.diff

7 Files Affected:

  • (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+1)
  • (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+72-5)
  • (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+47)
  • (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+9)
  • (modified) llvm/lib/Transforms/Vectorize/VPlanValue.h (+1)
  • (modified) llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll (+10-10)
  • (added) llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll (+227)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b821da03c16e94..8450ed1765cafa 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4480,6 +4480,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
       case VPDef::VPWidenCallSC:
       case VPDef::VPWidenCanonicalIVSC:
       case VPDef::VPWidenCastSC:
+      case VPDef::VPWidenCastEVLSC:
       case VPDef::VPWidenGEPSC:
       case VPDef::VPWidenSC:
       case VPDef::VPWidenSelectSC:
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 64242e43c56bc8..7e45a1c0b36dbd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -921,6 +921,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
     case VPRecipeBase::VPWidenCallSC:
     case VPRecipeBase::VPWidenCanonicalIVSC:
     case VPRecipeBase::VPWidenCastSC:
+    case VPRecipeBase::VPWidenCastEVLSC:
     case VPRecipeBase::VPWidenGEPSC:
     case VPRecipeBase::VPWidenSC:
     case VPRecipeBase::VPWidenEVLSC:
@@ -1111,6 +1112,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
            R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
            R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
            R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
+           R->getVPDefID() == VPRecipeBase::VPWidenCastEVLSC ||
            R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
            R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
   }
@@ -1514,19 +1516,28 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
   /// Result type for the cast.
   Type *ResultTy;
 
-public:
-  VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
-                    CastInst &UI)
+protected:
+  VPWidenCastRecipe(unsigned VPDefOpcode, Instruction::CastOps Opcode,
+                    VPValue *Op, Type *ResultTy, CastInst &UI)
       : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode),
         ResultTy(ResultTy) {
     assert(UI.getOpcode() == Opcode &&
            "opcode of underlying cast doesn't match");
   }
 
-  VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
+  VPWidenCastRecipe(unsigned VPDefOpcode, Instruction::CastOps Opcode,
+                    VPValue *Op, Type *ResultTy)
       : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode),
         ResultTy(ResultTy) {}
 
+public:
+  VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
+                    CastInst &UI)
+      : VPWidenCastRecipe(VPDef::VPWidenCastSC, Opcode, Op, ResultTy, UI) {}
+
+  VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
+      : VPWidenCastRecipe(VPDef::VPWidenCastSC, Opcode, Op, ResultTy) {}
+
   ~VPWidenCastRecipe() override = default;
 
   VPWidenCastRecipe *clone() override {
@@ -1537,7 +1548,15 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
     return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy);
   }
 
-  VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
+  static inline bool classof(const VPRecipeBase *R) {
+    return R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
+           R->getVPDefID() == VPRecipeBase::VPWidenCastEVLSC;
+  }
+
+  static inline bool classof(const VPUser *U) {
+    auto *R = dyn_cast<VPRecipeBase>(U);
+    return R && classof(R);
+  }
 
   /// Produce widened copies of the cast.
   void execute(VPTransformState &State) override;
@@ -1554,6 +1573,54 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
   Type *getResultType() const { return ResultTy; }
 };
 
+// A recipe for widening cast operation with vector-predication intrinsics with
+/// explicit vector length (EVL).
+class VPWidenCastEVLRecipe : public VPWidenCastRecipe {
+  using VPRecipeWithIRFlags::transferFlags;
+
+public:
+  VPWidenCastEVLRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
+                       VPValue &EVL)
+      : VPWidenCastRecipe(VPDef::VPWidenCastEVLSC, Opcode, Op, ResultTy) {
+    addOperand(&EVL);
+  }
+  VPWidenCastEVLRecipe(VPWidenCastRecipe &W, VPValue &EVL)
+      : VPWidenCastEVLRecipe(W.getOpcode(), W.getOperand(0), W.getResultType(),
+                             EVL) {
+    transferFlags(W);
+  }
+
+  ~VPWidenCastEVLRecipe() override = default;
+
+  VPWidenCastEVLRecipe *clone() final {
+    llvm_unreachable("VPWidenEVLRecipe cannot be cloned");
+    return nullptr;
+  }
+
+  VP_CLASSOF_IMPL(VPDef::VPWidenCastEVLSC)
+
+  VPValue *getEVL() { return getOperand(getNumOperands() - 1); }
+  const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
+
+  /// Produce a vp-intrinsic copies of the cast.
+  void execute(VPTransformState &State) final;
+
+  /// Returns true if the recipe only uses the first lane of operand \p Op.
+  bool onlyFirstLaneUsed(const VPValue *Op) const override {
+    assert(is_contained(operands(), Op) &&
+           "Op must be an operand of the recipe");
+    // EVL in that recipe is always the last operand, thus any use before means
+    // the VPValue should be vectorized.
+    return getEVL() == Op;
+  }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  /// Print the recipe.
+  void print(raw_ostream &O, const Twine &Indent,
+             VPSlotTracker &SlotTracker) const final;
+#endif
+};
+
 /// VPScalarCastRecipe is a recipe to create scalar cast instructions.
 class VPScalarCastRecipe : public VPSingleDefRecipe {
   Instruction::CastOps Opcode;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 351f909ac0279d..62bc032f0c0563 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -69,6 +69,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
   case VPReductionSC:
   case VPWidenCanonicalIVSC:
   case VPWidenCastSC:
+  case VPWidenCastEVLSC:
   case VPWidenGEPSC:
   case VPWidenIntOrFpInductionSC:
   case VPWidenLoadEVLSC:
@@ -112,6 +113,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
   case VPReductionSC:
   case VPWidenCanonicalIVSC:
   case VPWidenCastSC:
+  case VPWidenCastEVLSC:
   case VPWidenGEPSC:
   case VPWidenIntOrFpInductionSC:
   case VPWidenPHISC:
@@ -162,6 +164,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
   case VPScalarIVStepsSC:
   case VPWidenCanonicalIVSC:
   case VPWidenCastSC:
+  case VPWidenCastEVLSC:
   case VPWidenGEPSC:
   case VPWidenIntOrFpInductionSC:
   case VPWidenPHISC:
@@ -1344,6 +1347,40 @@ void VPWidenCastRecipe::execute(VPTransformState &State) {
   }
 }
 
+void VPWidenCastEVLRecipe::execute(VPTransformState &State) {
+  unsigned Opcode = getOpcode();
+  State.setDebugLocFrom(getDebugLoc());
+  assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
+                          "explicit vector length.");
+
+  assert(State.get(getOperand(0), 0)->getType()->isVectorTy() &&
+         "VPWidenEVLRecipe should not be used for scalars");
+
+  // TODO: add more cast instruction, eg: fptoint/inttofp/inttoptr/fptofp
+  if (Opcode == Instruction::SExt || Opcode == Instruction::ZExt ||
+      Opcode == Instruction::Trunc) {
+    Value *SrcVal = State.get(getOperand(0), 0);
+    VectorType *SrcTy = cast<VectorType>(SrcVal->getType());
+    VectorType *DsType =
+        VectorType::get(getResultType(), SrcTy->getElementCount());
+
+    IRBuilderBase &BuilderIR = State.Builder;
+    VectorBuilder Builder(BuilderIR);
+    Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
+
+    Builder.setMask(Mask).setEVL(State.get(getEVL(), 0, /*NeedsScalar=*/true));
+    Value *VPInst =
+        Builder.createVectorInstruction(Opcode, DsType, {SrcVal}, "vp.cast");
+    if (VPInst) {
+      if (auto *VecOp = dyn_cast<CastInst>(VPInst))
+        VecOp->copyIRFlags(getUnderlyingInstr());
+    }
+    State.set(this, VPInst, 0);
+    State.addMetadata(VPInst,
+                      dyn_cast_or_null<Instruction>(getUnderlyingValue()));
+  }
+}
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
                               VPSlotTracker &SlotTracker) const {
@@ -1354,6 +1391,16 @@ void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
   printOperands(O, SlotTracker);
   O << " to " << *getResultType();
 }
+
+void VPWidenCastEVLRecipe::print(raw_ostream &O, const Twine &Indent,
+                                 VPSlotTracker &SlotTracker) const {
+  O << Indent << "WIDEN-VP ";
+  printAsOperand(O, SlotTracker);
+  O << " = vp." << Instruction::getOpcodeName(getOpcode()) << " ";
+  printFlags(O);
+  printOperands(O, SlotTracker);
+  O << " to " << *getResultType();
+}
 #endif
 
 /// This function adds
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index b722ec34ee6fb6..c4ac5a13a1473e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1349,6 +1349,15 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
                   return nullptr;
                 return new VPWidenEVLRecipe(*W, EVL);
               })
+              .Case<VPWidenCastRecipe>(
+                  [&](VPWidenCastRecipe *W) -> VPRecipeBase * {
+                    unsigned Opcode = W->getOpcode();
+                    if (Opcode != Instruction::SExt &&
+                        Opcode != Instruction::ZExt &&
+                        Opcode != Instruction::Trunc)
+                      return nullptr;
+                    return new VPWidenCastEVLRecipe(*W, EVL);
+                  })
               .Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
                 VPValue *NewMask = GetNewMask(Red->getCondOp());
                 return new VPReductionEVLRecipe(*Red, EVL, NewMask);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index b8b2c0bd4d5ff1..17fc0a526e58d0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -350,6 +350,7 @@ class VPDef {
     VPWidenCallSC,
     VPWidenCanonicalIVSC,
     VPWidenCastSC,
+    VPWidenCastEVLSC,
     VPWidenGEPSC,
     VPWidenLoadEVLSC,
     VPWidenLoadSC,
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
index 0381f6dae9811f..9c4ebf3d7ff849 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
@@ -159,38 +159,38 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
 ; IF-EVL-INLOOP:       vector.body:
 ; IF-EVL-INLOOP-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-INLOOP-NEXT:    [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-INLOOP-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
+; IF-EVL-INLOOP-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-INLOOP-NEXT:    [[TMP5:%.*]] = sub i32 [[N]], [[EVL_BASED_IV]]
 ; IF-EVL-INLOOP-NEXT:    [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[TMP5]], i32 8, i1 true)
 ; IF-EVL-INLOOP-NEXT:    [[TMP7:%.*]] = add i32 [[EVL_BASED_IV]], 0
 ; IF-EVL-INLOOP-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP7]]
 ; IF-EVL-INLOOP-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 0
 ; IF-EVL-INLOOP-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i16> @llvm.vp.load.nxv8i16.p0(ptr align 2 [[TMP9]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-INLOOP-NEXT:    [[TMP10:%.*]] = sext <vscale x 8 x i16> [[VP_OP_LOAD]] to <vscale x 8 x i32>
-; IF-EVL-INLOOP-NEXT:    [[TMP11:%.*]] = call i32 @llvm.vp.reduce.add.nxv8i32(i32 0, <vscale x 8 x i32> [[TMP10]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-INLOOP-NEXT:    [[TMP12]] = add i32 [[TMP11]], [[VEC_PHI]]
+; IF-EVL-INLOOP-NEXT:    [[VP_CAST:%.*]] = call <vscale x 8 x i32> @llvm.vp.sext.nxv8i32.nxv8i16(<vscale x 8 x i16> [[VP_OP_LOAD]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 [[TMP6]])
+; IF-EVL-INLOOP-NEXT:    [[TMP10:%.*]] = call i32 @llvm.vp.reduce.add.nxv8i32(i32 0, <vscale x 8 x i32> [[VP_CAST]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 [[TMP6]])
+; IF-EVL-INLOOP-NEXT:    [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]]
 ; IF-EVL-INLOOP-NEXT:    [[INDEX_EVL_NEXT]] = add i32 [[TMP6]], [[EVL_BASED_IV]]
 ; IF-EVL-INLOOP-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP4]]
-; IF-EVL-INLOOP-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-INLOOP-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; IF-EVL-INLOOP-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-INLOOP-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; IF-EVL-INLOOP:       middle.block:
 ; IF-EVL-INLOOP-NEXT:    br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
 ; IF-EVL-INLOOP:       scalar.ph:
 ; IF-EVL-INLOOP-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
-; IF-EVL-INLOOP-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; IF-EVL-INLOOP-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
 ; IF-EVL-INLOOP-NEXT:    br label [[FOR_BODY:%.*]]
 ; IF-EVL-INLOOP:       for.body:
 ; IF-EVL-INLOOP-NEXT:    [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; IF-EVL-INLOOP-NEXT:    [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
 ; IF-EVL-INLOOP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[I_08]]
-; IF-EVL-INLOOP-NEXT:    [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
-; IF-EVL-INLOOP-NEXT:    [[CONV:%.*]] = sext i16 [[TMP14]] to i32
+; IF-EVL-INLOOP-NEXT:    [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
+; IF-EVL-INLOOP-NEXT:    [[CONV:%.*]] = sext i16 [[TMP13]] to i32
 ; IF-EVL-INLOOP-NEXT:    [[ADD]] = add nsw i32 [[R_07]], [[CONV]]
 ; IF-EVL-INLOOP-NEXT:    [[INC]] = add nuw nsw i32 [[I_08]], 1
 ; IF-EVL-INLOOP-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
 ; IF-EVL-INLOOP-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; IF-EVL-INLOOP:       for.cond.cleanup.loopexit:
-; IF-EVL-INLOOP-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
+; IF-EVL-INLOOP-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
 ; IF-EVL-INLOOP-NEXT:    br label [[FOR_COND_CLEANUP]]
 ; IF-EVL-INLOOP:       for.cond.cleanup:
 ; IF-EVL-INLOOP-NEXT:    [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
new file mode 100644
index 00000000000000..f1f3d2c88301e1
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
@@ -0,0 +1,227 @@
+; REQUIRES: asserts
+
+; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
+; RUN: -force-tail-folding-style=data-with-evl \
+; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
+; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s
+
+; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
+; RUN: -force-tail-folding-style=none \
+; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \
+; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=NO-VP %s
+
+define void @vp_sext(ptr noalias %a, ptr noalias %b, i64 %N) {
+; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
+; IF-EVL-NEXT: Live-in vp<%0> = VF * UF
+; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
+
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: Successor(s): vector loop
+
+; IF-EVL: <x1> vector loop: {
+; IF-EVL-NEXT:  vector.body:
+; IF-EVL-NEXT:    EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%12>
+; IF-EVL-NEXT:    EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%11>
+; IF-EVL-NEXT:    EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
+; IF-EVL-NEXT:    vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
+; IF-EVL-NEXT:    CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
+; IF-EVL-NEXT:    vp<%7> = vector-pointer ir<%arrayidx>
+; IF-EVL-NEXT:    WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
+; IF-EVL-NEXT:    WIDEN-VP vp<%8> = vp.sext  ir<%0>, vp<%5> to i64
+; IF-EVL-NEXT:    CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
+; IF-EVL-NEXT:    vp<%9> = vector-pointer ir<%arrayidx4>
+; IF-EVL-NEXT:    WIDEN vp.store vp<%9>, vp<%8>, vp<%5>
+; IF-EVL-NEXT:    SCALAR-CAST vp<%10> = zext vp<%5> to i64
+; IF-EVL-NEXT:    EMIT vp<%11> = add vp<%10>, vp<%4>
+; IF-EVL-NEXT:    EMIT vp<%12> = add vp<%3>, vp<%0>
+; IF-EVL-NEXT:    EMIT branch-on-count vp<%12>, vp<%1>
+; IF-EVL-NEXT:  No successors
+; IF-EVL-NEXT: }
+
+; NO-VP: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF>=1' {
+; NO-VP-NEXT: Live-in vp<%0> = VF * UF
+; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
+; NO-VP-NEXT: Live-in ir<%N> = original trip-count
+ 
+; NO-VP: vector.ph:
+; NO-VP-NEXT: Successor(s): vector loop
+
+; NO-VP: <x1> vector loop: {
+; NO-VP-NEXT:   vector.body:
+; NO-VP-NEXT:     EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%6>
+; NO-VP-NEXT:     vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
+; NO-VP-NEXT:     CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
+; NO-VP-NEXT:     vp<%4> = vector-pointer ir<%arrayidx>
+; NO-VP-NEXT:     WIDEN ir<%0> = load vp<%4>
+; NO-VP-NEXT:     WIDEN-CAST ir<%conv2> = sext  ir<%0> to i64
+; NO-VP-NEXT:     CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%3>
+; NO-VP-NEXT:     vp<%5> = vector-pointer ir<%arrayidx4>
+; NO-VP-NEXT:     WIDEN store vp<%5>, ir<%conv2>
+; NO-VP-NEXT:     EMIT vp<%6> = add nuw vp<%2>, vp<%0>
+; NO-VP-NEXT:     EMIT branch-on-count vp<%6>, vp<%1>
+; NO-VP-NEXT:   No successors
+; NO-VP-NEXT: }
+
+entry:
+  %cmp8 = icmp sgt i64 %N, 0
+  br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %conv2 = sext i32 %0 to i64
+  %arrayidx4 = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
+  store i64 %conv2, ptr %arrayidx4, align 8
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %N
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define void @vp_zext(ptr noalias %a, ptr noalias %b, i64 %N) {
+; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
+; IF-EVL-NEXT: Live-in vp<%0> = VF * UF
+; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
+
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: Successor(s): vector loop
+
+; IF-EVL: <x1> vector loop: {
+; IF-EVL-NEXT:   vector.body:
+; IF-EVL-NEXT:     EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%12>
+; IF-EVL-NEXT:     EXPLICIT-VECTOR-LENGTH-BASED-...
[truncated]

@LiqinWeng LiqinWeng force-pushed the widen-vp-with-evl branch 4 times, most recently from 636eb6f to 5078445 Compare September 19, 2024 06:54
@LiqinWeng LiqinWeng force-pushed the widen-vp-with-evl branch 2 times, most recently from b251550 to 0b102ba Compare September 23, 2024 08:21
Copy link
Contributor

@Mel-Chen Mel-Chen left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Apologies for the late reply since I just finished a long vacation.
I'm happy to see that someone else is also contributing to EVL vectorization. :)

Copy link

github-actions bot commented Sep 24, 2024

✅ With the latest revision this PR passed the C/C++ code formatter.

@LiqinWeng
Copy link
Contributor Author

Any other questions? Can I merge it? @fhahn @arcbbb @Mel-Chen

@LiqinWeng
Copy link
Contributor Author

Apologies for the late reply since I just finished a long vacation. I'm happy to see that someone else is also contributing to EVL vectorization. :)

Pls give me 'LGTM' :)~~~

@fhahn
Copy link
Contributor

fhahn commented Oct 7, 2024

Same question as for #109614, would using a generic intrinsic recipe work, or is there a need to have dedicated recipes for those?

@LiqinWeng
Copy link
Contributor Author

Same question as for #109614, would using a generic intrinsic recipe work, or is there a need to have dedicated recipes for those?
Are there any relevant job assignments for vectorization that I could participate in? I really want to participate. The main concern is spending a lot of time doing repetitive tasks~~~ :)

@alexey-bataev
Copy link
Member

Same question as for #109614, would using a generic intrinsic recipe work, or is there a need to have dedicated recipes for those?

Generally speaking, it should work. The only high-level question here is how to distinguish between different opcodes in the VPlan-based cost model. We'll need to implement something like a switch-like approach in the recipe.

@LiqinWeng
Copy link
Contributor Author

Same question as for #109614, would using a generic intrinsic recipe work, or is there a need to have dedicated recipes for those?

I will wait for you to merge #110489, and redesign based on it.

@fhahn
Copy link
Contributor

fhahn commented Oct 8, 2024

Same question as for #109614, would using a generic intrinsic recipe work, or is there a need to have dedicated recipes for those?

Generally speaking, it should work. The only high-level question here is how to distinguish between different opcodes in the VPlan-based cost model. We'll need to implement something like a switch-like approach in the recipe.

I think as long as we are using them only for cases where we directly CodeGen the intrinsic and nothing else, it should be fine to rely on TTI to return the correct costs. If there are any inaccuracies, the cost model should likely be improved to fix those

Comment on lines 976 to 977
if (auto *CI = cast_or_null<CallInst>(getUnderlyingValue()))
CI->getOperandBundlesAsDefs(OpBundles);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a comment on why this isn't supported for VPIntrinsics.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I pass the Instruction in the VPlan transform, it will definitely fail here.
image


CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
Instruction *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this needed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The next patch someone suggested
image

Comment on lines 983 to 986
// Currently vp-intrinsics only accept FMF flags. llvm.vp.uitofp will get
// Flags of OperationType::NonNegOp && OperationType::FPMathOp.
if (isa<FPMathOperator>(V) && VectorIntrinsicID != Intrinsic::vp_uitofp)
setFlags(V);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we instead drop the flags on construction for widen intrinsic recipes with EVL?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. If we want to get the IR flags, we need to get the Instruction, which provides the correct OpType: void setFlags(Instruction *I). But I didn't pass the Instruction in the VPlan transform.
  2. uitofp has the nneg IR flag, but llvm.vp.uitofp does not seem to have this flag. I'm not sure if I have misunderstood something.
  3. How to handle IR flags for VP instructions may need to be addressed in a separate patch. Currently, it seems that VP reductions do not handle IR flags.

@@ -1446,7 +1446,8 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
VPTypeAnalysis TypeInfo(CanonicalIVType);
LLVMContext &Ctx = CanonicalIVType->getContext();
SmallVector<VPValue *> HeaderMasks = collectAllHeaderMasks(Plan);

VPValue *AllOneMask =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also use for select VPInstruction below?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A separate NFC patch would be better :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I don't understand what you mean, the vp.select does not have a mask parameter. declare <16 x i32> @llvm.vp.select.v16i32 (<16 x i1> <condition>, <16 x i32> <on_true>, <16 x i32> <on_false>, i32 <evl>)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think Florian means vp.merge.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will create a new patch to deal with it.

Function *VectorF =
Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
IsVPIntrinsic
? VPIntrinsic::getOrInsertDeclarationForParams(M, VectorIntrinsicID,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@LiqinWeng Yes, that is a problem.
But perhaps we don't have to create exceptions specifically for VP intrinsics. Could you try to extend isVectorIntrinsicWithOverloadTypeAtArg to handle it?

// Currently vp-intrinsics only accept FMF flags. llvm.vp.uitofp will get
// Flags of OperationType::NonNegOp && OperationType::FPMathOp.
if (isa<FPMathOperator>(V) && VectorIntrinsicID != Intrinsic::vp_uitofp)
setFlags(V);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have a question here.
Will setFlags really function as we expect if we don't pass the underlying instruction into the recipe? This PR also seems to lack corresponding test cases to verify whether the FMF has been correctly set.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have a question here. Will setFlags really function as we expect if we don't pass the underlying instruction into the recipe? This PR also seems to lack corresponding test cases to verify whether the FMF has been correctly set.

In the previous commit I passed the underlying instruction. At present, it seems that the underlying instruction needs to be passed when setting metadata and flags. I am not sure about the flags of VP intrinsics, as they do not seem to correspond to those of the non-VP instructions. For example, uitofp has the nneg IR flag, but llvm.vp.uitofp does not seem to have this flag. I'm not sure if I have misunderstood something. :) Can we temporarily not set the flags and submit a new patch to deal with it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vp.cast does not seem to require setting flags, so I don't pass the underlying instruction when transforming a cast to vp.cast. I am not sure if I understand this correctly. If there is any mistake, please point it out.

@@ -1446,7 +1446,8 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
VPTypeAnalysis TypeInfo(CanonicalIVType);
LLVMContext &Ctx = CanonicalIVType->getContext();
SmallVector<VPValue *> HeaderMasks = collectAllHeaderMasks(Plan);

VPValue *AllOneMask =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A separate NFC patch would be better :)

@LiqinWeng
Copy link
Contributor Author

Any other problems? @fhahn @alexey-bataev @Mel-Chen

Comment on lines 147 to 149
if (VPCastIntrinsic::isVPCast(ID)) {
return OpdIdx == -1 || OpdIdx == 0;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (VPCastIntrinsic::isVPCast(ID)) {
return OpdIdx == -1 || OpdIdx == 0;
}
if (VPCastIntrinsic::isVPCast(ID))
return OpdIdx == -1 || OpdIdx == 0;

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

@@ -973,11 +974,13 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {

CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);

// FIXME: vp.cast and vp.select dont pass the underlying instruction into the
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment is confusing here I think, it should be at the place where it is not passed, as this needs to be fixed where the recipe is constructed, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed. I will try to add the flags during recipe construction in a new patch.

Copy link
Contributor

@fhahn fhahn left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM, thanks!

@@ -977,7 +978,7 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {

if (!V->getType()->isVoidTy())
State.set(this, V);
State.addMetadata(V, CI);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this change still needed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A follow-up patch will use this; I can remove it for now. Please give an LGTM :)

@@ -964,7 +964,8 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
Module *M = State.Builder.GetInsertBlock()->getModule();
Function *VectorF =
Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
assert(VectorF && "Can't retrieve vector intrinsic.");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this change still needed?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably sufficient to keep the existing message

@@ -1495,11 +1495,32 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::getTrue(
IntegerType::getInt1Ty(CI->getContext())));
Ops.push_back(Mask);
;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

redundant

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

removed

Comment on lines 1488 to 1489
if (VPID == Intrinsic::not_intrinsic)
return nullptr;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good change, but unrelated.
Could you separate a new patch for this?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah probably worth splitting off

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

@@ -1485,8 +1485,8 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
auto *CI = cast<CallInst>(CInst->getUnderlyingInstr());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The TypeSwitch here has grown quite big, it would be good to outline it to a separate function to reduce the nesting level and make things slightly easier to read

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(would be good as a follow-up)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK~

Copy link
Contributor

@Mel-Chen Mel-Chen left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LG

@LiqinWeng LiqinWeng merged commit b759020 into llvm:main Dec 11, 2024
8 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Dec 11, 2024

LLVM Buildbot has detected a new failure on builder ml-opt-rel-x86-64 running on ml-opt-rel-x86-64-b1 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/185/builds/9982

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 2: /b/ml-opt-rel-x86-64-b1/build/bin/opt -passes=loop-vectorize -debug-only=loop-vectorize  -force-tail-folding-style=data-with-evl  -prefer-predicate-over-epilogue=predicate-dont-vectorize  -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < /b/ml-opt-rel-x86-64-b1/llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll 2>&1 | /b/ml-opt-rel-x86-64-b1/build/bin/FileCheck --check-prefix=IF-EVL /b/ml-opt-rel-x86-64-b1/llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
+ /b/ml-opt-rel-x86-64-b1/build/bin/opt -passes=loop-vectorize -debug-only=loop-vectorize -force-tail-folding-style=data-with-evl -prefer-predicate-over-epilogue=predicate-dont-vectorize -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output
+ /b/ml-opt-rel-x86-64-b1/build/bin/FileCheck --check-prefix=IF-EVL /b/ml-opt-rel-x86-64-b1/llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
/b/ml-opt-rel-x86-64-b1/llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll:9:16: error: IF-EVL-NEXT: is not on the line after the previous match
; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
               ^
<stdin>:431:1: note: 'next' match was here
Live-in vp<%0> = VF * UF
^
<stdin>:266:60: note: previous match ended here
VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
                                                           ^
<stdin>:267:1: note: non-matching line after previous match is here
Live-in ir<%11> = VF * UF
^

Input file: <stdin>
Check file: /b/ml-opt-rel-x86-64-b1/llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll

-dump-input=help explains the following input dump.

Input was:
<<<<<<
        .
        .
        .
      426: LV: Scalarizing: %gep = getelementptr inbounds i32, ptr %b, i64 %iv 
      427: LV: Scalarizing: %gep4 = getelementptr inbounds i64, ptr %a, i64 %iv 
      428: LV: Scalarizing: %iv.next = add nuw nsw i64 %iv, 1 
      429: LV: Scalarizing: %exitcond.not = icmp eq i64 %iv.next, %N 
      430: VPlan 'Initial VPlan for VF={1},UF={1}' { 
      431: Live-in vp<%0> = VF * UF 
next:9     !~~~~~~~~~~~~~~~~~~~~~~~  error: match on wrong line
      432: Live-in vp<%1> = vector-trip-count 
      433: Live-in vp<%2> = backedge-taken count 
      434: Live-in ir<%N> = original trip-count 
      435:  
      436: vector.ph: 
        .
        .
        .
>>>>>>

--

...

@llvm-ci
Copy link
Collaborator

llvm-ci commented Dec 11, 2024

LLVM Buildbot has detected a new failure on builder ml-opt-dev-x86-64 running on ml-opt-dev-x86-64-b2 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/137/builds/10105

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 2: /b/ml-opt-dev-x86-64-b1/build/bin/opt -passes=loop-vectorize -debug-only=loop-vectorize  -force-tail-folding-style=data-with-evl  -prefer-predicate-over-epilogue=predicate-dont-vectorize  -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < /b/ml-opt-dev-x86-64-b1/llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll 2>&1 | /b/ml-opt-dev-x86-64-b1/build/bin/FileCheck --check-prefix=IF-EVL /b/ml-opt-dev-x86-64-b1/llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
+ /b/ml-opt-dev-x86-64-b1/build/bin/opt -passes=loop-vectorize -debug-only=loop-vectorize -force-tail-folding-style=data-with-evl -prefer-predicate-over-epilogue=predicate-dont-vectorize -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output
+ /b/ml-opt-dev-x86-64-b1/build/bin/FileCheck --check-prefix=IF-EVL /b/ml-opt-dev-x86-64-b1/llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
/b/ml-opt-dev-x86-64-b1/llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll:9:16: error: IF-EVL-NEXT: is not on the line after the previous match
; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
               ^
<stdin>:431:1: note: 'next' match was here
Live-in vp<%0> = VF * UF
^
<stdin>:266:60: note: previous match ended here
VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
                                                           ^
<stdin>:267:1: note: non-matching line after previous match is here
Live-in ir<%11> = VF * UF
^

Input file: <stdin>
Check file: /b/ml-opt-dev-x86-64-b1/llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll

-dump-input=help explains the following input dump.

Input was:
<<<<<<
        .
        .
        .
      426: LV: Scalarizing: %gep = getelementptr inbounds i32, ptr %b, i64 %iv 
      427: LV: Scalarizing: %gep4 = getelementptr inbounds i64, ptr %a, i64 %iv 
      428: LV: Scalarizing: %iv.next = add nuw nsw i64 %iv, 1 
      429: LV: Scalarizing: %exitcond.not = icmp eq i64 %iv.next, %N 
      430: VPlan 'Initial VPlan for VF={1},UF={1}' { 
      431: Live-in vp<%0> = VF * UF 
next:9     !~~~~~~~~~~~~~~~~~~~~~~~  error: match on wrong line
      432: Live-in vp<%1> = vector-trip-count 
      433: Live-in vp<%2> = backedge-taken count 
      434: Live-in ir<%N> = original trip-count 
      435:  
      436: vector.ph: 
        .
        .
        .
>>>>>>

--

...

@llvm-ci
Copy link
Collaborator

llvm-ci commented Dec 11, 2024

LLVM Buildbot has detected a new failure on builder ml-opt-devrel-x86-64 running on ml-opt-devrel-x86-64-b1 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/175/builds/9982

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 2: /b/ml-opt-devrel-x86-64-b1/build/bin/opt -passes=loop-vectorize -debug-only=loop-vectorize  -force-tail-folding-style=data-with-evl  -prefer-predicate-over-epilogue=predicate-dont-vectorize  -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < /b/ml-opt-devrel-x86-64-b1/llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll 2>&1 | /b/ml-opt-devrel-x86-64-b1/build/bin/FileCheck --check-prefix=IF-EVL /b/ml-opt-devrel-x86-64-b1/llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
+ /b/ml-opt-devrel-x86-64-b1/build/bin/opt -passes=loop-vectorize -debug-only=loop-vectorize -force-tail-folding-style=data-with-evl -prefer-predicate-over-epilogue=predicate-dont-vectorize -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output
+ /b/ml-opt-devrel-x86-64-b1/build/bin/FileCheck --check-prefix=IF-EVL /b/ml-opt-devrel-x86-64-b1/llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
/b/ml-opt-devrel-x86-64-b1/llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll:9:16: error: IF-EVL-NEXT: is not on the line after the previous match
; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
               ^
<stdin>:431:1: note: 'next' match was here
Live-in vp<%0> = VF * UF
^
<stdin>:266:60: note: previous match ended here
VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
                                                           ^
<stdin>:267:1: note: non-matching line after previous match is here
Live-in ir<%11> = VF * UF
^

Input file: <stdin>
Check file: /b/ml-opt-devrel-x86-64-b1/llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll

-dump-input=help explains the following input dump.

Input was:
<<<<<<
        .
        .
        .
      426: LV: Scalarizing: %gep = getelementptr inbounds i32, ptr %b, i64 %iv 
      427: LV: Scalarizing: %gep4 = getelementptr inbounds i64, ptr %a, i64 %iv 
      428: LV: Scalarizing: %iv.next = add nuw nsw i64 %iv, 1 
      429: LV: Scalarizing: %exitcond.not = icmp eq i64 %iv.next, %N 
      430: VPlan 'Initial VPlan for VF={1},UF={1}' { 
      431: Live-in vp<%0> = VF * UF 
next:9     !~~~~~~~~~~~~~~~~~~~~~~~  error: match on wrong line
      432: Live-in vp<%1> = vector-trip-count 
      433: Live-in vp<%2> = backedge-taken count 
      434: Live-in ir<%N> = original trip-count 
      435:  
      436: vector.ph: 
        .
        .
        .
>>>>>>

--

...

LiqinWeng added a commit to LiqinWeng/llvm-project that referenced this pull request Dec 11, 2024
LiqinWeng added a commit that referenced this pull request Dec 11, 2024
@llvm-ci
Copy link
Collaborator

llvm-ci commented Dec 11, 2024

LLVM Buildbot has detected a new failure on builder lld-x86_64-ubuntu-fast running on as-builder-4 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/33/builds/8057

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 2: /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/build/bin/opt -passes=loop-vectorize -debug-only=loop-vectorize  -force-tail-folding-style=data-with-evl  -prefer-predicate-over-epilogue=predicate-dont-vectorize  -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll 2>&1 | /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/build/bin/FileCheck --check-prefix=IF-EVL /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
+ /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/build/bin/opt -passes=loop-vectorize -debug-only=loop-vectorize -force-tail-folding-style=data-with-evl -prefer-predicate-over-epilogue=predicate-dont-vectorize -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output
+ /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/build/bin/FileCheck --check-prefix=IF-EVL /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
/home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll:9:16: error: IF-EVL-NEXT: is not on the line after the previous match
; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
               ^
<stdin>:431:1: note: 'next' match was here
Live-in vp<%0> = VF * UF
^
<stdin>:266:60: note: previous match ended here
VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
                                                           ^
<stdin>:267:1: note: non-matching line after previous match is here
Live-in ir<%11> = VF * UF
^

Input file: <stdin>
Check file: /home/buildbot/worker/as-builder-4/ramdisk/lld-x86_64/llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll

-dump-input=help explains the following input dump.

Input was:
<<<<<<
        .
        .
        .
      426: LV: Scalarizing: %gep = getelementptr inbounds i32, ptr %b, i64 %iv 
      427: LV: Scalarizing: %gep4 = getelementptr inbounds i64, ptr %a, i64 %iv 
      428: LV: Scalarizing: %iv.next = add nuw nsw i64 %iv, 1 
      429: LV: Scalarizing: %exitcond.not = icmp eq i64 %iv.next, %N 
      430: VPlan 'Initial VPlan for VF={1},UF={1}' { 
      431: Live-in vp<%0> = VF * UF 
next:9     !~~~~~~~~~~~~~~~~~~~~~~~  error: match on wrong line
      432: Live-in vp<%1> = vector-trip-count 
      433: Live-in vp<%2> = backedge-taken count 
      434: Live-in ir<%N> = original trip-count 
      435:  
      436: vector.ph: 
        .
        .
        .
>>>>>>

--

...

@llvm-ci
Copy link
Collaborator

llvm-ci commented Dec 11, 2024

LLVM Buildbot has detected a new failure on builder llvm-clang-x86_64-expensive-checks-debian running on gribozavr4 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/16/builds/10466

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 2: /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/opt -passes=loop-vectorize -debug-only=loop-vectorize  -force-tail-folding-style=data-with-evl  -prefer-predicate-over-epilogue=predicate-dont-vectorize  -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll 2>&1 | /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/FileCheck --check-prefix=IF-EVL /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
+ /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/opt -passes=loop-vectorize -debug-only=loop-vectorize -force-tail-folding-style=data-with-evl -prefer-predicate-over-epilogue=predicate-dont-vectorize -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output
+ /b/1/llvm-clang-x86_64-expensive-checks-debian/build/bin/FileCheck --check-prefix=IF-EVL /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
/b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll:9:16: error: IF-EVL-NEXT: is not on the line after the previous match
; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
               ^
<stdin>:431:1: note: 'next' match was here
Live-in vp<%0> = VF * UF
^
<stdin>:266:60: note: previous match ended here
VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
                                                           ^
<stdin>:267:1: note: non-matching line after previous match is here
Live-in ir<%11> = VF * UF
^

Input file: <stdin>
Check file: /b/1/llvm-clang-x86_64-expensive-checks-debian/llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll

-dump-input=help explains the following input dump.

Input was:
<<<<<<
        .
        .
        .
      426: LV: Scalarizing: %gep = getelementptr inbounds i32, ptr %b, i64 %iv 
      427: LV: Scalarizing: %gep4 = getelementptr inbounds i64, ptr %a, i64 %iv 
      428: LV: Scalarizing: %iv.next = add nuw nsw i64 %iv, 1 
      429: LV: Scalarizing: %exitcond.not = icmp eq i64 %iv.next, %N 
      430: VPlan 'Initial VPlan for VF={1},UF={1}' { 
      431: Live-in vp<%0> = VF * UF 
next:9     !~~~~~~~~~~~~~~~~~~~~~~~  error: match on wrong line
      432: Live-in vp<%1> = vector-trip-count 
      433: Live-in vp<%2> = backedge-taken count 
      434: Live-in ir<%N> = original trip-count 
      435:  
      436: vector.ph: 
        .
        .
        .
>>>>>>

--

...

@llvm-ci
Copy link
Collaborator

llvm-ci commented Dec 11, 2024

LLVM Buildbot has detected a new failure on builder clang-x86_64-debian-fast running on gribozavr4 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/56/builds/14221

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 2: /b/1/clang-x86_64-debian-fast/llvm.obj/bin/opt -passes=loop-vectorize -debug-only=loop-vectorize  -force-tail-folding-style=data-with-evl  -prefer-predicate-over-epilogue=predicate-dont-vectorize  -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < /b/1/clang-x86_64-debian-fast/llvm.src/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll 2>&1 | /b/1/clang-x86_64-debian-fast/llvm.obj/bin/FileCheck --check-prefix=IF-EVL /b/1/clang-x86_64-debian-fast/llvm.src/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
+ /b/1/clang-x86_64-debian-fast/llvm.obj/bin/opt -passes=loop-vectorize -debug-only=loop-vectorize -force-tail-folding-style=data-with-evl -prefer-predicate-over-epilogue=predicate-dont-vectorize -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output
+ /b/1/clang-x86_64-debian-fast/llvm.obj/bin/FileCheck --check-prefix=IF-EVL /b/1/clang-x86_64-debian-fast/llvm.src/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
/b/1/clang-x86_64-debian-fast/llvm.src/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll:9:16: error: IF-EVL-NEXT: is not on the line after the previous match
; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
               ^
<stdin>:431:1: note: 'next' match was here
Live-in vp<%0> = VF * UF
^
<stdin>:266:60: note: previous match ended here
VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
                                                           ^
<stdin>:267:1: note: non-matching line after previous match is here
Live-in ir<%11> = VF * UF
^

Input file: <stdin>
Check file: /b/1/clang-x86_64-debian-fast/llvm.src/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll

-dump-input=help explains the following input dump.

Input was:
<<<<<<
        .
        .
        .
      426: LV: Scalarizing: %gep = getelementptr inbounds i32, ptr %b, i64 %iv 
      427: LV: Scalarizing: %gep4 = getelementptr inbounds i64, ptr %a, i64 %iv 
      428: LV: Scalarizing: %iv.next = add nuw nsw i64 %iv, 1 
      429: LV: Scalarizing: %exitcond.not = icmp eq i64 %iv.next, %N 
      430: VPlan 'Initial VPlan for VF={1},UF={1}' { 
      431: Live-in vp<%0> = VF * UF 
next:9     !~~~~~~~~~~~~~~~~~~~~~~~  error: match on wrong line
      432: Live-in vp<%1> = vector-trip-count 
      433: Live-in vp<%2> = backedge-taken count 
      434: Live-in ir<%N> = original trip-count 
      435:  
      436: vector.ph: 
        .
        .
        .
>>>>>>

--

...

@llvm-ci
Copy link
Collaborator

llvm-ci commented Dec 11, 2024

LLVM Buildbot has detected a new failure on builder llvm-x86_64-debian-dylib running on gribozavr4 while building llvm at step 7 "test-build-unified-tree-check-llvm".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/60/builds/14900

Here is the relevant piece of the build log for the reference
Step 7 (test-build-unified-tree-check-llvm) failure: test (failure)
******************** TEST 'LLVM :: Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 2: /b/1/llvm-x86_64-debian-dylib/build/bin/opt -passes=loop-vectorize -debug-only=loop-vectorize  -force-tail-folding-style=data-with-evl  -prefer-predicate-over-epilogue=predicate-dont-vectorize  -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < /b/1/llvm-x86_64-debian-dylib/llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll 2>&1 | /b/1/llvm-x86_64-debian-dylib/build/bin/FileCheck --check-prefix=IF-EVL /b/1/llvm-x86_64-debian-dylib/llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
+ /b/1/llvm-x86_64-debian-dylib/build/bin/FileCheck --check-prefix=IF-EVL /b/1/llvm-x86_64-debian-dylib/llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
+ /b/1/llvm-x86_64-debian-dylib/build/bin/opt -passes=loop-vectorize -debug-only=loop-vectorize -force-tail-folding-style=data-with-evl -prefer-predicate-over-epilogue=predicate-dont-vectorize -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output
/b/1/llvm-x86_64-debian-dylib/llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll:9:16: error: IF-EVL-NEXT: is not on the line after the previous match
; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
               ^
<stdin>:431:1: note: 'next' match was here
Live-in vp<%0> = VF * UF
^
<stdin>:266:60: note: previous match ended here
VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
                                                           ^
<stdin>:267:1: note: non-matching line after previous match is here
Live-in ir<%11> = VF * UF
^

Input file: <stdin>
Check file: /b/1/llvm-x86_64-debian-dylib/llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll

-dump-input=help explains the following input dump.

Input was:
<<<<<<
        .
        .
        .
      426: LV: Scalarizing: %gep = getelementptr inbounds i32, ptr %b, i64 %iv 
      427: LV: Scalarizing: %gep4 = getelementptr inbounds i64, ptr %a, i64 %iv 
      428: LV: Scalarizing: %iv.next = add nuw nsw i64 %iv, 1 
      429: LV: Scalarizing: %exitcond.not = icmp eq i64 %iv.next, %N 
      430: VPlan 'Initial VPlan for VF={1},UF={1}' { 
      431: Live-in vp<%0> = VF * UF 
next:9     !~~~~~~~~~~~~~~~~~~~~~~~  error: match on wrong line
      432: Live-in vp<%1> = vector-trip-count 
      433: Live-in vp<%2> = backedge-taken count 
      434: Live-in ir<%N> = original trip-count 
      435:  
      436: vector.ph: 
        .
        .
        .
>>>>>>

--

...

@LiqinWeng LiqinWeng deleted the widen-vp-with-evl branch December 11, 2024 07:25
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

8 participants