diff --git a/llvm/include/llvm/CodeGen/EVLIndVarSimplify.h b/llvm/include/llvm/CodeGen/EVLIndVarSimplify.h
new file mode 100644
index 0000000000000..63e8b74f87d1c
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/EVLIndVarSimplify.h
@@ -0,0 +1,31 @@
+//===- EVLIndVarSimplify.h - Optimize vectorized loops w/ EVL IV-*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass rewrites a vectorized loop with a canonical IV to use an
+// EVL-based IV instead, provided the loop was tail-folded with EVL-based
+// predication.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_EVLINDVARSIMPLIFY_H
+#define LLVM_CODEGEN_EVLINDVARSIMPLIFY_H
+
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+class Loop;
+class LPMUpdater;
+
+/// Turn vectorized loops with canonical induction variables into loops that
+/// only use a single EVL-based induction variable.
+struct EVLIndVarSimplifyPass : public PassInfoMixin<EVLIndVarSimplifyPass> {
+  PreservedAnalyses run(Loop &L, LoopAnalysisManager &LAM,
+                        LoopStandardAnalysisResults &AR, LPMUpdater &U);
+};
+} // namespace llvm
+#endif
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index 145fd2fac8b56..60b0ee31fc0f4 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -54,6 +54,7 @@ add_llvm_component_library(LLVMCodeGen
   EarlyIfConversion.cpp
   EdgeBundles.cpp
   EHContGuardCatchret.cpp
+  EVLIndVarSimplify.cpp
   ExecutionDomainFix.cpp
   ExpandLargeDivRem.cpp
   ExpandLargeFpConvert.cpp
diff --git a/llvm/lib/CodeGen/EVLIndVarSimplify.cpp b/llvm/lib/CodeGen/EVLIndVarSimplify.cpp
new file mode 100644
index 0000000000000..c730b34ca5e37
--- /dev/null
+++ b/llvm/lib/CodeGen/EVLIndVarSimplify.cpp
@@ -0,0 +1,236 @@
+//===------ EVLIndVarSimplify.cpp - Optimize vectorized loops w/ EVL IV----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass rewrites a vectorized loop with a canonical IV to use an
+// EVL-based IV instead, provided the loop was tail-folded with EVL-based
+// predication.
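+//
+// As a simplified sketch (names and constants here are illustrative, not
+// produced by any particular pass), such a loop carries two IVs:
+//
+//   vector.body:
+//     %iv       = phi i64 [ 0, %ph ], [ %iv.next, %vector.body ]
+//     %evl.iv   = phi i64 [ 0, %ph ], [ %evl.iv.next, %vector.body ]
+//     %avl      = sub i64 %N, %evl.iv
+//     %evl      = call i32 @llvm.experimental.get.vector.length.i64(i64 %avl, i32 4, i1 true)
+//     ...
+//     %evl.zext = zext i32 %evl to i64
+//     %evl.iv.next = add i64 %evl.zext, %evl.iv
+//     %iv.next  = add i64 %iv, (4 x vscale)
+//     %cond     = icmp eq i64 %iv.next, %n.vec
+//
+// Once the latch branches on `icmp uge i64 %evl.iv.next, %N` instead of
+// %cond, the canonical IV %iv becomes dead and can be removed.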
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/EVLIndVarSimplify.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/IVDescriptors.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
+
+#define DEBUG_TYPE "evl-iv-simplify"
+
+using namespace llvm;
+
+STATISTIC(NumEliminatedCanonicalIV, "Number of canonical IVs we eliminated");
+
+static cl::opt<bool> EnableEVLIndVarSimplify(
+    "enable-evl-indvar-simplify",
+    cl::desc("Enable the EVL-based induction variable simplify pass"),
+    cl::Hidden, cl::init(true));
+
+namespace {
+struct EVLIndVarSimplifyImpl {
+  ScalarEvolution &SE;
+
+  explicit EVLIndVarSimplifyImpl(LoopStandardAnalysisResults &LAR)
+      : SE(LAR.SE) {}
+
+  explicit EVLIndVarSimplifyImpl(ScalarEvolution &SE) : SE(SE) {}
+
+  // Returns true if the loop was modified.
+  bool run(Loop &L);
+};
+} // anonymous namespace
+
+static uint32_t getVFFromIndVar(const SCEV *Step, const Function &F) {
+  if (!Step)
+    return 0U;
+
+  // Looking for loops with an IV step value in the form of
+  // `(<Constant> x vscale)`.
+  if (auto *Mul = dyn_cast<SCEVMulExpr>(Step)) {
+    if (Mul->getNumOperands() == 2) {
+      const SCEV *LHS = Mul->getOperand(0);
+      const SCEV *RHS = Mul->getOperand(1);
+      if (auto *Const = dyn_cast<SCEVConstant>(LHS)) {
+        uint64_t V = Const->getAPInt().getLimitedValue();
+        if (isa<SCEVVScale>(RHS) && llvm::isUInt<32>(V))
+          return static_cast<uint32_t>(V);
+      }
+    }
+  }
+
+  // If not, see if the vscale_range of the parent function is a fixed value,
+  // in which case the step value has been folded into a plain constant.
+  if (F.hasFnAttribute(Attribute::VScaleRange))
+    if (auto *ConstStep = dyn_cast<SCEVConstant>(Step)) {
+      APInt V = ConstStep->getAPInt().abs();
+      ConstantRange CR = llvm::getVScaleRange(&F, 64);
+      if (const APInt *Fixed = CR.getSingleElement()) {
+        V = V.zextOrTrunc(Fixed->getBitWidth());
+        uint64_t VF = V.udiv(*Fixed).getLimitedValue();
+        if (VF && llvm::isUInt<32>(VF) &&
+            // Make sure step is divisible by vscale.
+            V.urem(*Fixed).isZero())
+          return static_cast<uint32_t>(VF);
+      }
+    }
+
+  return 0U;
+}
+
+bool EVLIndVarSimplifyImpl::run(Loop &L) {
+  if (!EnableEVLIndVarSimplify)
+    return false;
+
+  BasicBlock *LatchBlock = L.getLoopLatch();
+  ICmpInst *OrigLatchCmp = L.getLatchCmpInst();
+  if (!LatchBlock || !OrigLatchCmp)
+    return false;
+
+  InductionDescriptor IVD;
+  PHINode *IndVar = L.getInductionVariable(SE);
+  if (!IndVar || !L.getInductionDescriptor(SE, IVD)) {
+    LLVM_DEBUG(dbgs() << "Cannot retrieve IV from loop " << L.getName()
+                      << "\n");
+    return false;
+  }
+
+  BasicBlock *InitBlock, *BackEdgeBlock;
+  if (!L.getIncomingAndBackEdge(InitBlock, BackEdgeBlock)) {
+    LLVM_DEBUG(dbgs() << "Expect unique incoming and backedge in "
+                      << L.getName() << "\n");
+    return false;
+  }
+
+  // Retrieve the loop bounds.
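+  // E.g. (illustrative, not a fixed shape): for an increasing canonical IV
+  //   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  // the initial IV value is 0 and the final IV value is the vector trip
+  // count (such as %n.vec).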
+  std::optional<Loop::LoopBounds> Bounds = L.getBounds(SE);
+  if (!Bounds) {
+    LLVM_DEBUG(dbgs() << "Could not obtain the bounds for loop " << L.getName()
+                      << "\n");
+    return false;
+  }
+  Value *CanonicalIVInit = &Bounds->getInitialIVValue();
+  Value *CanonicalIVFinal = &Bounds->getFinalIVValue();
+
+  const SCEV *StepV = IVD.getStep();
+  uint32_t VF = getVFFromIndVar(StepV, *L.getHeader()->getParent());
+  if (!VF) {
+    LLVM_DEBUG(dbgs() << "Could not infer VF from IndVar step '" << *StepV
+                      << "'\n");
+    return false;
+  }
+  LLVM_DEBUG(dbgs() << "Using VF=" << VF << " for loop " << L.getName()
+                    << "\n");
+
+  // Try to find the EVL-based induction variable.
+  using namespace PatternMatch;
+  BasicBlock *BB = IndVar->getParent();
+
+  Value *EVLIndVar = nullptr;
+  Value *RemTC = nullptr;
+  Value *TC = nullptr;
+  auto IntrinsicMatch = m_Intrinsic<Intrinsic::experimental_get_vector_length>(
+      m_Value(RemTC), m_SpecificInt(VF),
+      /*Scalable=*/m_SpecificInt(1));
+  for (auto &PN : BB->phis()) {
+    if (&PN == IndVar)
+      continue;
+
+    // Check 1: it has to have incoming values from both the incoming (init)
+    // and backedge blocks of IndVar.
+    if (PN.getBasicBlockIndex(InitBlock) < 0 ||
+        PN.getBasicBlockIndex(BackEdgeBlock) < 0)
+      continue;
+    // Check 2: the EVL index is always increasing, so its initial value has
+    // to be equal to either the initial IV value (when the canonical IV is
+    // also increasing) or the last IV value (when the canonical IV is
+    // decreasing).
+    Value *Init = PN.getIncomingValueForBlock(InitBlock);
+    using Direction = Loop::LoopBounds::Direction;
+    switch (Bounds->getDirection()) {
+    case Direction::Increasing:
+      if (Init != CanonicalIVInit)
+        continue;
+      break;
+    case Direction::Decreasing:
+      if (Init != CanonicalIVFinal)
+        continue;
+      break;
+    case Direction::Unknown:
+      // Be more permissive and check whether either the initial or the final
+      // IV value matches PN's init value.
+      if (Init != CanonicalIVInit && Init != CanonicalIVFinal)
+        continue;
+      break;
+    }
+    Value *RecValue = PN.getIncomingValueForBlock(BackEdgeBlock);
+    assert(RecValue);
+
+    LLVM_DEBUG(dbgs() << "Found candidate PN of EVL-based IndVar: " << PN
+                      << "\n");
+
+    // Check 3: Pattern match to find the EVL-based index and total trip count
+    // (TC).
+    if (match(RecValue,
+              m_c_Add(m_ZExtOrSelf(IntrinsicMatch), m_Specific(&PN))) &&
+        match(RemTC, m_Sub(m_Value(TC), m_Specific(&PN)))) {
+      EVLIndVar = RecValue;
+      break;
+    }
+  }
+
+  if (!EVLIndVar || !TC)
+    return false;
+
+  LLVM_DEBUG(dbgs() << "Using " << *EVLIndVar << " for EVL-based IndVar\n");
+
+  // Create an EVL-based comparison and replace the branch to use it as the
+  // predicate.
+
+  // The Loop::getLatchCmpInst check at the beginning of this function has
+  // ensured that the latch block ends in a conditional branch.
+  auto *LatchBranch = cast<BranchInst>(LatchBlock->getTerminator());
+  assert(LatchBranch->getNumSuccessors() == 2);
+  ICmpInst::Predicate Pred;
+  if (LatchBranch->getSuccessor(0) == L.getHeader())
+    Pred = ICmpInst::ICMP_ULT;
+  else
+    Pred = ICmpInst::ICMP_UGE;
+
+  IRBuilder<> Builder(OrigLatchCmp);
+  auto *NewLatchCmp = Builder.CreateICmp(Pred, EVLIndVar, TC);
+  OrigLatchCmp->replaceAllUsesWith(NewLatchCmp);
+
+  // llvm::RecursivelyDeleteDeadPHINode only deletes cycles whose values are
+  // not used outside the cycles. However, in this case the now-RAUW-ed
+  // OrigLatchCmp will be considered a use outside the cycle while in reality
+  // it is practically dead. Thus we need to remove it before calling
+  // RecursivelyDeleteDeadPHINode.
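+  // E.g. (hypothetical names): a latch compare `%cond = icmp eq i64
+  // %iv.next, %n.vec` that was just RAUW-ed still counts as a user of
+  // %iv.next and would keep the %iv <-> %iv.next cycle alive.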
+  (void)RecursivelyDeleteTriviallyDeadInstructions(OrigLatchCmp);
+  if (llvm::RecursivelyDeleteDeadPHINode(IndVar))
+    LLVM_DEBUG(dbgs() << "Removed original IndVar\n");
+
+  ++NumEliminatedCanonicalIV;
+
+  return true;
+}
+
+PreservedAnalyses EVLIndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &LAM,
+                                             LoopStandardAnalysisResults &AR,
+                                             LPMUpdater &U) {
+  if (EVLIndVarSimplifyImpl(AR).run(L))
+    return PreservedAnalyses::allInSet<CFGAnalyses>();
+  return PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index a936f5381137c..6b0d5732ce62b 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -86,6 +86,7 @@
 #include "llvm/CodeGen/DwarfEHPrepare.h"
 #include "llvm/CodeGen/EarlyIfConversion.h"
 #include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/EVLIndVarSimplify.h"
 #include "llvm/CodeGen/ExpandLargeDivRem.h"
 #include "llvm/CodeGen/ExpandLargeFpConvert.h"
 #include "llvm/CodeGen/ExpandMemCmp.h"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index d737ea5ab070a..1806ae9c4abf8 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -24,6 +24,7 @@
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/ScopedNoAliasAA.h"
 #include "llvm/Analysis/TypeBasedAliasAnalysis.h"
+#include "llvm/CodeGen/EVLIndVarSimplify.h"
 #include "llvm/CodeGen/GlobalMergeFunctions.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
@@ -1273,6 +1274,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
   FPM.addPass(LoopVectorizePass(
       LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
 
+  FPM.addPass(createFunctionToLoopPassAdaptor(EVLIndVarSimplifyPass()));
+
   FPM.addPass(InferAlignmentPass());
   if (IsFullLTO) {
     // The vectorizer may have significantly shortened a loop body; unroll
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 9f0b09278edcc..2f0979ea3d999 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -656,6 +656,7 @@ LOOP_ANALYSIS("should-run-extra-simple-loop-unswitch",
 #endif
 LOOP_PASS("canon-freeze", CanonicalizeFreezeInLoopsPass())
 LOOP_PASS("dot-ddg", DDGDotPrinterPass())
+LOOP_PASS("evl-iv-simplify", EVLIndVarSimplifyPass())
 LOOP_PASS("guard-widening", GuardWideningPass())
 LOOP_PASS("extra-simple-loop-unswitch-passes",
           ExtraLoopPassManager<ShouldRunExtraSimpleLoopUnswitch>())
diff --git a/llvm/test/CodeGen/RISCV/evl-iv-simplify.ll b/llvm/test/CodeGen/RISCV/evl-iv-simplify.ll
new file mode 100644
index 0000000000000..33674fd41ce83
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/evl-iv-simplify.ll
@@ -0,0 +1,330 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -mtriple=riscv64 -mattr='+v' --passes='loop(evl-iv-simplify)' < %s | FileCheck %s
+; RUN: opt -S -mtriple=riscv64 -mattr='+v' --passes='loop(evl-iv-simplify),function(simplifycfg,dce)' < %s | FileCheck %s --check-prefix=LOOP-DEL
+
+define void @simple(ptr noalias %a, ptr noalias %b, <vscale x 4 x i32> %c, i64 %N) vscale_range(2, 1024) {
+; CHECK-LABEL: define void @simple(
+; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], <vscale x 4 x i32> [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 -1, [[N]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
+; CHECK-NEXT:    br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 4
+; CHECK-NEXT:    [[TMP8:%.*]] = sub i64 [[TMP7]], 1
+; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP8]]
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP11:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; CHECK-NEXT:    [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
+; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP13]]
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0
+; CHECK-NEXT:    [[VP_OP_LOAD1:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
+; CHECK-NEXT:    [[TMP18:%.*]] = add nsw <vscale x 4 x i32> [[C]], [[VP_OP_LOAD1]]
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]]
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0
+; CHECK-NEXT:    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP18]], ptr align 4 [[TMP20]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
+; CHECK-NEXT:    [[TMP21:%.*]] = zext i32 [[TMP12]] to i64
+; CHECK-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
+; CHECK-NEXT:    [[TMP22:%.*]] = icmp uge i64 [[INDEX_EVL_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT:    [[ADD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+;
+; LOOP-DEL-LABEL: define void @simple(
+; LOOP-DEL-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], <vscale x 4 x i32> [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; LOOP-DEL-NEXT:  entry:
+; LOOP-DEL-NEXT:    [[TMP0:%.*]] = sub i64 -1, [[N]]
+; LOOP-DEL-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; LOOP-DEL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
+; LOOP-DEL-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
+; LOOP-DEL-NEXT:    br i1 [[TMP3]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]]
+; LOOP-DEL:       vector.ph:
+; LOOP-DEL-NEXT:    br label [[VECTOR_BODY:%.*]]
+; LOOP-DEL:       vector.body:
+; LOOP-DEL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
+; LOOP-DEL-NEXT:    [[TMP4:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; LOOP-DEL-NEXT:    [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP4]], i32 4, i1 true)
+; LOOP-DEL-NEXT:    [[TMP6:%.*]] = add i64 [[EVL_BASED_IV]], 0
+; LOOP-DEL-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP6]]
+; LOOP-DEL-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
+; LOOP-DEL-NEXT:    [[VP_OP_LOAD1:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP10]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
+; LOOP-DEL-NEXT:    [[TMP11:%.*]] = add nsw <vscale x 4 x i32> [[C]], [[VP_OP_LOAD1]]
+; LOOP-DEL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]]
+; LOOP-DEL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
+; LOOP-DEL-NEXT:    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP11]], ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
+; LOOP-DEL-NEXT:    [[TMP14:%.*]] = zext i32 [[TMP5]] to i64
+; LOOP-DEL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP14]], [[EVL_BASED_IV]]
+; LOOP-DEL-NEXT:    [[TMP15:%.*]] = icmp uge i64 [[INDEX_EVL_NEXT]], [[N]]
+; LOOP-DEL-NEXT:    br i1 [[TMP15]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; LOOP-DEL:       for.body:
+; LOOP-DEL-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; LOOP-DEL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
+; LOOP-DEL-NEXT:    [[ADD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; LOOP-DEL-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; LOOP-DEL-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4
+; LOOP-DEL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; LOOP-DEL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; LOOP-DEL-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; LOOP-DEL:       for.cond.cleanup:
+; LOOP-DEL-NEXT:    ret void
+;
+entry:
+  %0 = sub i64 -1, %N
+  %1 = call i64 @llvm.vscale.i64()
+  %2 = mul i64 %1, 4
+  %3 = icmp ult i64 %0, %2
+  br i1 %3, label %scalar.ph, label %vector.ph
+
+vector.ph:                                        ; preds = %entry
+  %4 = call i64 @llvm.vscale.i64()
+  %5 = mul i64 %4, 4
+  %6 = call i64 @llvm.vscale.i64()
+  %7 = mul i64 %6, 4
+  %8 = sub i64 %7, 1
+  %n.rnd.up = add i64 %N, %8
+  %n.mod.vf = urem i64 %n.rnd.up, %5
+  %n.vec = sub i64 %n.rnd.up, %n.mod.vf
+  %9 = call i64 @llvm.vscale.i64()
+  %10 = mul i64 %9, 4
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %evl.based.iv = phi i64 [ 0, %vector.ph ], [ %index.evl.next, %vector.body ]
+  %11 = sub i64 %N, %evl.based.iv
+  %12 = call i32 @llvm.experimental.get.vector.length.i64(i64 %11, i32 4, i1 true)
+  %13 = add i64 %evl.based.iv, 0
+  %14 = getelementptr inbounds i32, ptr %b, i64 %13
+  %15 = getelementptr inbounds i32, ptr %14, i32 0
+  %vp.op.load = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 %15, <vscale x 4 x i1> splat (i1 true), i32 %12)
+  %16 = add nsw <vscale x 4 x i32> %c, %vp.op.load
+  %17 = getelementptr inbounds i32, ptr %a, i64 %13
+  %18 = getelementptr inbounds i32, ptr %17, i32 0
+  call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %16, ptr align 4 %18, <vscale x 4 x i1> splat (i1 true), i32 %12)
+  %19 = zext i32 %12 to i64
+  %index.evl.next = add i64 %19, %evl.based.iv
+  %index.next = add nuw i64 %index, %10
+  %20 = icmp eq i64 %index.next, %n.vec
+  br i1 %20, label %middle.block, label %vector.body, !llvm.loop !0
+
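+; Per the CHECK lines above, evl-iv-simplify is expected to delete the
+; canonical IV (%index/%index.next) from this block and rewrite the latch
+; to `icmp uge i64 %index.evl.next, %N`.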
+middle.block:                                     ; preds = %vector.body
+  br i1 true, label %for.cond.cleanup, label %scalar.ph
+
+scalar.ph:                                        ; preds = %entry, %middle.block
+  %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %entry ]
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %scalar.ph
+  %iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %iv
+  %21 = load i32, ptr %arrayidx, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %iv
+  store i32 %21, ptr %arrayidx4, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %N
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !3
+
+for.cond.cleanup:                                 ; preds = %middle.block, %for.body
+  ret void
+}
+
+; Fixed IV steps resulting from vscale_range with a single element
+
+define void @fixed_iv_step(ptr %arg0, ptr %arg1, i64 %N) #0 {
+; CHECK-LABEL: define void @fixed_iv_step(
+; CHECK-SAME: ptr [[ARG0:%.*]], ptr [[ARG1:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[ARG0]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP0]], i32 2, i1 true)
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr ptr, ptr [[ARG1]], i64 [[EVL_BASED_IV]]
+; CHECK-NEXT:    tail call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT]], ptr align 8 [[GEP]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[EVL_BASED_IV]], [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp uge i64 [[INDEX_EVL_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_END_LOOPEXIT5:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3]]
+; CHECK:       for.end.loopexit5:
+; CHECK-NEXT:    br label [[FOR_END:%.*]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+; LOOP-DEL-LABEL: define void @fixed_iv_step(
+; LOOP-DEL-SAME: ptr [[ARG0:%.*]], ptr [[ARG1:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
+; LOOP-DEL-NEXT:  entry:
+; LOOP-DEL-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[ARG0]], i64 0
+; LOOP-DEL-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
+; LOOP-DEL-NEXT:    br label [[VECTOR_BODY:%.*]]
+; LOOP-DEL:       vector.body:
+; LOOP-DEL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
+; LOOP-DEL-NEXT:    [[TMP0:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; LOOP-DEL-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP0]], i32 2, i1 true)
+; LOOP-DEL-NEXT:    [[GEP:%.*]] = getelementptr ptr, ptr [[ARG1]], i64 [[EVL_BASED_IV]]
+; LOOP-DEL-NEXT:    tail call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT]], ptr align 8 [[GEP]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP1]])
+; LOOP-DEL-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+; LOOP-DEL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[EVL_BASED_IV]], [[TMP2]]
+; LOOP-DEL-NEXT:    [[TMP3:%.*]] = icmp uge i64 [[INDEX_EVL_NEXT]], [[N]]
+; LOOP-DEL-NEXT:    br i1 [[TMP3]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3]]
+; LOOP-DEL:       for.end:
+; LOOP-DEL-NEXT:    ret void
+;
+entry:
+  br label %vector.ph
+
+vector.ph:
+  %n.rnd.up = add nsw i64 %N, 15
+  %n.vec = and i64 %n.rnd.up, -16
+  %broadcast.splatinsert = insertelement <vscale x 2 x ptr> poison, ptr %arg0, i64 0
+  %broadcast.splat = shufflevector <vscale x 2 x ptr> %broadcast.splatinsert, <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:
+  %lsr.iv32 = phi i64 [ %lsr.iv.next33, %vector.body ], [ %n.vec, %vector.ph ]
+  %evl.based.iv = phi i64 [ 0, %vector.ph ], [ %index.evl.next, %vector.body ]
+  %0 = sub i64 %N, %evl.based.iv
+  %1 = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %0, i32 2, i1 true)
+  %gep = getelementptr ptr, ptr %arg1, i64 %evl.based.iv
+  tail call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> %broadcast.splat, ptr align 8 %gep, <vscale x 2 x i1> splat (i1 true), i32 %1)
+  %2 = zext i32 %1 to i64
+  %index.evl.next = add i64 %evl.based.iv, %2
+  %lsr.iv.next33 = add i64 %lsr.iv32, -16
+  %3 = icmp eq i64 %lsr.iv.next33, 0
+  br i1 %3, label %for.end.loopexit5, label %vector.body, !llvm.loop !3
+
+for.end.loopexit5:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+; Fixed IV step and trip count
+define void @fixed_iv_step_tc(ptr %arg0, ptr %arg1) #0 {
+; CHECK-LABEL: define void @fixed_iv_step_tc(
+; CHECK-SAME: ptr [[ARG0:%.*]], ptr [[ARG1:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[ARG0]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 87, [[EVL_BASED_IV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP0]], i32 2, i1 true)
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr ptr, ptr [[ARG1]], i64 [[EVL_BASED_IV]]
+; CHECK-NEXT:    tail call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT]], ptr align 8 [[GEP]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[EVL_BASED_IV]], [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp uge i64 [[INDEX_EVL_NEXT]], 87
+; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_END_LOOPEXIT5:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3]]
+; CHECK:       for.end.loopexit5:
+; CHECK-NEXT:    br label [[FOR_END:%.*]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+; LOOP-DEL-LABEL: define void @fixed_iv_step_tc(
+; LOOP-DEL-SAME: ptr [[ARG0:%.*]], ptr [[ARG1:%.*]]) #[[ATTR1]] {
+; LOOP-DEL-NEXT:  entry:
+; LOOP-DEL-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[ARG0]], i64 0
+; LOOP-DEL-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
+; LOOP-DEL-NEXT:    br label [[VECTOR_BODY:%.*]]
+; LOOP-DEL:       vector.body:
+; LOOP-DEL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
+; LOOP-DEL-NEXT:    [[TMP0:%.*]] = sub i64 87, [[EVL_BASED_IV]]
+; LOOP-DEL-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP0]], i32 2, i1 true)
+; LOOP-DEL-NEXT:    [[GEP:%.*]] = getelementptr ptr, ptr [[ARG1]], i64 [[EVL_BASED_IV]]
+; LOOP-DEL-NEXT:    tail call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT]], ptr align 8 [[GEP]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP1]])
+; LOOP-DEL-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+; LOOP-DEL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[EVL_BASED_IV]], [[TMP2]]
+; LOOP-DEL-NEXT:    [[TMP3:%.*]] = icmp uge i64 [[INDEX_EVL_NEXT]], 87
+; LOOP-DEL-NEXT:    br i1 [[TMP3]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3]]
+; LOOP-DEL:       for.end:
+; LOOP-DEL-NEXT:    ret void
+;
+entry:
+  br label %vector.ph
+
+vector.ph:
+  %n.rnd.up = add nsw i64 87, 15
+  %n.vec = and i64 %n.rnd.up, -16
+  %broadcast.splatinsert = insertelement <vscale x 2 x ptr> poison, ptr %arg0, i64 0
+  %broadcast.splat = shufflevector <vscale x 2 x ptr> %broadcast.splatinsert, <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
  br label %vector.body
+
+vector.body:
+  %lsr.iv32 = phi i64 [ %lsr.iv.next33, %vector.body ], [ %n.vec, %vector.ph ]
+  %evl.based.iv = phi i64 [ 0, %vector.ph ], [ %index.evl.next, %vector.body ]
+  %0 = sub i64 87, %evl.based.iv
+  %1 = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %0, i32 2, i1 true)
+  %gep = getelementptr ptr, ptr %arg1, i64 %evl.based.iv
+  tail call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> %broadcast.splat, ptr align 8 %gep, <vscale x 2 x i1> splat (i1 true), i32 %1)
+  %2 = zext i32 %1 to i64
+  %index.evl.next = add i64 %evl.based.iv, %2
+  %lsr.iv.next33 = add i64 %lsr.iv32, -16
+  %3 = icmp eq i64 %lsr.iv.next33, 0
+  br i1 %3, label %for.end.loopexit5, label %vector.body, !llvm.loop !3
+
+for.end.loopexit5:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+declare i64 @llvm.vscale.i64()
+
+declare i32 @llvm.experimental.get.vector.length.i64(i64, i32 immarg, i1 immarg)
+
+declare <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr nocapture, <vscale x 4 x i1>, i32)
+
+declare void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32>, ptr nocapture, <vscale x 4 x i1>, i32)
+
+attributes #0 = { vscale_range(8,8) }
+
+!0 = distinct !{!0, !1, !2}
+!1 = !{!"llvm.loop.isvectorized", i32 1}
+!2 = !{!"llvm.loop.unroll.runtime.disable"}
+!3 = distinct !{!3, !2, !1}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.
+; LOOP-DEL: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; LOOP-DEL: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; LOOP-DEL: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; LOOP-DEL: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.
diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
index 7cf035b0c6f37..cba591e0a0e94 100644
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -256,6 +256,9 @@
 ; CHECK-O-NEXT: Running analysis: LoopAccessAnalysis on foo
 ; CHECK-O-NEXT: Running pass: InjectTLIMappings
 ; CHECK-O-NEXT: Running pass: LoopVectorizePass
+; CHECK-O-NEXT: Running pass: LoopSimplifyPass
+; CHECK-O-NEXT: Running pass: LCSSAPass
+; CHECK-O-NEXT: Running pass: EVLIndVarSimplifyPass
 ; CHECK-O-NEXT: Running pass: InferAlignmentPass
 ; CHECK-O-NEXT: Running pass: LoopLoadEliminationPass
 ; CHECK-O-NEXT: Running pass: InstCombinePass
diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll
index f788db1e338a1..251fb98e290b6 100644
--- a/llvm/test/Other/new-pm-lto-defaults.ll
+++ b/llvm/test/Other/new-pm-lto-defaults.ll
@@ -118,6 +118,9 @@
 ; CHECK-O23SZ-NEXT: Running analysis: LoopAccessAnalysis on foo
 ; CHECK-O23SZ-NEXT: Running pass: LoopVectorizePass on foo
 ; CHECK-O23SZ-NEXT: Running analysis: DemandedBitsAnalysis on foo
+; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass on foo
+; CHECK-O23SZ-NEXT: Running pass: LCSSAPass on foo
+; CHECK-O23SZ-NEXT: Running pass: EVLIndVarSimplifyPass on loop
 ; CHECK-O23SZ-NEXT: Running pass: InferAlignmentPass on foo
 ; CHECK-O23SZ-NEXT: Running pass: LoopUnrollPass on foo
 ; CHECK-O23SZ-NEXT: WarnMissedTransformationsPass on foo
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
index ed13402e1c4b1..02ee5cd8773f3 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
@@ -178,6 +178,9 @@
 ; CHECK-POSTLINK-O-NEXT: Running analysis: LoopAccessAnalysis on foo
 ; CHECK-POSTLINK-O-NEXT: Running pass: InjectTLIMappings
 ; CHECK-POSTLINK-O-NEXT: Running pass: LoopVectorizePass
+; CHECK-POSTLINK-O-NEXT: Running pass: LoopSimplifyPass
+; CHECK-POSTLINK-O-NEXT: Running pass: LCSSAPass
+; CHECK-POSTLINK-O-NEXT: Running pass: EVLIndVarSimplifyPass
 ; CHECK-POSTLINK-O-NEXT: Running pass: InferAlignmentPass
 ; CHECK-POSTLINK-O-NEXT: Running pass: LoopLoadEliminationPass
 ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
index c82c34f7ff01e..a6f84c740afec 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
@@ -163,6 +163,9 @@
 ; CHECK-O-NEXT: Running analysis: LoopAccessAnalysis on foo
 ; CHECK-O-NEXT: Running pass: InjectTLIMappings
 ; CHECK-O-NEXT: Running pass: LoopVectorizePass
+; CHECK-O-NEXT: Running pass: LoopSimplifyPass
+; CHECK-O-NEXT: Running pass: LCSSAPass
+; CHECK-O-NEXT: Running pass: EVLIndVarSimplifyPass
 ; CHECK-O-NEXT: Running pass: InferAlignmentPass
 ; CHECK-O-NEXT: Running pass: LoopLoadEliminationPass
 ; CHECK-O-NEXT: Running pass: InstCombinePass
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
index d375747547d61..f303ed5e5cbd4 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
@@ -172,6 +172,9 @@
 ; CHECK-O-NEXT: Running analysis: LoopAccessAnalysis
 ; CHECK-O-NEXT: Running pass: InjectTLIMappings
 ; CHECK-O-NEXT: Running pass: LoopVectorizePass
+; CHECK-O-NEXT: Running pass: LoopSimplifyPass
+; CHECK-O-NEXT: Running pass: LCSSAPass
+; CHECK-O-NEXT: Running pass: EVLIndVarSimplifyPass
 ; CHECK-O-NEXT: Running pass: InferAlignmentPass
 ; CHECK-O-NEXT: Running pass: LoopLoadEliminationPass
 ; CHECK-O-NEXT: Running pass: InstCombinePass
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll
index b2d6455a82944..b11a5ed1ff0af 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll
@@ -39,7 +39,7 @@ define i32 @read_only_loop_with_runtime_check(ptr noundef %array, i32 noundef %c
 ; CHECK-NEXT:    [[TMP7:%.*]] = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER13]]
-; CHECK:       for.body.preheader13:
+; CHECK:       for.body.preheader15:
 ; CHECK-NEXT:    [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER10]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    [[SUM_07_PH:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER10]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
@@ -154,7 +154,7 @@ define dso_local noundef i32 @sum_prefix_with_sum(ptr %s.coerce0, i64 %s.coerce1
 ; CHECK-NEXT:    [[ADD:%.*]] = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER11]]
-; CHECK:       for.body.preheader11:
+; CHECK:       for.body.preheader13:
 ; CHECK-NEXT:    [[I_07_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER8]] ], [ [[N_VEC]], [[SPAN_CHECKED_ACCESS_EXIT]] ]
 ; CHECK-NEXT:    [[RET_0_LCSSA:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER8]] ], [ [[ADD]], [[SPAN_CHECKED_ACCESS_EXIT]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY1:%.*]]
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll
index 2fe49a31b7722..4f5b188eeb485 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll
@@ -66,7 +66,7 @@ define i64 @sum_2_at_with_int_conversion(ptr %A, ptr %B, i64 %N) {
 ; CHECK-NEXT:    [[TMP17:%.*]] = tail call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]])
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP7]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[LOOP_PREHEADER17]]
-; CHECK:       loop.preheader17:
+; CHECK:       loop.preheader19:
 ; CHECK-NEXT:    [[IV_PH:%.*]] = phi i64 [ 0, [[LOOP_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    [[SUM_PH:%.*]] = phi i64 [ 0, [[LOOP_PREHEADER]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
@@ -187,7 +187,7 @@ define i64 @sum_3_at_with_int_conversion(ptr %A, ptr %B, ptr %C, i64 %N) {
 ; CHECK-NEXT:    [[TMP24:%.*]] = tail call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]])
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP10]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[LOOP_PREHEADER31]]
-; CHECK:       loop.preheader31:
+; CHECK:       loop.preheader33:
 ; CHECK-NEXT:    [[IV_PH:%.*]] = phi i64 [ 0, [[LOOP_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    [[SUM_PH:%.*]] = phi i64 [ 0, [[LOOP_PREHEADER]] ], [ [[TMP24]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]