diff --git a/llvm/include/llvm/CodeGen/EVLIndVarSimplify.h b/llvm/include/llvm/CodeGen/EVLIndVarSimplify.h
new file mode 100644
index 0000000000000..63e8b74f87d1c
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/EVLIndVarSimplify.h
@@ -0,0 +1,31 @@
+//===- EVLIndVarSimplify.h - Optimize vectorized loops w/ EVL IV-*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass rewrites a vectorized loop with a canonical IV to use an
+// EVL-based IV instead, provided the loop was tail-folded with EVL-based
+// predication.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_EVLINDVARSIMPLIFY_H
+#define LLVM_CODEGEN_EVLINDVARSIMPLIFY_H
+
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+class Loop;
+class LPMUpdater;
+
+/// Turn vectorized loops with canonical induction variables into loops that
+/// only use a single EVL-based induction variable.
+struct EVLIndVarSimplifyPass : public PassInfoMixin<EVLIndVarSimplifyPass> {
+  PreservedAnalyses run(Loop &L, LoopAnalysisManager &LAM,
+                        LoopStandardAnalysisResults &AR, LPMUpdater &U);
+};
+} // namespace llvm
+#endif
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index 145fd2fac8b56..60b0ee31fc0f4 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -54,6 +54,7 @@ add_llvm_component_library(LLVMCodeGen
   EarlyIfConversion.cpp
   EdgeBundles.cpp
   EHContGuardCatchret.cpp
+  EVLIndVarSimplify.cpp
   ExecutionDomainFix.cpp
   ExpandLargeDivRem.cpp
   ExpandLargeFpConvert.cpp
diff --git a/llvm/lib/CodeGen/EVLIndVarSimplify.cpp b/llvm/lib/CodeGen/EVLIndVarSimplify.cpp
new file mode 100644
index 0000000000000..c730b34ca5e37
--- /dev/null
+++ b/llvm/lib/CodeGen/EVLIndVarSimplify.cpp
@@ -0,0 +1,236 @@
+//===------ EVLIndVarSimplify.cpp - Optimize vectorized loops w/ EVL IV----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass rewrites a vectorized loop with a canonical IV to use an
+// EVL-based IV instead, provided the loop was tail-folded with EVL-based
+// predication.
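+//
+// As a simplified sketch (names and constants here are illustrative, not
+// produced by any particular pass), such a loop carries two IVs:
+//
+//   vector.body:
+//     %iv       = phi i64 [ 0, %ph ], [ %iv.next, %vector.body ]
+//     %evl.iv   = phi i64 [ 0, %ph ], [ %evl.iv.next, %vector.body ]
+//     %avl      = sub i64 %N, %evl.iv
+//     %evl      = call i32 @llvm.experimental.get.vector.length.i64(i64 %avl, i32 4, i1 true)
+//     ...
+//     %evl.zext = zext i32 %evl to i64
+//     %evl.iv.next = add i64 %evl.zext, %evl.iv
+//     %iv.next  = add i64 %iv, (4 x vscale)
+//     %cond     = icmp eq i64 %iv.next, %n.vec
+//
+// Once the latch branches on `icmp uge i64 %evl.iv.next, %N` instead of
+// %cond, the canonical IV %iv becomes dead and can be removed.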
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/EVLIndVarSimplify.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/IVDescriptors.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
+
+#define DEBUG_TYPE "evl-iv-simplify"
+
+using namespace llvm;
+
+STATISTIC(NumEliminatedCanonicalIV, "Number of canonical IVs we eliminated");
+
+static cl::opt<bool> EnableEVLIndVarSimplify(
+    "enable-evl-indvar-simplify",
+    cl::desc("Enable the EVL-based induction variable simplify pass"),
+    cl::Hidden, cl::init(true));
+
+namespace {
+struct EVLIndVarSimplifyImpl {
+  ScalarEvolution &SE;
+
+  explicit EVLIndVarSimplifyImpl(LoopStandardAnalysisResults &LAR)
+      : SE(LAR.SE) {}
+
+  explicit EVLIndVarSimplifyImpl(ScalarEvolution &SE) : SE(SE) {}
+
+  // Returns true if the loop was modified.
+  bool run(Loop &L);
+};
+} // anonymous namespace
+
+static uint32_t getVFFromIndVar(const SCEV *Step, const Function &F) {
+  if (!Step)
+    return 0U;
+
+  // Looking for loops with an IV step value in the form of
+  // `(<Constant> x vscale)`.
+  if (auto *Mul = dyn_cast<SCEVMulExpr>(Step)) {
+    if (Mul->getNumOperands() == 2) {
+      const SCEV *LHS = Mul->getOperand(0);
+      const SCEV *RHS = Mul->getOperand(1);
+      if (auto *Const = dyn_cast<SCEVConstant>(LHS)) {
+        uint64_t V = Const->getAPInt().getLimitedValue();
+        if (isa<SCEVVScale>(RHS) && llvm::isUInt<32>(V))
+          return static_cast<uint32_t>(V);
+      }
+    }
+  }
+
+  // If not, see if the vscale_range of the parent function is a fixed value,
+  // in which case the step value has been folded into a plain constant.
+  if (F.hasFnAttribute(Attribute::VScaleRange))
+    if (auto *ConstStep = dyn_cast<SCEVConstant>(Step)) {
+      APInt V = ConstStep->getAPInt().abs();
+      ConstantRange CR = llvm::getVScaleRange(&F, 64);
+      if (const APInt *Fixed = CR.getSingleElement()) {
+        V = V.zextOrTrunc(Fixed->getBitWidth());
+        uint64_t VF = V.udiv(*Fixed).getLimitedValue();
+        if (VF && llvm::isUInt<32>(VF) &&
+            // Make sure step is divisible by vscale.
+            V.urem(*Fixed).isZero())
+          return static_cast<uint32_t>(VF);
+      }
+    }
+
+  return 0U;
+}
+
+bool EVLIndVarSimplifyImpl::run(Loop &L) {
+  if (!EnableEVLIndVarSimplify)
+    return false;
+
+  BasicBlock *LatchBlock = L.getLoopLatch();
+  ICmpInst *OrigLatchCmp = L.getLatchCmpInst();
+  if (!LatchBlock || !OrigLatchCmp)
+    return false;
+
+  InductionDescriptor IVD;
+  PHINode *IndVar = L.getInductionVariable(SE);
+  if (!IndVar || !L.getInductionDescriptor(SE, IVD)) {
+    LLVM_DEBUG(dbgs() << "Cannot retrieve IV from loop " << L.getName()
+                      << "\n");
+    return false;
+  }
+
+  BasicBlock *InitBlock, *BackEdgeBlock;
+  if (!L.getIncomingAndBackEdge(InitBlock, BackEdgeBlock)) {
+    LLVM_DEBUG(dbgs() << "Expect unique incoming and backedge in "
+                      << L.getName() << "\n");
+    return false;
+  }
+
+  // Retrieve the loop bounds.
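+  // E.g. (illustrative, not a fixed shape): for an increasing canonical IV
+  //   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  // the initial IV value is 0 and the final IV value is the vector trip
+  // count (such as %n.vec).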
+  std::optional<Loop::LoopBounds> Bounds = L.getBounds(SE);
+  if (!Bounds) {
+    LLVM_DEBUG(dbgs() << "Could not obtain the bounds for loop " << L.getName()
+                      << "\n");
+    return false;
+  }
+  Value *CanonicalIVInit = &Bounds->getInitialIVValue();
+  Value *CanonicalIVFinal = &Bounds->getFinalIVValue();
+
+  const SCEV *StepV = IVD.getStep();
+  uint32_t VF = getVFFromIndVar(StepV, *L.getHeader()->getParent());
+  if (!VF) {
+    LLVM_DEBUG(dbgs() << "Could not infer VF from IndVar step '" << *StepV
+                      << "'\n");
+    return false;
+  }
+  LLVM_DEBUG(dbgs() << "Using VF=" << VF << " for loop " << L.getName()
+                    << "\n");
+
+  // Try to find the EVL-based induction variable.
+  using namespace PatternMatch;
+  BasicBlock *BB = IndVar->getParent();
+
+  Value *EVLIndVar = nullptr;
+  Value *RemTC = nullptr;
+  Value *TC = nullptr;
+  auto IntrinsicMatch = m_Intrinsic<Intrinsic::experimental_get_vector_length>(
+      m_Value(RemTC), m_SpecificInt(VF),
+      /*Scalable=*/m_SpecificInt(1));
+  for (auto &PN : BB->phis()) {
+    if (&PN == IndVar)
+      continue;
+
+    // Check 1: it has to have incoming values from both the incoming (init)
+    // and backedge blocks of IndVar.
+    if (PN.getBasicBlockIndex(InitBlock) < 0 ||
+        PN.getBasicBlockIndex(BackEdgeBlock) < 0)
+      continue;
+    // Check 2: the EVL index is always increasing, so its initial value has
+    // to be equal to either the initial IV value (when the canonical IV is
+    // also increasing) or the last IV value (when the canonical IV is
+    // decreasing).
+    Value *Init = PN.getIncomingValueForBlock(InitBlock);
+    using Direction = Loop::LoopBounds::Direction;
+    switch (Bounds->getDirection()) {
+    case Direction::Increasing:
+      if (Init != CanonicalIVInit)
+        continue;
+      break;
+    case Direction::Decreasing:
+      if (Init != CanonicalIVFinal)
+        continue;
+      break;
+    case Direction::Unknown:
+      // Be more permissive and check whether either the initial or the final
+      // IV value matches PN's init value.
+      if (Init != CanonicalIVInit && Init != CanonicalIVFinal)
+        continue;
+      break;
+    }
+    Value *RecValue = PN.getIncomingValueForBlock(BackEdgeBlock);
+    assert(RecValue);
+
+    LLVM_DEBUG(dbgs() << "Found candidate PN of EVL-based IndVar: " << PN
+                      << "\n");
+
+    // Check 3: Pattern match to find the EVL-based index and total trip count
+    // (TC).
+    if (match(RecValue,
+              m_c_Add(m_ZExtOrSelf(IntrinsicMatch), m_Specific(&PN))) &&
+        match(RemTC, m_Sub(m_Value(TC), m_Specific(&PN)))) {
+      EVLIndVar = RecValue;
+      break;
+    }
+  }
+
+  if (!EVLIndVar || !TC)
+    return false;
+
+  LLVM_DEBUG(dbgs() << "Using " << *EVLIndVar << " for EVL-based IndVar\n");
+
+  // Create an EVL-based comparison and replace the branch to use it as the
+  // predicate.
+
+  // The Loop::getLatchCmpInst check at the beginning of this function has
+  // ensured that the latch block ends in a conditional branch.
+  auto *LatchBranch = cast<BranchInst>(LatchBlock->getTerminator());
+  assert(LatchBranch->getNumSuccessors() == 2);
+  ICmpInst::Predicate Pred;
+  if (LatchBranch->getSuccessor(0) == L.getHeader())
+    Pred = ICmpInst::ICMP_ULT;
+  else
+    Pred = ICmpInst::ICMP_UGE;
+
+  IRBuilder<> Builder(OrigLatchCmp);
+  auto *NewLatchCmp = Builder.CreateICmp(Pred, EVLIndVar, TC);
+  OrigLatchCmp->replaceAllUsesWith(NewLatchCmp);
+
+  // llvm::RecursivelyDeleteDeadPHINode only deletes cycles whose values are
+  // not used outside the cycles. However, in this case the now-RAUW-ed
+  // OrigLatchCmp will be considered a use outside the cycle while in reality
+  // it is practically dead. Thus we need to remove it before calling
+  // RecursivelyDeleteDeadPHINode.
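+  // E.g. (hypothetical names): a latch compare `%cond = icmp eq i64
+  // %iv.next, %n.vec` that was just RAUW-ed still counts as a user of
+  // %iv.next and would keep the %iv <-> %iv.next cycle alive.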
+  (void)RecursivelyDeleteTriviallyDeadInstructions(OrigLatchCmp);
+  if (llvm::RecursivelyDeleteDeadPHINode(IndVar))
+    LLVM_DEBUG(dbgs() << "Removed original IndVar\n");
+
+  ++NumEliminatedCanonicalIV;
+
+  return true;
+}
+
+PreservedAnalyses EVLIndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &LAM,
+                                             LoopStandardAnalysisResults &AR,
+                                             LPMUpdater &U) {
+  if (EVLIndVarSimplifyImpl(AR).run(L))
+    return PreservedAnalyses::allInSet<CFGAnalyses>();
+  return PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index a936f5381137c..6b0d5732ce62b 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -86,6 +86,7 @@
 #include "llvm/CodeGen/DwarfEHPrepare.h"
 #include "llvm/CodeGen/EarlyIfConversion.h"
 #include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/EVLIndVarSimplify.h"
 #include "llvm/CodeGen/ExpandLargeDivRem.h"
 #include "llvm/CodeGen/ExpandLargeFpConvert.h"
 #include "llvm/CodeGen/ExpandMemCmp.h"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index d737ea5ab070a..1806ae9c4abf8 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -24,6 +24,7 @@
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/ScopedNoAliasAA.h"
 #include "llvm/Analysis/TypeBasedAliasAnalysis.h"
+#include "llvm/CodeGen/EVLIndVarSimplify.h"
 #include "llvm/CodeGen/GlobalMergeFunctions.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
@@ -1273,6 +1274,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
   FPM.addPass(LoopVectorizePass(
       LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
 
+  FPM.addPass(createFunctionToLoopPassAdaptor(EVLIndVarSimplifyPass()));
+
   FPM.addPass(InferAlignmentPass());
   if (IsFullLTO) {
     // The vectorizer may have significantly shortened a loop body; unroll
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 9f0b09278edcc..2f0979ea3d999 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -656,6 +656,7 @@ LOOP_ANALYSIS("should-run-extra-simple-loop-unswitch",
 #endif
 LOOP_PASS("canon-freeze", CanonicalizeFreezeInLoopsPass())
 LOOP_PASS("dot-ddg", DDGDotPrinterPass())
+LOOP_PASS("evl-iv-simplify", EVLIndVarSimplifyPass())
 LOOP_PASS("guard-widening", GuardWideningPass())
 LOOP_PASS("extra-simple-loop-unswitch-passes",
           ExtraLoopPassManager<ShouldRunExtraSimpleLoopUnswitch>())
diff --git a/llvm/test/CodeGen/RISCV/evl-iv-simplify.ll b/llvm/test/CodeGen/RISCV/evl-iv-simplify.ll
new file mode 100644
index 0000000000000..33674fd41ce83
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/evl-iv-simplify.ll
@@ -0,0 +1,330 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -mtriple=riscv64 -mattr='+v' --passes='loop(evl-iv-simplify)' < %s | FileCheck %s
+; RUN: opt -S -mtriple=riscv64 -mattr='+v' --passes='loop(evl-iv-simplify),function(simplifycfg,dce)' < %s | FileCheck %s --check-prefix=LOOP-DEL
+
+define void @simple(ptr noalias %a, ptr noalias %b, <vscale x 4 x i32> %c, i64 %N) vscale_range(2, 1024) {
+; CHECK-LABEL: define void @simple(
+; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], <vscale x 4 x i32> [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 -1, [[N]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
+; CHECK-NEXT:    br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 4
+; CHECK-NEXT:    [[TMP8:%.*]] = sub i64 [[TMP7]], 1
+; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP8]]
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP11:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; CHECK-NEXT:    [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
+; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP13]]
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0
+; CHECK-NEXT:    [[VP_OP_LOAD1:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
+; CHECK-NEXT:    [[TMP18:%.*]] = add nsw <vscale x 4 x i32> [[C]], [[VP_OP_LOAD1]]
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]]
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0
+; CHECK-NEXT:    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP18]], ptr align 4 [[TMP20]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
+; CHECK-NEXT:    [[TMP21:%.*]] = zext i32 [[TMP12]] to i64
+; CHECK-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
+; CHECK-NEXT:    [[TMP22:%.*]] = icmp uge i64 [[INDEX_EVL_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT:    [[ADD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+;
+; LOOP-DEL-LABEL: define void @simple(
+; LOOP-DEL-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], <vscale x 4 x i32> [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; LOOP-DEL-NEXT:  entry:
+; LOOP-DEL-NEXT:    [[TMP0:%.*]] = sub i64 -1, [[N]]
+; LOOP-DEL-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; LOOP-DEL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
+; LOOP-DEL-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
+; LOOP-DEL-NEXT:    br i1 [[TMP3]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]]
+; LOOP-DEL:       vector.ph:
+; LOOP-DEL-NEXT:    br label [[VECTOR_BODY:%.*]]
+; LOOP-DEL:       vector.body:
+; LOOP-DEL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
+; LOOP-DEL-NEXT:    [[TMP4:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; LOOP-DEL-NEXT:    [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP4]], i32 4, i1 true)
+; LOOP-DEL-NEXT:    [[TMP6:%.*]] = add i64 [[EVL_BASED_IV]], 0
+; LOOP-DEL-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP6]]
+; LOOP-DEL-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
+; LOOP-DEL-NEXT:    [[VP_OP_LOAD1:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP10]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
+; LOOP-DEL-NEXT:    [[TMP11:%.*]] = add nsw <vscale x 4 x i32> [[C]], [[VP_OP_LOAD1]]
+; LOOP-DEL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]]
+; LOOP-DEL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
+; LOOP-DEL-NEXT:    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP11]], ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
+; LOOP-DEL-NEXT:    [[TMP14:%.*]] = zext i32 [[TMP5]] to i64
+; LOOP-DEL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP14]], [[EVL_BASED_IV]]
+; LOOP-DEL-NEXT:    [[TMP15:%.*]] = icmp uge i64 [[INDEX_EVL_NEXT]], [[N]]
+; LOOP-DEL-NEXT:    br i1 [[TMP15]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; LOOP-DEL:       for.body:
+; LOOP-DEL-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; LOOP-DEL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
+; LOOP-DEL-NEXT:    [[ADD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; LOOP-DEL-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; LOOP-DEL-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4
+; LOOP-DEL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; LOOP-DEL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; LOOP-DEL-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; LOOP-DEL:       for.cond.cleanup:
+; LOOP-DEL-NEXT:    ret void
+;
+entry:
+  %0 = sub i64 -1, %N
+  %1 = call i64 @llvm.vscale.i64()
+  %2 = mul i64 %1, 4
+  %3 = icmp ult i64 %0, %2
+  br i1 %3, label %scalar.ph, label %vector.ph
+
+vector.ph:                                        ; preds = %entry
+  %4 = call i64 @llvm.vscale.i64()
+  %5 = mul i64 %4, 4
+  %6 = call i64 @llvm.vscale.i64()
+  %7 = mul i64 %6, 4
+  %8 = sub i64 %7, 1
+  %n.rnd.up = add i64 %N, %8
+  %n.mod.vf = urem i64 %n.rnd.up, %5
+  %n.vec = sub i64 %n.rnd.up, %n.mod.vf
+  %9 = call i64 @llvm.vscale.i64()
+  %10 = mul i64 %9, 4
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %evl.based.iv = phi i64 [ 0, %vector.ph ], [ %index.evl.next, %vector.body ]
+  %11 = sub i64 %N, %evl.based.iv
+  %12 = call i32 @llvm.experimental.get.vector.length.i64(i64 %11, i32 4, i1 true)
+  %13 = add i64 %evl.based.iv, 0
+  %14 = getelementptr inbounds i32, ptr %b, i64 %13
+  %15 = getelementptr inbounds i32, ptr %14, i32 0
+  %vp.op.load = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 %15, <vscale x 4 x i1> splat (i1 true), i32 %12)
+  %16 = add nsw <vscale x 4 x i32> %c, %vp.op.load
+  %17 = getelementptr inbounds i32, ptr %a, i64 %13
+  %18 = getelementptr inbounds i32, ptr %17, i32 0
+  call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %16, ptr align 4 %18, <vscale x 4 x i1> splat (i1 true), i32 %12)
+  %19 = zext i32 %12 to i64
+  %index.evl.next = add i64 %19, %evl.based.iv
+  %index.next = add nuw i64 %index, %10
+  %20 = icmp eq i64 %index.next, %n.vec
+  br i1 %20, label %middle.block, label %vector.body, !llvm.loop !0
+
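+; Per the CHECK lines above, evl-iv-simplify is expected to delete the
+; canonical IV (%index/%index.next) from this block and rewrite the latch
+; to `icmp uge i64 %index.evl.next, %N`.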
+middle.block:                                     ; preds = %vector.body
+  br i1 true, label %for.cond.cleanup, label %scalar.ph
+
+scalar.ph:                                        ; preds = %entry, %middle.block
+  %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %entry ]
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %scalar.ph
+  %iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %iv
+  %21 = load i32, ptr %arrayidx, align 4
+  %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %iv
+  store i32 %21, ptr %arrayidx4, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %N
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !3
+
+for.cond.cleanup:                                 ; preds = %middle.block, %for.body
+  ret void
+}
+
+; Fixed IV steps resulting from vscale_range with a single element
+
+define void @fixed_iv_step(ptr %arg0, ptr %arg1, i64 %N) #0 {
+; CHECK-LABEL: define void @fixed_iv_step(
+; CHECK-SAME: ptr [[ARG0:%.*]], ptr [[ARG1:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[ARG0]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP0]], i32 2, i1 true)
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr ptr, ptr [[ARG1]], i64 [[EVL_BASED_IV]]
+; CHECK-NEXT:    tail call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT]], ptr align 8 [[GEP]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[EVL_BASED_IV]], [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp uge i64 [[INDEX_EVL_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_END_LOOPEXIT5:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3]]
+; CHECK:       for.end.loopexit5:
+; CHECK-NEXT:    br label [[FOR_END:%.*]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+; LOOP-DEL-LABEL: define void @fixed_iv_step(
+; LOOP-DEL-SAME: ptr [[ARG0:%.*]], ptr [[ARG1:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
+; LOOP-DEL-NEXT:  entry:
+; LOOP-DEL-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[ARG0]], i64 0
+; LOOP-DEL-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
+; LOOP-DEL-NEXT:    br label [[VECTOR_BODY:%.*]]
+; LOOP-DEL:       vector.body:
+; LOOP-DEL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
+; LOOP-DEL-NEXT:    [[TMP0:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; LOOP-DEL-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP0]], i32 2, i1 true)
+; LOOP-DEL-NEXT:    [[GEP:%.*]] = getelementptr ptr, ptr [[ARG1]], i64 [[EVL_BASED_IV]]
+; LOOP-DEL-NEXT:    tail call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT]], ptr align 8 [[GEP]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP1]])
+; LOOP-DEL-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+; LOOP-DEL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[EVL_BASED_IV]], [[TMP2]]
+; LOOP-DEL-NEXT:    [[TMP3:%.*]] = icmp uge i64 [[INDEX_EVL_NEXT]], [[N]]
+; LOOP-DEL-NEXT:    br i1 [[TMP3]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3]]
+; LOOP-DEL:       for.end:
+; LOOP-DEL-NEXT:    ret void
+;
+entry:
+  br label %vector.ph
+
+vector.ph:
+  %n.rnd.up = add nsw i64 %N, 15
+  %n.vec = and i64 %n.rnd.up, -16
+  %broadcast.splatinsert = insertelement <vscale x 2 x ptr> poison, ptr %arg0, i64 0
+  %broadcast.splat = shufflevector <vscale x 2 x ptr> %broadcast.splatinsert, <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:
+  %lsr.iv32 = phi i64 [ %lsr.iv.next33, %vector.body ], [ %n.vec, %vector.ph ]
+  %evl.based.iv = phi i64 [ 0, %vector.ph ], [ %index.evl.next, %vector.body ]
+  %0 = sub i64 %N, %evl.based.iv
+  %1 = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %0, i32 2, i1 true)
+  %gep = getelementptr ptr, ptr %arg1, i64 %evl.based.iv
+  tail call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> %broadcast.splat, ptr align 8 %gep, <vscale x 2 x i1> splat (i1 true), i32 %1)
+  %2 = zext i32 %1 to i64
+  %index.evl.next = add i64 %evl.based.iv, %2
+  %lsr.iv.next33 = add i64 %lsr.iv32, -16
+  %3 = icmp eq i64 %lsr.iv.next33, 0
+  br i1 %3, label %for.end.loopexit5, label %vector.body, !llvm.loop !3
+
+for.end.loopexit5:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+; Fixed IV step and trip count
+define void @fixed_iv_step_tc(ptr %arg0, ptr %arg1) #0 {
+; CHECK-LABEL: define void @fixed_iv_step_tc(
+; CHECK-SAME: ptr [[ARG0:%.*]], ptr [[ARG1:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[ARG0]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 87, [[EVL_BASED_IV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP0]], i32 2, i1 true)
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr ptr, ptr [[ARG1]], i64 [[EVL_BASED_IV]]
+; CHECK-NEXT:    tail call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT]], ptr align 8 [[GEP]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[EVL_BASED_IV]], [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp uge i64 [[INDEX_EVL_NEXT]], 87
+; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_END_LOOPEXIT5:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3]]
+; CHECK:       for.end.loopexit5:
+; CHECK-NEXT:    br label [[FOR_END:%.*]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+; LOOP-DEL-LABEL: define void @fixed_iv_step_tc(
+; LOOP-DEL-SAME: ptr [[ARG0:%.*]], ptr [[ARG1:%.*]]) #[[ATTR1]] {
+; LOOP-DEL-NEXT:  entry:
+; LOOP-DEL-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[ARG0]], i64 0
+; LOOP-DEL-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
+; LOOP-DEL-NEXT:    br label [[VECTOR_BODY:%.*]]
+; LOOP-DEL:       vector.body:
+; LOOP-DEL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
+; LOOP-DEL-NEXT:    [[TMP0:%.*]] = sub i64 87, [[EVL_BASED_IV]]
+; LOOP-DEL-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP0]], i32 2, i1 true)
+; LOOP-DEL-NEXT:    [[GEP:%.*]] = getelementptr ptr, ptr [[ARG1]], i64 [[EVL_BASED_IV]]
+; LOOP-DEL-NEXT:    tail call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT]], ptr align 8 [[GEP]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP1]])
+; LOOP-DEL-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+; LOOP-DEL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[EVL_BASED_IV]], [[TMP2]]
+; LOOP-DEL-NEXT:    [[TMP3:%.*]] = icmp uge i64 [[INDEX_EVL_NEXT]], 87
+; LOOP-DEL-NEXT:    br i1 [[TMP3]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3]]
+; LOOP-DEL:       for.end:
+; LOOP-DEL-NEXT:    ret void
+;
+entry:
+  br label %vector.ph
+
+vector.ph:
+  %n.rnd.up = add nsw i64 87, 15
+  %n.vec = and i64 %n.rnd.up, -16
+  %broadcast.splatinsert = insertelement <vscale x 2 x ptr> poison, ptr %arg0, i64 0
+  %broadcast.splat = shufflevector <vscale x 2 x ptr> %broadcast.splatinsert, <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
  br label %vector.body
+
+vector.body:
+  %lsr.iv32 = phi i64 [ %lsr.iv.next33, %vector.body ], [ %n.vec, %vector.ph ]
+  %evl.based.iv = phi i64 [ 0, %vector.ph ], [ %index.evl.next, %vector.body ]
+  %0 = sub i64 87, %evl.based.iv
+  %1 = tail call i32 @llvm.experimental.get.vector.length.i64(i64 %0, i32 2, i1 true)
+  %gep = getelementptr ptr, ptr %arg1, i64 %evl.based.iv
+  tail call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> %broadcast.splat, ptr align 8 %gep, <vscale x 2 x i1> splat (i1 true), i32 %1)
+  %2 = zext i32 %1 to i64
+  %index.evl.next = add i64 %evl.based.iv, %2
+  %lsr.iv.next33 = add i64 %lsr.iv32, -16
+  %3 = icmp eq i64 %lsr.iv.next33, 0
+  br i1 %3, label %for.end.loopexit5, label %vector.body, !llvm.loop !3
+
+for.end.loopexit5:
+  br label %for.end
+
+for.end:
+  ret void
+}
+
+declare i64 @llvm.vscale.i64()
+
+declare i32 @llvm.experimental.get.vector.length.i64(i64, i32 immarg, i1 immarg)
+
+declare <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr nocapture, <vscale x 4 x i1>, i32)
+
+declare void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32>, ptr nocapture, <vscale x 4 x i1>, i32)
+
+attributes #0 = { vscale_range(8,8) }
+
+!0 = distinct !{!0, !1, !2}
+!1 = !{!"llvm.loop.isvectorized", i32 1}
+!2 = !{!"llvm.loop.unroll.runtime.disable"}
+!3 = distinct !{!3, !2, !1}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.
+; LOOP-DEL: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; LOOP-DEL: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; LOOP-DEL: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; LOOP-DEL: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.
diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
index 7cf035b0c6f37..cba591e0a0e94 100644
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -256,6 +256,9 @@
 ; CHECK-O-NEXT: Running analysis: LoopAccessAnalysis on foo
 ; CHECK-O-NEXT: Running pass: InjectTLIMappings
 ; CHECK-O-NEXT: Running pass: LoopVectorizePass
+; CHECK-O-NEXT: Running pass: LoopSimplifyPass
+; CHECK-O-NEXT: Running pass: LCSSAPass
+; CHECK-O-NEXT: Running pass: EVLIndVarSimplifyPass
 ; CHECK-O-NEXT: Running pass: InferAlignmentPass
 ; CHECK-O-NEXT: Running pass: LoopLoadEliminationPass
 ; CHECK-O-NEXT: Running pass: InstCombinePass
diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll
index f788db1e338a1..251fb98e290b6 100644
--- a/llvm/test/Other/new-pm-lto-defaults.ll
+++ b/llvm/test/Other/new-pm-lto-defaults.ll
@@ -118,6 +118,9 @@
 ; CHECK-O23SZ-NEXT: Running analysis: LoopAccessAnalysis on foo
 ; CHECK-O23SZ-NEXT: Running pass: LoopVectorizePass on foo
 ; CHECK-O23SZ-NEXT: Running analysis: DemandedBitsAnalysis on foo
+; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass on foo
+; CHECK-O23SZ-NEXT: Running pass: LCSSAPass on foo
+; CHECK-O23SZ-NEXT: Running pass: EVLIndVarSimplifyPass on loop
 ; CHECK-O23SZ-NEXT: Running pass: InferAlignmentPass on foo
 ; CHECK-O23SZ-NEXT: Running pass: LoopUnrollPass on foo
 ; CHECK-O23SZ-NEXT: WarnMissedTransformationsPass on foo
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
index ed13402e1c4b1..02ee5cd8773f3 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
@@ -178,6 +178,9 @@
 ; CHECK-POSTLINK-O-NEXT: Running analysis: LoopAccessAnalysis on foo
 ; CHECK-POSTLINK-O-NEXT: Running pass: InjectTLIMappings
 ; CHECK-POSTLINK-O-NEXT: Running pass: LoopVectorizePass
+; CHECK-POSTLINK-O-NEXT: Running pass: LoopSimplifyPass
+; CHECK-POSTLINK-O-NEXT: Running pass: LCSSAPass
+; CHECK-POSTLINK-O-NEXT: Running pass: EVLIndVarSimplifyPass
 ; CHECK-POSTLINK-O-NEXT: Running pass: InferAlignmentPass
 ; CHECK-POSTLINK-O-NEXT: Running pass: LoopLoadEliminationPass
 ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
index c82c34f7ff01e..a6f84c740afec 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
@@ -163,6 +163,9 @@
 ; CHECK-O-NEXT: Running analysis: LoopAccessAnalysis on foo
 ; CHECK-O-NEXT: Running pass: InjectTLIMappings
 ; CHECK-O-NEXT: Running pass: LoopVectorizePass
+; CHECK-O-NEXT: Running pass: LoopSimplifyPass
+; CHECK-O-NEXT: Running pass: LCSSAPass
+; CHECK-O-NEXT: Running pass: EVLIndVarSimplifyPass
 ; CHECK-O-NEXT: Running pass: InferAlignmentPass
 ; CHECK-O-NEXT: Running pass: LoopLoadEliminationPass
 ; CHECK-O-NEXT: Running pass: InstCombinePass
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
index d375747547d61..f303ed5e5cbd4 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
@@ -172,6 +172,9 @@
 ; CHECK-O-NEXT: Running analysis: LoopAccessAnalysis
 ; CHECK-O-NEXT: Running pass: InjectTLIMappings
 ; CHECK-O-NEXT: Running pass: LoopVectorizePass
+; CHECK-O-NEXT: Running pass: LoopSimplifyPass
+; CHECK-O-NEXT: Running pass: LCSSAPass
+; CHECK-O-NEXT: Running pass: EVLIndVarSimplifyPass
 ; CHECK-O-NEXT: Running pass: InferAlignmentPass
 ; CHECK-O-NEXT: Running pass: LoopLoadEliminationPass
 ; CHECK-O-NEXT: Running pass: InstCombinePass
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll
index b2d6455a82944..b11a5ed1ff0af 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoist-runtime-checks.ll
@@ -39,7 +39,7 @@ define i32 @read_only_loop_with_runtime_check(ptr noundef %array, i32 noundef %c
 ; CHECK-NEXT:    [[TMP7:%.*]] = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER13]]
-; CHECK:       for.body.preheader13:
+; CHECK:       for.body.preheader15:
 ; CHECK-NEXT:    [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER10]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    [[SUM_07_PH:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER10]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
@@ -154,7 +154,7 @@ define dso_local noundef i32 @sum_prefix_with_sum(ptr %s.coerce0, i64 %s.coerce1
 ; CHECK-NEXT:    [[ADD:%.*]] = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER11]]
-; CHECK:       for.body.preheader11:
+; CHECK:       for.body.preheader13:
 ; CHECK-NEXT:    [[I_07_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER8]] ], [ [[N_VEC]], [[SPAN_CHECKED_ACCESS_EXIT]] ]
 ; CHECK-NEXT:    [[RET_0_LCSSA:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER8]] ], [ [[ADD]], [[SPAN_CHECKED_ACCESS_EXIT]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY1:%.*]]
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll
index 2fe49a31b7722..4f5b188eeb485 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll
@@ -66,7 +66,7 @@ define i64 @sum_2_at_with_int_conversion(ptr %A, ptr %B, i64 %N) {
 ; CHECK-NEXT:    [[TMP17:%.*]] = tail call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]])
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP7]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[LOOP_PREHEADER17]]
-; CHECK:       loop.preheader17:
+; CHECK:       loop.preheader19:
 ; CHECK-NEXT:    [[IV_PH:%.*]] = phi i64 [ 0, [[LOOP_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    [[SUM_PH:%.*]] = phi i64 [ 0, [[LOOP_PREHEADER]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
@@ -187,7 +187,7 @@ define i64 @sum_3_at_with_int_conversion(ptr %A, ptr %B, ptr %C, i64 %N) {
 ; CHECK-NEXT:    [[TMP24:%.*]] = tail call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]])
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP10]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[LOOP_PREHEADER31]]
-; CHECK:       loop.preheader31:
+; CHECK:       loop.preheader33:
 ; CHECK-NEXT:    [[IV_PH:%.*]] = phi i64 [ 0, [[LOOP_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    [[SUM_PH:%.*]] = phi i64 [ 0, [[LOOP_PREHEADER]] ], [ [[TMP24]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]