Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7750,10 +7750,10 @@ //===------------------------------------------------===// // 2. Copy and widen instructions from the old loop into the new loop. - BestVPlan.prepareToExecute(ILV.getOrCreateTripCount(nullptr), - ILV.getOrCreateVectorTripCount(nullptr), - CanonicalIVStartValue, State, - IsEpilogueVectorization); + BestVPlan.prepareToExecute( + ILV.getOrCreateTripCount(nullptr), + ILV.getOrCreateVectorTripCount(nullptr), CanonicalIVStartValue, + Legal->getWidestInductionType(), State, IsEpilogueVectorization); BestVPlan.execute(&State); @@ -8763,11 +8763,10 @@ // loop. static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, DebugLoc DL, TailFoldingStyle Style) { - Value *StartIdx = ConstantInt::get(IdxTy, 0); - auto *StartV = Plan.getOrAddVPValue(StartIdx); + auto *StartV = Plan.getOrCreateCanonicalIVStartVPValue(); // Add a VPCanonicalIVPHIRecipe starting at 0 to the header. - auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL); + auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, IdxTy, DL); VPRegionBlock *TopRegion = Plan.getVectorLoopRegion(); VPBasicBlock *Header = TopRegion->getEntryBasicBlock(); Header->insert(CanonicalIVPHI, Header->begin()); Index: llvm/lib/Transforms/Vectorize/VPlan.h =================================================================== --- llvm/lib/Transforms/Vectorize/VPlan.h +++ llvm/lib/Transforms/Vectorize/VPlan.h @@ -1774,11 +1774,13 @@ /// loop). VPWidenCanonicalIVRecipe represents the vector version of the /// canonical induction variable. 
class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe { + Type *IdxTy; DebugLoc DL; public: - VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL) - : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV), DL(DL) {} + VPCanonicalIVPHIRecipe(VPValue *StartV, Type *IdxTy, DebugLoc DL) + : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV), + IdxTy(IdxTy), DL(DL) {} ~VPCanonicalIVPHIRecipe() override = default; @@ -1798,9 +1800,7 @@ #endif /// Returns the scalar type of the induction. - const Type *getScalarType() const { - return getOperand(0)->getLiveInIRValue()->getType(); - } + Type *getScalarType() const { return IdxTy; } /// Returns true if the recipe only uses the first lane of operand \p Op. bool onlyFirstLaneUsed(const VPValue *Op) const override { @@ -1867,7 +1867,7 @@ #endif /// Returns the scalar type of the induction. - const Type *getScalarType() const { + Type *getScalarType() const { return cast(getOperand(0)->getDefiningRecipe()) ->getScalarType(); } @@ -2204,6 +2204,10 @@ /// the tail. It equals TripCount - 1. VPValue *BackedgeTakenCount = nullptr; + /// Represents the start value of the canonical IV, replaced by either 0 for + /// the main loop or the new IV start value for an epilog. + VPValue *CanonicalIVStartVPValue = nullptr; + /// Represents the vector trip count. VPValue VectorTripCount; @@ -2242,8 +2246,8 @@ /// Prepare the plan for execution, setting up the required live-in values. void prepareToExecute(Value *TripCount, Value *VectorTripCount, - Value *CanonicalIVStartValue, VPTransformState &State, - bool IsEpilogueVectorization); + Value *CanonicalIVStartValue, Type *IdxTy, + VPTransformState &State, bool IsEpilogueVectorization); /// Generate the IR code for this VPlan. void execute(VPTransformState *State); @@ -2264,6 +2268,9 @@ return TripCount; } + /// The trip count of the original loop. May be null if not used by the VPlan. 
+ VPValue *getTripCount() { return TripCount; } + /// The backedge taken count of the original loop. VPValue *getOrCreateBackedgeTakenCount() { if (!BackedgeTakenCount) @@ -2271,6 +2278,18 @@ return BackedgeTakenCount; } + /// CanonicalIVStartVPValue is a special live-in value that represents the + /// start value of the canonical IV. This is usually 0, but will use a + /// different value for epilog vectorization. + VPValue *getOrCreateCanonicalIVStartVPValue() { + if (!CanonicalIVStartVPValue) + CanonicalIVStartVPValue = new VPValue(); + return CanonicalIVStartVPValue; + } + + /// The CanonicalIVStartVPValue, which may be null if not used by the VPlan. + VPValue *getCanonicalIVStartVPValue() { return CanonicalIVStartVPValue; } + /// The vector trip count. VPValue &getVectorTripCount() { return VectorTripCount; } Index: llvm/lib/Transforms/Vectorize/VPlan.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/VPlan.cpp +++ llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -212,7 +212,8 @@ } Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) { - if (!Def->hasDefiningRecipe()) + if (Def != Plan->getCanonicalIVStartVPValue() && + Def != Plan->getTripCount() && !Def->hasDefiningRecipe()) return Def->getLiveInIRValue(); if (hasScalarValue(Def, Instance)) { @@ -595,6 +596,8 @@ delete TripCount; if (BackedgeTakenCount) delete BackedgeTakenCount; + if (CanonicalIVStartVPValue) + delete CanonicalIVStartVPValue; for (auto &P : VPExternalDefs) delete P.second; } @@ -609,7 +612,7 @@ } void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV, - Value *CanonicalIVStartValue, + Value *CanonicalIVStartValue, Type *IdxTy, VPTransformState &State, bool IsEpilogueVectorization) { @@ -638,24 +641,11 @@ // When vectorizing the epilogue loop, the canonical induction start value // needs to be changed from zero to the value after the main vector loop. 
// FIXME: Improve modeling for canonical IV start values in the epilogue loop. - if (CanonicalIVStartValue) { - VPValue *VPV = getOrAddExternalDef(CanonicalIVStartValue); - auto *IV = getCanonicalIV(); - assert(all_of(IV->users(), - [](const VPUser *U) { - if (isa(U) || - isa(U) || - isa(U)) - return true; - auto *VPI = cast(U); - return VPI->getOpcode() == - VPInstruction::CanonicalIVIncrement || - VPI->getOpcode() == - VPInstruction::CanonicalIVIncrementNUW; - }) && - "the canonical IV should only be used by its increments or " - "ScalarIVSteps when resetting the start value"); - IV->setOperand(0, VPV); + if (CanonicalIVStartVPValue && CanonicalIVStartVPValue->getNumUsers()) { + if (!CanonicalIVStartValue) + CanonicalIVStartValue = ConstantInt::get(IdxTy, 0); + for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) + State.set(CanonicalIVStartVPValue, CanonicalIVStartValue, Part); } } @@ -770,6 +760,13 @@ AnyLiveIn = true; } + if (CanonicalIVStartVPValue && CanonicalIVStartVPValue->getNumUsers()) { + O << "\nLive-in "; + CanonicalIVStartVPValue->printAsOperand(O, SlotTracker); + O << " = canonical IV start"; + AnyLiveIn = true; + } + if (AnyLiveIn) O << "\n"; @@ -1118,6 +1115,8 @@ assignSlot(Plan.BackedgeTakenCount); if (Plan.TripCount) assignSlot(Plan.TripCount); + if (Plan.CanonicalIVStartVPValue) + assignSlot(Plan.CanonicalIVStartVPValue); ReversePostOrderTraversal> RPOT(VPBlockDeepTraversalWrapper(Plan.getEntry())); Index: llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1071,9 +1071,11 @@ #endif void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) { - Value *Start = getStartValue()->getLiveInIRValue(); + Value *Start = State.get(getStartValue(), VPIteration(0, 0)); + assert(Start->getType() == IdxTy && + "Expected VPCanonicalIVPHIRecipe types to match"); PHINode *EntryPart = 
PHINode::Create( - Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt()); + IdxTy, 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt()); BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); EntryPart->addIncoming(Start, VectorPH); @@ -1087,7 +1089,8 @@ VPSlotTracker &SlotTracker) const { O << Indent << "EMIT "; printAsOperand(O, SlotTracker); - O << " = CANONICAL-INDUCTION"; + O << " = CANONICAL-INDUCTION start: "; + getStartValue()->printAsOperand(O, SlotTracker); } #endif @@ -1097,7 +1100,7 @@ return false; // The start value of ID must match the start value of this canonical // induction. - if (getStartValue()->getLiveInIRValue() != ID.getStartValue()) + if (!PatternMatch::match(ID.getStartValue(), PatternMatch::m_Zero())) return false; ConstantInt *Step = ID.getConstIntStepValue(); Index: llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -588,8 +588,7 @@ !canSimplifyBranchOnCond(Term)))) return; - Type *IdxTy = - Plan.getCanonicalIV()->getStartValue()->getLiveInIRValue()->getType(); + Type *IdxTy = Plan.getCanonicalIV()->getScalarType(); const SCEV *TripCount = createTripCountSCEV(IdxTy, PSE); ScalarEvolution &SE = *PSE.getSE(); const SCEV *C = Index: llvm/test/Transforms/LoopVectorize/AArch64/epilog-predicated.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/AArch64/epilog-predicated.ll @@ -0,0 +1,443 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=loop-vectorize,simplifycfg -S %s | FileCheck %s --check-prefix=DEFAULT +; RUN: opt -passes=loop-vectorize,simplifycfg -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -S %s | FileCheck %s --check-prefix=PREDBODY +; RUN: opt -passes=loop-vectorize,simplifycfg 
-prefer-predicate-over-epilogue=use-predicated-epilogue -S %s | FileCheck %s --check-prefix=PREDEPI + +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-none-eabi" + + +define noundef i32 @add(ptr nocapture noundef readonly %x, ptr noalias nocapture noundef writeonly %y, i32 noundef %n) #0 { +; DEFAULT-LABEL: @add( +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; DEFAULT-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; DEFAULT: for.body.preheader: +; DEFAULT-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() +; DEFAULT-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 8 +; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], [[TMP1]] +; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; DEFAULT: vector.ph: +; DEFAULT-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() +; DEFAULT-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 8 +; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP3]] +; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]] +; DEFAULT: vector.body: +; DEFAULT-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; DEFAULT-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 +; DEFAULT-NEXT: [[TMP5:%.*]] = call i32 @llvm.vscale.i32() +; DEFAULT-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], 4 +; DEFAULT-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 0 +; DEFAULT-NEXT: [[TMP8:%.*]] = mul i32 [[TMP7]], 1 +; DEFAULT-NEXT: [[TMP9:%.*]] = add i32 [[INDEX]], [[TMP8]] +; DEFAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP4]] +; DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[TMP9]] +; DEFAULT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 0 +; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 4 +; DEFAULT-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; DEFAULT-NEXT: 
[[TMP14:%.*]] = mul i64 [[TMP13]], 4 +; DEFAULT-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[TMP14]] +; DEFAULT-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP15]], align 4 +; DEFAULT-NEXT: [[TMP16:%.*]] = add nsw [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; DEFAULT-NEXT: [[TMP17:%.*]] = add nsw [[WIDE_LOAD1]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; DEFAULT-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[Y:%.*]], i32 [[TMP4]] +; DEFAULT-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[Y]], i32 [[TMP9]] +; DEFAULT-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 0 +; DEFAULT-NEXT: store [[TMP16]], ptr [[TMP20]], align 4 +; DEFAULT-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() +; DEFAULT-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 4 +; DEFAULT-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i64 [[TMP22]] +; DEFAULT-NEXT: store [[TMP17]], ptr [[TMP23]], align 4 +; DEFAULT-NEXT: [[TMP24:%.*]] = call i32 @llvm.vscale.i32() +; DEFAULT-NEXT: [[TMP25:%.*]] = mul i32 [[TMP24]], 8 +; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP25]] +; DEFAULT-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; DEFAULT-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; DEFAULT: middle.block: +; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; DEFAULT-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[SCALAR_PH]] +; DEFAULT: scalar.ph: +; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; DEFAULT-NEXT: br label [[FOR_BODY:%.*]] +; DEFAULT: for.body: +; DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[INDVARS_IV]] 
+; DEFAULT-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; DEFAULT-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], 1 +; DEFAULT-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[Y]], i32 [[INDVARS_IV]] +; DEFAULT-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4 +; DEFAULT-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1 +; DEFAULT-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[N]] +; DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; DEFAULT: for.cond.cleanup: +; DEFAULT-NEXT: ret i32 0 +; +; PREDBODY-LABEL: @add( +; PREDBODY-NEXT: entry: +; PREDBODY-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; PREDBODY-NEXT: br i1 [[CMP6]], label [[VECTOR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; PREDBODY: vector.ph: +; PREDBODY-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() +; PREDBODY-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 4 +; PREDBODY-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() +; PREDBODY-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 4 +; PREDBODY-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 1 +; PREDBODY-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], [[TMP4]] +; PREDBODY-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]] +; PREDBODY-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] +; PREDBODY-NEXT: [[TMP5:%.*]] = call i32 @llvm.vscale.i32() +; PREDBODY-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], 4 +; PREDBODY-NEXT: [[TMP7:%.*]] = sub i32 [[N]], [[TMP6]] +; PREDBODY-NEXT: [[TMP8:%.*]] = icmp ugt i32 [[N]], [[TMP6]] +; PREDBODY-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP7]], i32 0 +; PREDBODY-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 [[N]]) +; PREDBODY-NEXT: br label [[VECTOR_BODY:%.*]] +; PREDBODY: vector.body: +; PREDBODY-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; PREDBODY-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], 
[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] +; PREDBODY-NEXT: [[TMP10:%.*]] = add i32 [[INDEX]], 0 +; PREDBODY-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP10]] +; PREDBODY-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 +; PREDBODY-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; PREDBODY-NEXT: [[TMP13:%.*]] = add nsw [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; PREDBODY-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[Y:%.*]], i32 [[TMP10]] +; PREDBODY-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0 +; PREDBODY-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP13]], ptr [[TMP15]], i32 4, [[ACTIVE_LANE_MASK]]) +; PREDBODY-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i32(i32 [[INDEX]], i32 [[TMP9]]) +; PREDBODY-NEXT: [[TMP16:%.*]] = call i32 @llvm.vscale.i32() +; PREDBODY-NEXT: [[TMP17:%.*]] = mul i32 [[TMP16]], 4 +; PREDBODY-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP17]] +; PREDBODY-NEXT: [[TMP18:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; PREDBODY-NEXT: [[TMP19:%.*]] = extractelement [[TMP18]], i32 0 +; PREDBODY-NEXT: br i1 [[TMP19]], label [[FOR_COND_CLEANUP]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; PREDBODY: for.cond.cleanup: +; PREDBODY-NEXT: ret i32 0 +; +; PREDEPI-LABEL: @add( +; PREDEPI-NEXT: entry: +; PREDEPI-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; PREDEPI-NEXT: br i1 [[CMP6]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; PREDEPI: vector.main.loop.iter.check: +; PREDEPI-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() +; PREDEPI-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 8 +; PREDEPI-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], [[TMP1]] +; 
PREDEPI-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; PREDEPI: vector.ph: +; PREDEPI-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() +; PREDEPI-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 8 +; PREDEPI-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP3]] +; PREDEPI-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; PREDEPI-NEXT: br label [[VECTOR_BODY:%.*]] +; PREDEPI: vector.body: +; PREDEPI-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; PREDEPI-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 +; PREDEPI-NEXT: [[TMP5:%.*]] = call i32 @llvm.vscale.i32() +; PREDEPI-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], 4 +; PREDEPI-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 0 +; PREDEPI-NEXT: [[TMP8:%.*]] = mul i32 [[TMP7]], 1 +; PREDEPI-NEXT: [[TMP9:%.*]] = add i32 [[INDEX]], [[TMP8]] +; PREDEPI-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP4]] +; PREDEPI-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[TMP9]] +; PREDEPI-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 0 +; PREDEPI-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 4 +; PREDEPI-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; PREDEPI-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4 +; PREDEPI-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[TMP14]] +; PREDEPI-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP15]], align 4 +; PREDEPI-NEXT: [[TMP16:%.*]] = add nsw [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; PREDEPI-NEXT: [[TMP17:%.*]] = add nsw [[WIDE_LOAD1]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; PREDEPI-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[Y:%.*]], i32 [[TMP4]] +; PREDEPI-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[Y]], i32 [[TMP9]] +; PREDEPI-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 0 +; PREDEPI-NEXT: 
store [[TMP16]], ptr [[TMP20]], align 4 +; PREDEPI-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64() +; PREDEPI-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 4 +; PREDEPI-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i64 [[TMP22]] +; PREDEPI-NEXT: store [[TMP17]], ptr [[TMP23]], align 4 +; PREDEPI-NEXT: [[TMP24:%.*]] = call i32 @llvm.vscale.i32() +; PREDEPI-NEXT: [[TMP25:%.*]] = mul i32 [[TMP24]], 8 +; PREDEPI-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP25]] +; PREDEPI-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; PREDEPI-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; PREDEPI: middle.block: +; PREDEPI-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; PREDEPI-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[VEC_EPILOG_PH]] +; PREDEPI: vec.epilog.ph: +; PREDEPI-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] +; PREDEPI-NEXT: [[TMP27:%.*]] = call i32 @llvm.vscale.i32() +; PREDEPI-NEXT: [[TMP28:%.*]] = mul i32 [[TMP27]], 4 +; PREDEPI-NEXT: [[TMP29:%.*]] = call i32 @llvm.vscale.i32() +; PREDEPI-NEXT: [[TMP30:%.*]] = mul i32 [[TMP29]], 4 +; PREDEPI-NEXT: [[TMP31:%.*]] = sub i32 [[TMP30]], 1 +; PREDEPI-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], [[TMP31]] +; PREDEPI-NEXT: [[N_MOD_VF2:%.*]] = urem i32 [[N_RND_UP]], [[TMP28]] +; PREDEPI-NEXT: [[N_VEC3:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF2]] +; PREDEPI-NEXT: [[TMP32:%.*]] = call i32 @llvm.vscale.i32() +; PREDEPI-NEXT: [[TMP33:%.*]] = mul i32 [[TMP32]], 4 +; PREDEPI-NEXT: [[TMP34:%.*]] = sub i32 [[N]], [[TMP33]] +; PREDEPI-NEXT: [[TMP35:%.*]] = icmp ugt i32 [[N]], [[TMP33]] +; PREDEPI-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP34]], i32 0 +; PREDEPI-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i32(i32 [[VEC_EPILOG_RESUME_VAL]], i32 [[N]]) +; PREDEPI-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] +; PREDEPI: 
vec.epilog.vector.body: +; PREDEPI-NEXT: [[INDEX4:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT5:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; PREDEPI-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VEC_EPILOG_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; PREDEPI-NEXT: [[TMP37:%.*]] = add i32 [[INDEX4]], 0 +; PREDEPI-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[TMP37]] +; PREDEPI-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[TMP38]], i32 0 +; PREDEPI-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP39]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; PREDEPI-NEXT: [[TMP40:%.*]] = add nsw [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; PREDEPI-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[Y]], i32 [[TMP37]] +; PREDEPI-NEXT: [[TMP42:%.*]] = getelementptr inbounds i32, ptr [[TMP41]], i32 0 +; PREDEPI-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP40]], ptr [[TMP42]], i32 4, [[ACTIVE_LANE_MASK]]) +; PREDEPI-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i32(i32 [[INDEX4]], i32 [[TMP36]]) +; PREDEPI-NEXT: [[TMP43:%.*]] = call i32 @llvm.vscale.i32() +; PREDEPI-NEXT: [[TMP44:%.*]] = mul i32 [[TMP43]], 4 +; PREDEPI-NEXT: [[INDEX_NEXT5]] = add i32 [[INDEX4]], [[TMP44]] +; PREDEPI-NEXT: [[TMP45:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; PREDEPI-NEXT: [[TMP46:%.*]] = extractelement [[TMP45]], i32 0 +; PREDEPI-NEXT: br i1 [[TMP46]], label [[FOR_COND_CLEANUP]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; PREDEPI: for.cond.cleanup: +; PREDEPI-NEXT: ret i32 0 +; +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = 
%for.body.preheader, %for.body + %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, ptr %x, i32 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 + %add = add nsw i32 %0, 1 + %arrayidx2 = getelementptr inbounds i32, ptr %y, i32 %indvars.iv + store i32 %add, ptr %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 + %exitcond.not = icmp eq i32 %indvars.iv.next, %n + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret i32 0 +} + +define noundef i32 @reduce_add(ptr nocapture noundef readonly %x, i32 noundef %n) #0 { +; DEFAULT-LABEL: @reduce_add( +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; DEFAULT-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; DEFAULT: for.body.preheader: +; DEFAULT-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() +; DEFAULT-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 8 +; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], [[TMP1]] +; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; DEFAULT: vector.ph: +; DEFAULT-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() +; DEFAULT-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 8 +; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP3]] +; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]] +; DEFAULT: vector.body: +; DEFAULT-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; DEFAULT-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] +; DEFAULT-NEXT: [[VEC_PHI1:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] +; DEFAULT-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 +; DEFAULT-NEXT: [[TMP5:%.*]] = call i32 @llvm.vscale.i32() +; DEFAULT-NEXT: [[TMP6:%.*]] = mul 
i32 [[TMP5]], 4 +; DEFAULT-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 0 +; DEFAULT-NEXT: [[TMP8:%.*]] = mul i32 [[TMP7]], 1 +; DEFAULT-NEXT: [[TMP9:%.*]] = add i32 [[INDEX]], [[TMP8]] +; DEFAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP4]] +; DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[TMP9]] +; DEFAULT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 0 +; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 4 +; DEFAULT-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; DEFAULT-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4 +; DEFAULT-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[TMP14]] +; DEFAULT-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP15]], align 4 +; DEFAULT-NEXT: [[TMP16]] = add [[WIDE_LOAD]], [[VEC_PHI]] +; DEFAULT-NEXT: [[TMP17]] = add [[WIDE_LOAD2]], [[VEC_PHI1]] +; DEFAULT-NEXT: [[TMP18:%.*]] = call i32 @llvm.vscale.i32() +; DEFAULT-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], 8 +; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP19]] +; DEFAULT-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; DEFAULT-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; DEFAULT: middle.block: +; DEFAULT-NEXT: [[BIN_RDX:%.*]] = add [[TMP17]], [[TMP16]] +; DEFAULT-NEXT: [[TMP21:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[BIN_RDX]]) +; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; DEFAULT-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[SCALAR_PH]] +; DEFAULT: scalar.ph: +; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; DEFAULT-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ] +; DEFAULT-NEXT: br label [[FOR_BODY:%.*]] +; DEFAULT: for.body: +; DEFAULT-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ 
[[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; DEFAULT-NEXT: [[S_05:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[INDVARS_IV]] +; DEFAULT-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; DEFAULT-NEXT: [[ADD]] = add nsw i32 [[TMP22]], [[S_05]] +; DEFAULT-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1 +; DEFAULT-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[N]] +; DEFAULT-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; DEFAULT: for.cond.cleanup: +; DEFAULT-NEXT: [[S_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD]], [[FOR_BODY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ] +; DEFAULT-NEXT: ret i32 [[S_0_LCSSA]] +; +; PREDBODY-LABEL: @reduce_add( +; PREDBODY-NEXT: entry: +; PREDBODY-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; PREDBODY-NEXT: br i1 [[CMP4]], label [[VECTOR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; PREDBODY: vector.ph: +; PREDBODY-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() +; PREDBODY-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 4 +; PREDBODY-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() +; PREDBODY-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 4 +; PREDBODY-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 1 +; PREDBODY-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], [[TMP4]] +; PREDBODY-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]] +; PREDBODY-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] +; PREDBODY-NEXT: [[TMP5:%.*]] = call i32 @llvm.vscale.i32() +; PREDBODY-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], 4 +; PREDBODY-NEXT: [[TMP7:%.*]] = sub i32 [[N]], [[TMP6]] +; PREDBODY-NEXT: [[TMP8:%.*]] = icmp ugt i32 [[N]], [[TMP6]] +; PREDBODY-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP7]], i32 0 +; PREDBODY-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 [[N]]) +; PREDBODY-NEXT: br label 
[[VECTOR_BODY:%.*]] +; PREDBODY: vector.body: +; PREDBODY-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; PREDBODY-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] +; PREDBODY-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] +; PREDBODY-NEXT: [[TMP10:%.*]] = add i32 [[INDEX]], 0 +; PREDBODY-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP10]] +; PREDBODY-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0 +; PREDBODY-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; PREDBODY-NEXT: [[TMP13:%.*]] = add [[WIDE_MASKED_LOAD]], [[VEC_PHI]] +; PREDBODY-NEXT: [[TMP14]] = select [[ACTIVE_LANE_MASK]], [[TMP13]], [[VEC_PHI]] +; PREDBODY-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i32(i32 [[INDEX]], i32 [[TMP9]]) +; PREDBODY-NEXT: [[TMP15:%.*]] = call i32 @llvm.vscale.i32() +; PREDBODY-NEXT: [[TMP16:%.*]] = mul i32 [[TMP15]], 4 +; PREDBODY-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP16]] +; PREDBODY-NEXT: [[TMP17:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; PREDBODY-NEXT: [[TMP18:%.*]] = extractelement [[TMP17]], i32 0 +; PREDBODY-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; PREDBODY: middle.block: +; PREDBODY-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP14]]) +; PREDBODY-NEXT: br label [[FOR_COND_CLEANUP]] +; PREDBODY: for.cond.cleanup: +; PREDBODY-NEXT: [[S_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ] +; PREDBODY-NEXT: ret i32 [[S_0_LCSSA]] +; +; PREDEPI-LABEL: @reduce_add( +; PREDEPI-NEXT: entry: +; PREDEPI-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 
+; PREDEPI-NEXT: br i1 [[CMP4]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; PREDEPI: vector.main.loop.iter.check: +; PREDEPI-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() +; PREDEPI-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 8 +; PREDEPI-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], [[TMP1]] +; PREDEPI-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; PREDEPI: vector.ph: +; PREDEPI-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() +; PREDEPI-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 8 +; PREDEPI-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP3]] +; PREDEPI-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; PREDEPI-NEXT: br label [[VECTOR_BODY:%.*]] +; PREDEPI: vector.body: +; PREDEPI-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; PREDEPI-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] +; PREDEPI-NEXT: [[VEC_PHI1:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] +; PREDEPI-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 +; PREDEPI-NEXT: [[TMP5:%.*]] = call i32 @llvm.vscale.i32() +; PREDEPI-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], 4 +; PREDEPI-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 0 +; PREDEPI-NEXT: [[TMP8:%.*]] = mul i32 [[TMP7]], 1 +; PREDEPI-NEXT: [[TMP9:%.*]] = add i32 [[INDEX]], [[TMP8]] +; PREDEPI-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP4]] +; PREDEPI-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[TMP9]] +; PREDEPI-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 0 +; PREDEPI-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 4 +; PREDEPI-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; PREDEPI-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4 +; PREDEPI-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[TMP14]] +; PREDEPI-NEXT: [[WIDE_LOAD2:%.*]] = load , ptr [[TMP15]], 
align 4 +; PREDEPI-NEXT: [[TMP16]] = add [[WIDE_LOAD]], [[VEC_PHI]] +; PREDEPI-NEXT: [[TMP17]] = add [[WIDE_LOAD2]], [[VEC_PHI1]] +; PREDEPI-NEXT: [[TMP18:%.*]] = call i32 @llvm.vscale.i32() +; PREDEPI-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], 8 +; PREDEPI-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP19]] +; PREDEPI-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; PREDEPI-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; PREDEPI: middle.block: +; PREDEPI-NEXT: [[BIN_RDX:%.*]] = add [[TMP17]], [[TMP16]] +; PREDEPI-NEXT: [[TMP21:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[BIN_RDX]]) +; PREDEPI-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; PREDEPI-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[VEC_EPILOG_PH]] +; PREDEPI: vec.epilog.ph: +; PREDEPI-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ] +; PREDEPI-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] +; PREDEPI-NEXT: [[TMP22:%.*]] = call i32 @llvm.vscale.i32() +; PREDEPI-NEXT: [[TMP23:%.*]] = mul i32 [[TMP22]], 4 +; PREDEPI-NEXT: [[TMP24:%.*]] = call i32 @llvm.vscale.i32() +; PREDEPI-NEXT: [[TMP25:%.*]] = mul i32 [[TMP24]], 4 +; PREDEPI-NEXT: [[TMP26:%.*]] = sub i32 [[TMP25]], 1 +; PREDEPI-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], [[TMP26]] +; PREDEPI-NEXT: [[N_MOD_VF3:%.*]] = urem i32 [[N_RND_UP]], [[TMP23]] +; PREDEPI-NEXT: [[N_VEC4:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF3]] +; PREDEPI-NEXT: [[TMP27:%.*]] = call i32 @llvm.vscale.i32() +; PREDEPI-NEXT: [[TMP28:%.*]] = mul i32 [[TMP27]], 4 +; PREDEPI-NEXT: [[TMP29:%.*]] = sub i32 [[N]], [[TMP28]] +; PREDEPI-NEXT: [[TMP30:%.*]] = icmp ugt i32 [[N]], [[TMP28]] +; PREDEPI-NEXT: [[TMP31:%.*]] = select i1 [[TMP30]], i32 [[TMP29]], i32 0 +; PREDEPI-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i32(i32 
[[VEC_EPILOG_RESUME_VAL]], i32 [[N]]) +; PREDEPI-NEXT: [[TMP32:%.*]] = insertelement zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0 +; PREDEPI-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] +; PREDEPI: vec.epilog.vector.body: +; PREDEPI-NEXT: [[INDEX5:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT7:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; PREDEPI-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VEC_EPILOG_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; PREDEPI-NEXT: [[VEC_PHI6:%.*]] = phi [ [[TMP32]], [[VEC_EPILOG_PH]] ], [ [[TMP37:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; PREDEPI-NEXT: [[TMP33:%.*]] = add i32 [[INDEX5]], 0 +; PREDEPI-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[X]], i32 [[TMP33]] +; PREDEPI-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[TMP34]], i32 0 +; PREDEPI-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP35]], i32 4, [[ACTIVE_LANE_MASK]], poison) +; PREDEPI-NEXT: [[TMP36:%.*]] = add [[WIDE_MASKED_LOAD]], [[VEC_PHI6]] +; PREDEPI-NEXT: [[TMP37]] = select [[ACTIVE_LANE_MASK]], [[TMP36]], [[VEC_PHI6]] +; PREDEPI-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i32(i32 [[INDEX5]], i32 [[TMP31]]) +; PREDEPI-NEXT: [[TMP38:%.*]] = call i32 @llvm.vscale.i32() +; PREDEPI-NEXT: [[TMP39:%.*]] = mul i32 [[TMP38]], 4 +; PREDEPI-NEXT: [[INDEX_NEXT7]] = add i32 [[INDEX5]], [[TMP39]] +; PREDEPI-NEXT: [[TMP40:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; PREDEPI-NEXT: [[TMP41:%.*]] = extractelement [[TMP40]], i32 0 +; PREDEPI-NEXT: br i1 [[TMP41]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; PREDEPI: vec.epilog.middle.block: +; PREDEPI-NEXT: [[TMP42:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP37]]) +; PREDEPI-NEXT: br label [[FOR_COND_CLEANUP]] +; PREDEPI: 
for.cond.cleanup: +; PREDEPI-NEXT: [[S_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ], [ [[TMP42]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; PREDEPI-NEXT: ret i32 [[S_0_LCSSA]] +; +entry: + %cmp4 = icmp sgt i32 %n, 0 + br i1 %cmp4, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %s.05 = phi i32 [ 0, %for.body.preheader ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i32, ptr %x, i32 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 + %add = add nsw i32 %0, %s.05 + %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 + %exitcond.not = icmp eq i32 %indvars.iv.next, %n + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %s.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + ret i32 %s.0.lcssa +} + +attributes #0 = { "target-features"="+sve2" } Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll +++ llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll @@ -11,16 +11,17 @@ ; VPLANS-LABEL: Checking a loop in 'simple_memset' ; VPLANS: VPlan 'Initial VPlan for Tail Folded VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' { ; VPLANS-NEXT: Live-in vp<[[TC:%[0-9]+]]> = original trip-count +; VPLANS-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; VPLANS-EMPTY: ; VPLANS-NEXT: vector.ph: -; VPLANS-NEXT: EMIT vp<[[VF:%[0-9]+]]> = VF * Part + ir<0> +; VPLANS-NEXT: EMIT vp<[[VF:%[0-9]+]]> = VF * Part + vp<[[START]]> ; VPLANS-NEXT: EMIT vp<[[NEWTC:%[0-9]+]]> = TC > VF ? 
TC - VF : 0 vp<[[TC]]> ; VPLANS-NEXT: EMIT vp<[[LANEMASK_ENTRY:%[0-9]+]]> = active lane mask vp<[[VF]]> vp<[[TC]]> ; VPLANS-NEXT: Successor(s): vector loop ; VPLANS-EMPTY: ; VPLANS-NEXT: vector loop: { ; VPLANS-NEXT: vector.body: -; VPLANS-NEXT: EMIT vp<[[INDV:%[0-9]+]]> = CANONICAL-INDUCTION +; VPLANS-NEXT: EMIT vp<[[INDV:%[0-9]+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; VPLANS-NEXT: ACTIVE-LANE-MASK-PHI vp<[[LANEMASK_PHI:%[0-9]+]]> = phi vp<[[LANEMASK_ENTRY]]>, vp<[[LANEMASK_LOOP:%[0-9]+]]> ; VPLANS-NEXT: vp<[[STEP:%[0-9]+]]> = SCALAR-STEPS vp<[[INDV]]>, ir<1> ; VPLANS-NEXT: CLONE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEP]]> Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll +++ llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll @@ -10,13 +10,14 @@ ; ; CHECK: VPlan 'Initial VPlan for VF={vscale x 2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: EMIT ir<%ptr.iv.1> = WIDEN-POINTER-INDUCTION ir<%start.1>, 1 ; CHECK-NEXT: EMIT ir<%ptr.iv.2> = WIDEN-POINTER-INDUCTION ir<%start.2>, 1 ; CHECK-NEXT: WIDEN-GEP Var[Inv] ir<%ptr.iv.2.next> = getelementptr ir<%ptr.iv.2>, ir<1> Index: llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll +++ llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll @@ -11,21 +11,22 @@ ; CHECK-LABEL: LV: Checking a loop in 
'test_v4_v4m' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<%1> = vector-trip-count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION -; CHECK-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1> -; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%b>, vp<%3> +; CHECK-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION start: vp<[[START]]> +; CHECK-NEXT: vp<%4> = SCALAR-STEPS vp<%3>, ir<1> +; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%b>, vp<%4> ; CHECK-NEXT: WIDEN ir<%load> = load ir<%gep> ; CHECK-NEXT: REPLICATE ir<%call> = call @foo(ir<%load>) -; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%a>, vp<%3> +; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%a>, vp<%4> ; CHECK-NEXT: WIDEN store ir<%arrayidx>, ir<%call> -; CHECK-NEXT: EMIT vp<%8> = VF * UF +(nuw) vp<%2> -; CHECK-NEXT: EMIT branch-on-count vp<%8> vp<%1> +; CHECK-NEXT: EMIT vp<%9> = VF * UF +(nuw) vp<%3> +; CHECK-NEXT: EMIT branch-on-count vp<%9> vp<%1> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -36,21 +37,22 @@ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<%1> = vector-trip-count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION -; CHECK-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1> -; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%b>, vp<%3> +; CHECK-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION start: vp<[[START]]> +; CHECK-NEXT: vp<%4> = SCALAR-STEPS vp<%3>, ir<1> +; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%b>, vp<%4> ; CHECK-NEXT: WIDEN ir<%load> = load ir<%gep> ; CHECK-NEXT: WIDEN-CALL ir<%call> = 
call @foo(ir<%load>) (using library function: foo_vector_fixed4_nomask) -; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%a>, vp<%3> +; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%a>, vp<%4> ; CHECK-NEXT: WIDEN store ir<%arrayidx>, ir<%call> -; CHECK-NEXT: EMIT vp<%8> = VF * UF +(nuw) vp<%2> -; CHECK-NEXT: EMIT branch-on-count vp<%8> vp<%1> +; CHECK-NEXT: EMIT vp<%9> = VF * UF +(nuw) vp<%3> +; CHECK-NEXT: EMIT branch-on-count vp<%9> vp<%1> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -66,21 +68,22 @@ ; CHECK-LABEL: LV: Checking a loop in 'test_v2_v4m' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<%1> = vector-trip-count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION -; CHECK-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1> -; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%b>, vp<%3> +; CHECK-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION start: vp<[[START]]> +; CHECK-NEXT: vp<%4> = SCALAR-STEPS vp<%3>, ir<1> +; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%b>, vp<%4> ; CHECK-NEXT: WIDEN ir<%load> = load ir<%gep> ; CHECK-NEXT: WIDEN-CALL ir<%call> = call @foo(ir<%load>) (using library function: foo_vector_fixed2_nomask) -; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%a>, vp<%3> +; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%a>, vp<%4> ; CHECK-NEXT: WIDEN store ir<%arrayidx>, ir<%call> -; CHECK-NEXT: EMIT vp<%8> = VF * UF +(nuw) vp<%2> -; CHECK-NEXT: EMIT branch-on-count vp<%8> vp<%1> +; CHECK-NEXT: EMIT vp<%9> = VF * UF +(nuw) vp<%3> +; CHECK-NEXT: EMIT branch-on-count vp<%9> vp<%1> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -91,21 +94,22 @@ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<%1> = 
vector-trip-count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION -; CHECK-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1> -; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%b>, vp<%3> +; CHECK-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION start: vp<[[START]]> +; CHECK-NEXT: vp<%4> = SCALAR-STEPS vp<%3>, ir<1> +; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%b>, vp<%4> ; CHECK-NEXT: WIDEN ir<%load> = load ir<%gep> ; CHECK-NEXT: WIDEN-CALL ir<%call> = call @foo(ir<%load>, ir) (using library function: foo_vector_fixed4_mask) -; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%a>, vp<%3> +; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%a>, vp<%4> ; CHECK-NEXT: WIDEN store ir<%arrayidx>, ir<%call> -; CHECK-NEXT: EMIT vp<%8> = VF * UF +(nuw) vp<%2> -; CHECK-NEXT: EMIT branch-on-count vp<%8> vp<%1> +; CHECK-NEXT: EMIT vp<%9> = VF * UF +(nuw) vp<%3> +; CHECK-NEXT: EMIT branch-on-count vp<%9> vp<%1> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -120,21 +124,22 @@ ; CHECK-LABEL: LV: Checking a loop in 'test_v2_v4' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<%1> = vector-trip-count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION -; CHECK-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1> -; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%b>, vp<%3> +; CHECK-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION start: vp<[[START]]> +; CHECK-NEXT: vp<%4> = SCALAR-STEPS vp<%3>, ir<1> +; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%b>, vp<%4> ; CHECK-NEXT: WIDEN ir<%load> = load ir<%gep> ; CHECK-NEXT: WIDEN-CALL 
ir<%call> = call @foo(ir<%load>) (using library function: foo_vector_fixed2_nomask) -; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%a>, vp<%3> +; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%a>, vp<%4> ; CHECK-NEXT: WIDEN store ir<%arrayidx>, ir<%call> -; CHECK-NEXT: EMIT vp<%8> = VF * UF +(nuw) vp<%2> -; CHECK-NEXT: EMIT branch-on-count vp<%8> vp<%1> +; CHECK-NEXT: EMIT vp<%9> = VF * UF +(nuw) vp<%3> +; CHECK-NEXT: EMIT branch-on-count vp<%9> vp<%1> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -145,21 +150,22 @@ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<%1> = vector-trip-count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION -; CHECK-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1> -; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%b>, vp<%3> +; CHECK-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION start: vp<[[START]]> +; CHECK-NEXT: vp<%4> = SCALAR-STEPS vp<%3>, ir<1> +; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%b>, vp<%4> ; CHECK-NEXT: WIDEN ir<%load> = load ir<%gep> ; CHECK-NEXT: WIDEN-CALL ir<%call> = call @foo(ir<%load>) (using library function: foo_vector_fixed4_nomask) -; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%a>, vp<%3> +; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%a>, vp<%4> ; CHECK-NEXT: WIDEN store ir<%arrayidx>, ir<%call> -; CHECK-NEXT: EMIT vp<%8> = VF * UF +(nuw) vp<%2> -; CHECK-NEXT: EMIT branch-on-count vp<%8> vp<%1> +; CHECK-NEXT: EMIT vp<%9> = VF * UF +(nuw) vp<%3> +; CHECK-NEXT: EMIT branch-on-count vp<%9> vp<%1> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block Index: llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll 
=================================================================== --- llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll +++ llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll @@ -8,22 +8,23 @@ ; CHECK-LABEL: LV: Checking a loop in 'test' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<%1> = vector-trip-count +; CHECK-NEXT: Live-in vp<%2> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION -; CHECK-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1> -; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr ir<%src>, vp<%3> +; CHECK-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION start: vp<%2> +; CHECK-NEXT: vp<%4> = SCALAR-STEPS vp<%3>, ir<1> +; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr ir<%src>, vp<%4> ; CHECK-NEXT: WIDEN ir<%l> = load ir<%gep.src> ; CHECK-NEXT: WIDEN ir<%conv> = fpext ir<%l> ; CHECK-NEXT: WIDEN-CALL ir<%s> = call @llvm.sin.f64(ir<%conv>) (using library function: __simd_sin_v2f64) -; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<%3> +; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<%4> ; CHECK-NEXT: REPLICATE store ir<%s>, ir<%gep.dst> -; CHECK-NEXT: EMIT vp<%10> = VF * UF +(nuw) vp<%2> -; CHECK-NEXT: EMIT branch-on-count vp<%10> vp<%1> +; CHECK-NEXT: EMIT vp<%11> = VF * UF +(nuw) vp<%3> +; CHECK-NEXT: EMIT branch-on-count vp<%11> vp<%1> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -34,22 +35,23 @@ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<%1> = vector-trip-count +; CHECK-NEXT: Live-in vp<%2> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<%2> = 
CANONICAL-INDUCTION -; CHECK-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1> -; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr ir<%src>, vp<%3> +; CHECK-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION start: vp<%2> +; CHECK-NEXT: vp<%4> = SCALAR-STEPS vp<%3>, ir<1> +; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr ir<%src>, vp<%4> ; CHECK-NEXT: WIDEN ir<%l> = load ir<%gep.src> ; CHECK-NEXT: WIDEN ir<%conv> = fpext ir<%l> ; CHECK-NEXT: WIDEN-CALL ir<%s> = call @llvm.sin.f64(ir<%conv>) (using vector intrinsic) -; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<%3> +; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<%4> ; CHECK-NEXT: REPLICATE store ir<%s>, ir<%gep.dst> -; CHECK-NEXT: EMIT vp<%10> = VF * UF +(nuw) vp<%2> -; CHECK-NEXT: EMIT branch-on-count vp<%10> vp<%1> +; CHECK-NEXT: EMIT vp<%11> = VF * UF +(nuw) vp<%3> +; CHECK-NEXT: EMIT branch-on-count vp<%11> vp<%1> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block Index: llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -53,22 +53,23 @@ ; CHECK-NEXT: LV: Scalarizing: %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: VPlan 'Initial VPlan for VF={vscale x 4},UF>=1' { ; CHECK-NEXT: Live-in vp<%2> = vector-trip-count +; CHECK-NEXT: Live-in vp<%3> = canonical IV start ; CHECK: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION -; CHECK-NEXT: vp<%4> = DERIVED-IV ir<%n> + vp<%3> * ir<-1> -; CHECK-NEXT: vp<%5> = SCALAR-STEPS vp<%4>, ir<-1> -; CHECK-NEXT: CLONE ir<%i.0> = add vp<%5>, ir<-1> +; CHECK-NEXT: EMIT vp<%4> = CANONICAL-INDUCTION start: vp<%3> +; CHECK-NEXT: vp<%5> = DERIVED-IV ir<%n> + vp<%4> * ir<-1> +; CHECK-NEXT: vp<%6> = 
SCALAR-STEPS vp<%5>, ir<-1> +; CHECK-NEXT: CLONE ir<%i.0> = add vp<%6>, ir<-1> ; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%B>, ir<%idxprom> ; CHECK-NEXT: WIDEN ir<%1> = load ir<%arrayidx> ; CHECK-NEXT: WIDEN ir<%add9> = add ir<%1>, ir<1> ; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr ir<%A>, ir<%idxprom> ; CHECK-NEXT: WIDEN store ir<%arrayidx3>, ir<%add9> -; CHECK-NEXT: EMIT vp<%12> = VF * UF +(nuw) vp<%3> -; CHECK-NEXT: EMIT branch-on-count vp<%12> vp<%2> +; CHECK-NEXT: EMIT vp<%13> = VF * UF +(nuw) vp<%4> +; CHECK-NEXT: EMIT branch-on-count vp<%13> vp<%2> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -187,22 +188,23 @@ ; CHECK-NEXT: LV: Scalarizing: %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: VPlan 'Initial VPlan for VF={vscale x 4},UF>=1' { ; CHECK-NEXT: Live-in vp<%2> = vector-trip-count +; CHECK-NEXT: Live-in vp<%3> = canonical IV start ; CHECK: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION -; CHECK-NEXT: vp<%4> = DERIVED-IV ir<%n> + vp<%3> * ir<-1> -; CHECK-NEXT: vp<%5> = SCALAR-STEPS vp<%4>, ir<-1> -; CHECK-NEXT: CLONE ir<%i.0> = add vp<%5>, ir<-1> +; CHECK-NEXT: EMIT vp<%4> = CANONICAL-INDUCTION start: vp<%3> +; CHECK-NEXT: vp<%5> = DERIVED-IV ir<%n> + vp<%4> * ir<-1> +; CHECK-NEXT: vp<%6> = SCALAR-STEPS vp<%5>, ir<-1> +; CHECK-NEXT: CLONE ir<%i.0> = add vp<%6>, ir<-1> ; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%B>, ir<%idxprom> ; CHECK-NEXT: WIDEN ir<%1> = load ir<%arrayidx> ; CHECK-NEXT: WIDEN ir<%conv1> = fadd ir<%1>, ir<1.000000e+00> ; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr ir<%A>, ir<%idxprom> ; CHECK-NEXT: WIDEN store ir<%arrayidx3>, ir<%conv1> -; CHECK-NEXT: EMIT vp<%12> = VF * UF +(nuw) vp<%3> -; CHECK-NEXT: EMIT branch-on-count vp<%12> vp<%2> +; CHECK-NEXT: 
EMIT vp<%13> = VF * UF +(nuw) vp<%4> +; CHECK-NEXT: EMIT branch-on-count vp<%13> vp<%2> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block Index: llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll +++ llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll @@ -6,24 +6,25 @@ ; CHECK-LABEL: 'test_chained_first_order_recurrences_1' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<%1> = vector-trip-count +; CHECK-NEXT: Live-in vp<%2> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION start: vp<%2> ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.1> = phi ir<22>, ir<%for.1.next> -; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.2> = phi ir<33>, vp<%8> -; CHECK-NEXT: vp<%5> = SCALAR-STEPS vp<%2>, ir<1> -; CHECK-NEXT: CLONE ir<%gep.ptr> = getelementptr ir<%ptr>, vp<%5> +; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.2> = phi ir<33>, vp<%9> +; CHECK-NEXT: vp<%6> = SCALAR-STEPS vp<%3>, ir<1> +; CHECK-NEXT: CLONE ir<%gep.ptr> = getelementptr ir<%ptr>, vp<%6> ; CHECK-NEXT: WIDEN ir<%for.1.next> = load ir<%gep.ptr> -; CHECK-NEXT: EMIT vp<%8> = first-order splice ir<%for.1> ir<%for.1.next> -; CHECK-NEXT: EMIT vp<%9> = first-order splice ir<%for.2> vp<%8> -; CHECK-NEXT: WIDEN ir<%add> = add vp<%8>, vp<%9> +; CHECK-NEXT: EMIT vp<%9> = first-order splice ir<%for.1> ir<%for.1.next> +; CHECK-NEXT: EMIT vp<%10> = first-order splice ir<%for.2> vp<%9> +; CHECK-NEXT: WIDEN ir<%add> = add vp<%9>, vp<%10> ; CHECK-NEXT: WIDEN store ir<%gep.ptr>, ir<%add> -; CHECK-NEXT: EMIT vp<%11> = VF * UF +(nuw) vp<%2> -; CHECK-NEXT: EMIT 
branch-on-count vp<%11> vp<%1> +; CHECK-NEXT: EMIT vp<%12> = VF * UF +(nuw) vp<%3> +; CHECK-NEXT: EMIT branch-on-count vp<%12> vp<%1> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block @@ -55,27 +56,28 @@ ; CHECK-LABEL: 'test_chained_first_order_recurrences_3' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<%1> = vector-trip-count +; CHECK-NEXT: Live-in vp<%2> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION start: vp<%2> ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.1> = phi ir<22>, ir<%for.1.next> -; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.2> = phi ir<33>, vp<%9> -; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.3> = phi ir<33>, vp<%10> -; CHECK-NEXT: vp<%6> = SCALAR-STEPS vp<%2>, ir<1> -; CHECK-NEXT: CLONE ir<%gep.ptr> = getelementptr ir<%ptr>, vp<%6> +; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.2> = phi ir<33>, vp<%10> +; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.3> = phi ir<33>, vp<%11> +; CHECK-NEXT: vp<%7> = SCALAR-STEPS vp<%3>, ir<1> +; CHECK-NEXT: CLONE ir<%gep.ptr> = getelementptr ir<%ptr>, vp<%7> ; CHECK-NEXT: WIDEN ir<%for.1.next> = load ir<%gep.ptr> -; CHECK-NEXT: EMIT vp<%9> = first-order splice ir<%for.1> ir<%for.1.next> -; CHECK-NEXT: EMIT vp<%10> = first-order splice ir<%for.2> vp<%9> -; CHECK-NEXT: EMIT vp<%11> = first-order splice ir<%for.3> vp<%10> -; CHECK-NEXT: WIDEN ir<%add.1> = add vp<%9>, vp<%10> -; CHECK-NEXT: WIDEN ir<%add.2> = add ir<%add.1>, vp<%11> +; CHECK-NEXT: EMIT vp<%10> = first-order splice ir<%for.1> ir<%for.1.next> +; CHECK-NEXT: EMIT vp<%11> = first-order splice ir<%for.2> vp<%10> +; CHECK-NEXT: EMIT vp<%12> = first-order splice ir<%for.3> vp<%11> +; CHECK-NEXT: WIDEN ir<%add.1> = add vp<%10>, vp<%11> +; CHECK-NEXT: WIDEN ir<%add.2> = add
ir<%add.1>, vp<%12> ; CHECK-NEXT: WIDEN store ir<%gep.ptr>, ir<%add.2> -; CHECK-NEXT: EMIT vp<%14> = VF * UF +(nuw) vp<%2> -; CHECK-NEXT: EMIT branch-on-count vp<%14> vp<%1> +; CHECK-NEXT: EMIT vp<%15> = VF * UF +(nuw) vp<%3> +; CHECK-NEXT: EMIT branch-on-count vp<%15> vp<%1> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: Successor(s): middle.block Index: llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -11,13 +11,14 @@ ; CHECK: VPlan 'Initial VPlan for Tail Folded VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%0> = phi ir<0>, ir<%conv> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> ; CHECK-NEXT: vp<[[STEPS:%.]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> @@ -52,7 +53,7 @@ ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: ; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x> -; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<%6> +; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE ir<%add> = add ir<%conv>, ir<%rem> ; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep.dst> ; CHECK-NEXT: Successor(s): pred.store.continue @@ -100,13 +101,14 @@ ; CHECK: VPlan 'Initial VPlan for Tail Folded VF={2},UF>=1' { ; CHECK-NEXT: Live-in 
vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> @@ -168,13 +170,14 @@ ; CHECK: VPlan 'Initial VPlan for Tail Folded VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next> ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%and.red> = phi ir<1234>, ir<%and.red.next> ; CHECK-NEXT: EMIT vp<[[WIDEN_CAN:%.+]]> = WIDEN-CANONICAL-INDUCTION vp<[[CAN_IV]]> @@ -241,13 +244,14 @@ ; CHECK: VPlan 'Initial VPlan for Tail Folded VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = 
CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%0> = phi ir<0>, ir<%conv> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> @@ -285,7 +289,7 @@ ; CHECK-NEXT: REPLICATE ir<%lv.2> = load ir<%gep> ; CHECK-NEXT: REPLICATE ir<%conv.lv.2> = sext ir<%lv.2> ; CHECK-NEXT: REPLICATE ir<%add.1> = add ir<%conv>, ir<%rem> -; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<%6> +; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[STEPS]]> ; CHECK-NEXT: REPLICATE ir<%add> = add ir<%add.1>, ir<%conv.lv.2> ; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep.dst> ; CHECK-NEXT: Successor(s): pred.store.continue @@ -338,13 +342,14 @@ ; CHECK: VPlan 'Initial VPlan for Tail Folded VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> @@ -411,13 +416,14 @@ ; CHECK: VPlan 'Initial VPlan for Tail Folded VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; 
CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%.pn> = phi ir<0>, ir<[[L:%.+]]> ; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<2> + vp<[[CAN_IV]]> * ir<1> ; CHECK-NEXT: EMIT vp<[[WIDE_IV:%.+]]> = WIDEN-CANONICAL-INDUCTION vp<[[CAN_IV]]> Index: llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll +++ llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll @@ -39,13 +39,14 @@ ; CHECK: VPlan 'Initial VPlan for Tail Folded VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> ; CHECK-NEXT: EMIT vp<[[COND:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: WIDEN ir<%cond0> = icmp ult ir<%iv>, ir<13> Index: llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll +++ llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll @@ -6,13 +6,14 @@ ; DBG-LABEL: 'test_scalarize_call' ; DBG: VPlan 'Initial VPlan for VF={1},UF>=1' { ; DBG-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; DBG-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; DBG-EMPTY: ; DBG-NEXT: vector.ph: ; DBG-NEXT: Successor(s): vector loop ; DBG-EMPTY: ; DBG-NEXT: vector loop: { ; DBG-NEXT: vector.body: -; DBG-NEXT: EMIT vp<[[CAN_IV:%.+]]> = 
CANONICAL-INDUCTION +; DBG-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; DBG-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<%start> + vp<[[CAN_IV]]> * ir<1> ; DBG-NEXT: vp<[[IV_STEPS:%.]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<1> ; DBG-NEXT: CLONE ir<%min> = call @llvm.smin.i32(vp<[[IV_STEPS]]>, ir<65535>) @@ -62,7 +63,8 @@ ; DBG-LABEL: 'test_scalarize_with_branch_cond' -; DBG: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; DBG: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; DBG-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; DBG-EMPTY: ; DBG-NEXT: vector.ph: ; DBG-NEXT: Successor(s): vector loop @@ -168,13 +170,14 @@ ; DBG-LABEL: 'first_order_recurrence_using_induction' ; DBG: VPlan 'Initial VPlan for VF={1},UF>=1' { ; DBG-NEXT: Live-in vp<%1> = vector-trip-count +; DBG-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; DBG-EMPTY: ; DBG-NEXT: vector.ph: ; DBG-NEXT: Successor(s): vector loop ; DBG-EMPTY: ; DBG-NEXT: vector loop: { ; DBG-NEXT: vector.body: -; DBG-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; DBG-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; DBG-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for> = phi ir<0>, vp<[[SCALAR_STEPS:.+]]> ; DBG-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * ir<1> (truncated to i32) ; DBG-NEXT: vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<1> Index: llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll +++ llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll @@ -22,7 +22,7 @@ ; CHECK-NEXT: label="\ vector loop" ; CHECK-NEXT: N1 [label = ; CHECK-NEXT: "vector.body:\l" + -; CHECK-NEXT: " EMIT vp\<[[CAN_IV:%.+]]\> = CANONICAL-INDUCTION\l" + +; CHECK-NEXT: " EMIT vp\<[[CAN_IV:%.+]]\> = CANONICAL-INDUCTION start: vp\<[[START:%.+]]\>\l" + ; CHECK-NEXT: " vp\<[[STEPS:%.+]]\> = SCALAR-STEPS 
vp\<[[CAN_IV]]\>, ir\<1\>\l" + ; CHECK-NEXT: " CLONE ir\<%arrayidx\> = getelementptr ir\<%y\>, vp\<[[STEPS]]\>\l" + ; CHECK-NEXT: " WIDEN ir\<%lv\> = load ir\<%arrayidx\>\l" + Index: llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll +++ llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll @@ -5,13 +5,14 @@ ; CHECK-LABEL: LV: Checking a loop in 'iv_no_binary_op_in_descriptor' ; CHECK: VPlan 'Initial VPlan for VF={8},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next.p, ir<1> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%dst>, vp<[[STEPS:%.+]]> Index: llvm/test/Transforms/LoopVectorize/vplan-printing.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -10,13 +10,14 @@ ; CHECK-LABEL: Checking a loop in 'print_call_and_memory' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: 
vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%y>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN ir<%lv> = load ir<%arrayidx> @@ -56,13 +57,14 @@ ; CHECK-LABEL: Checking a loop in 'print_widen_gep_and_select' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi %iv.next, 0, ir<1> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: WIDEN-GEP Inv[Var] ir<%arrayidx> = getelementptr ir<%y>, ir<%iv> @@ -107,13 +109,14 @@ ; CHECK-LABEL: Checking a loop in 'print_reduction' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0.000000e+00>, ir<%red.next> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%y>, vp<[[STEPS]]> @@ -152,13 +155,14 @@ ; CHECK-LABEL: Checking a loop in 'print_reduction_with_invariant_store' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: 
vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0.000000e+00>, ir<%red.next> ; CHECK-NEXT: vp<[[IV:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%y>, vp<[[IV]]> @@ -196,13 +200,14 @@ ; CHECK-LABEL: Checking a loop in 'print_replicate_predicated_phi' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: WIDEN-INDUCTION %i = phi 0, %i.next, ir<1> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: WIDEN ir<%cmp> = icmp ult ir<%i>, ir<5> @@ -269,13 +274,14 @@ ; CHECK-LABEL: Checking a loop in 'print_interleave_groups' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * ir<4> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<4> ; CHECK-NEXT: CLONE ir<%gep.AB.0> = getelementptr ir<@AB>, ir<0>, vp<[[STEPS]]> 
@@ -336,13 +342,14 @@ ; CHECK-LABEL: Checking a loop in 'print_fmuladd_strict' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%sum.07> = phi ir<0.000000e+00>, ir<%muladd> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%a>, vp<[[STEPS]]> @@ -386,13 +393,14 @@ ; CHECK-LABEL: Checking a loop in 'debug_loc_vpinstruction' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%isd> = getelementptr ir<%asd>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN ir<%lsd> = load ir<%isd> @@ -471,6 +479,7 @@ ; CHECK-LABEL: Checking a loop in 'print_expand_scev' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<%0> = vector-trip-count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: EMIT vp<[[EXP_SCEV:%.+]]> = EXPAND SCEV (1 + (%y /u 492802768830814060)) @@ -478,7 +487,7 @@ ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = 
CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: WIDEN-INDUCTION\l" + ; CHECK-NEXT: " %iv = phi %iv.next, 0\l" + ; CHECK-NEXT: " ir<%v2>, vp<[[EXP_SCEV]]> @@ -521,13 +530,14 @@ ; CHECK-LABEL: Checking a loop in 'print_exit_value' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]> Index: llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll +++ llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll @@ -9,13 +9,14 @@ ; CHECK-LABEL: sink_with_sideeffects ; CHECK: VPlan 'Initial VPlan for VF={1},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%tmp2> = getelementptr ir<%ptr>, vp<[[STEPS]]> ; CHECK-NEXT: CLONE ir<%tmp3> = load ir<%tmp2> Index: 
llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -13,13 +13,14 @@ ; CHECK: VPlan 'Initial VPlan for Tail Folded VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: Successor(s): pred.store @@ -76,13 +77,14 @@ ; CHECK: VPlan 'Initial VPlan for Tail Folded VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: Successor(s): pred.load @@ -154,13 +156,14 @@ ; CHECK: VPlan 'Initial VPlan for Tail Folded VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; 
CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: Successor(s): pred.load @@ -234,13 +237,14 @@ ; CHECK: VPlan 'Initial VPlan for Tail Folded VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 21, %iv.next, ir<1> ; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<21> + vp<[[CAN_IV]]> * ir<1> ; CHECK-NEXT: EMIT vp<[[WIDE_CAN_IV:%.+]]> = WIDEN-CANONICAL-INDUCTION vp<[[CAN_IV]]> @@ -303,13 +307,14 @@ ; CHECK: VPlan 'Initial VPlan for Tail Folded VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK1:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: WIDEN ir<%c.1> = icmp ult ir<%iv>, ir<%j> @@ 
-397,13 +402,14 @@ ; CHECK: VPlan 'Initial VPlan for Tail Folded VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK1:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%iv>, ir<10> @@ -500,13 +506,14 @@ ; CHECK: VPlan 'Initial VPlan for Tail Folded VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK1:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%iv>, ir<10> @@ -603,13 +610,14 @@ ; CHECK: VPlan 'Initial VPlan for Tail Folded VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = 
CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> @@ -702,13 +710,14 @@ ; CHECK: VPlan 'Initial VPlan for Tail Folded VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: Successor(s): pred.store @@ -763,13 +772,14 @@ ; CHECK: VPlan 'Initial VPlan for Tail Folded VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for> = phi ir<0>, vp<[[PRED:%.+]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> @@ -842,13 +852,14 @@ ; CHECK-LABEL: LV: Checking a loop in 'update_multiple_users' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; 
CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: Successor(s): pred.store ; CHECK-EMPTY: ; CHECK-NEXT: pred.store: { @@ -905,13 +916,14 @@ ; CHECK-LABEL: LV: Checking a loop in 'sinking_requires_duplication' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1> ; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%addr>, vp<[[STEPS]]> ; CHECK-NEXT: WIDEN ir<%0> = load ir<%gep> @@ -973,13 +985,14 @@ ; CHECK: VPlan 'Initial VPlan for Tail Folded VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<%n> + vp<[[CAN_IV]]> * ir<-1> ; CHECK-NEXT: EMIT vp<[[WIDE_IV:%.+]]> = WIDEN-CANONICAL-INDUCTION vp<[[CAN_IV]]> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule vp<[[WIDE_IV]]> vp<[[BTC]]> @@ -1037,13 +1050,14 @@ ; CHECK-LABEL: LV: Checking a loop in 
'ptr_induction_remove_dead_recipe' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in vp<[[START:%.+]]> = canonical IV start ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION start: vp<[[START]]> ; CHECK-NEXT: EMIT ir<%ptr.iv> = WIDEN-POINTER-INDUCTION ir<%start>, -1 ; CHECK-NEXT: CLONE ir<%ptr.iv.next> = getelementptr ir<%ptr.iv>, ir<-1> ; CHECK-NEXT: WIDEN ir<%l> = load ir<%ptr.iv.next> Index: llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp =================================================================== --- llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp +++ llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp @@ -39,7 +39,7 @@ TEST(VPVerifierTest, VPInstructionUseBeforeDefDifferentBB) { VPInstruction *DefI = new VPInstruction(Instruction::Add, {}); VPInstruction *UseI = new VPInstruction(Instruction::Sub, {DefI}); - auto *CanIV = new VPCanonicalIVPHIRecipe(UseI, {}); + auto *CanIV = new VPCanonicalIVPHIRecipe(UseI, nullptr, {}); VPInstruction *BranchOnCond = new VPInstruction(VPInstruction::BranchOnCond, {CanIV}); @@ -74,7 +74,7 @@ VPInstruction *I1 = new VPInstruction(Instruction::Add, {}); VPInstruction *DefI = new VPInstruction(Instruction::Add, {}); - auto *CanIV = new VPCanonicalIVPHIRecipe(I1, {}); + auto *CanIV = new VPCanonicalIVPHIRecipe(I1, nullptr, {}); VPInstruction *BranchOnCond = new VPInstruction(VPInstruction::BranchOnCond, {CanIV}); auto *Blend = new VPBlendRecipe(Phi, {DefI});