diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -564,6 +564,13 @@ ArrayRef BypassBlocks, std::pair AdditionalBypass = {nullptr, nullptr}); + /// Returns the original loop trip count. + Value *getTripCount() const { return TripCount; } + + /// Used to set the trip count after ILV's construction and after the + /// preheader block has been executed. + void setTripCount(Value *TC) { TripCount = TC; } + protected: friend class LoopVectorizationPlanner; @@ -605,9 +612,6 @@ /// represented as. void truncateToMinimalBitwidths(VPTransformState &State); - /// Returns (and creates if needed) the original loop trip count. - Value *getOrCreateTripCount(BasicBlock *InsertBlock); - /// Returns (and creates if needed) the trip count of the widened loop. Value *getOrCreateVectorTripCount(BasicBlock *InsertBlock); @@ -2869,41 +2873,12 @@ PredicatedInstructions.push_back(Cloned); } -Value *InnerLoopVectorizer::getOrCreateTripCount(BasicBlock *InsertBlock) { - if (TripCount) - return TripCount; - - assert(InsertBlock); - IRBuilder<> Builder(InsertBlock->getTerminator()); - // Find the loop boundaries. - Type *IdxTy = Legal->getWidestInductionType(); - assert(IdxTy && "No type for induction"); - const SCEV *ExitCount = createTripCountSCEV(IdxTy, PSE, OrigLoop); - - const DataLayout &DL = InsertBlock->getModule()->getDataLayout(); - - // Expand the trip count and place the new instructions in the preheader. - // Notice that the pre-header does not change, only the loop body. - SCEVExpander Exp(*PSE.getSE(), DL, "induction"); - - // Count holds the overall loop count (N). - TripCount = Exp.expandCodeFor(ExitCount, ExitCount->getType(), - InsertBlock->getTerminator()); - - if (TripCount->getType()->isPointerTy()) - TripCount = - CastInst::CreatePointerCast(TripCount, IdxTy, "exitcount.ptrcnt.to.int", - InsertBlock->getTerminator()); - - return TripCount; -} - Value * InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) { if (VectorTripCount) return VectorTripCount; - Value *TC = getOrCreateTripCount(InsertBlock); + Value *TC = getTripCount(); IRBuilder<> Builder(InsertBlock->getTerminator()); Type *Ty = TC->getType(); @@ -2981,7 +2956,7 @@ } void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) { - Value *Count = getOrCreateTripCount(LoopVectorPreHeader); + Value *Count = getTripCount(); // Reuse existing vector loop preheader for TC checks. // Note that new preheader block is generated for vector loop. BasicBlock *const TCCheckBlock = LoopVectorPreHeader; @@ -3241,7 +3216,7 @@ BasicBlock *InnerLoopVectorizer::completeLoopSkeleton() { // The trip counts should be cached by now. - Value *Count = getOrCreateTripCount(LoopVectorPreHeader); + Value *Count = getTripCount(); Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader); auto *ScalarLatchTerm = OrigLoop->getLoopLatch()->getTerminator(); @@ -3281,8 +3256,8 @@ the vectorized instructions while the old loop will continue to run the scalar remainder. - [ ] <-- loop iteration number check. - / | + [ ] <-- old preheader - loop iteration number check and SCEVs in Plans + / | preheader are expanded here. / v | [ ] <-- vector loop bypass (may consist of multiple blocks). | / | @@ -3382,7 +3357,7 @@ CountMinusOne->setName("cmo"); VPValue *StepVPV = Plan.getSCEVExpansion(II.getStep()); - Value *Step = StepVPV->getDefiningRecipe() ? State.get(StepVPV, 0) + Value *Step = StepVPV->getDefiningRecipe() ? State.get(StepVPV, {0, 0}) : StepVPV->getLiveInIRValue(); Value *Escape = emitTransformedIndex(B, CountMinusOne, II.getStartValue(), Step, II); @@ -7703,23 +7678,24 @@ LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << BestVF << ", UF=" << BestUF << '\n'); - // Workaround! Compute the trip count of the original loop and cache it - // before we start modifying the CFG. This code has a systemic problem - // wherein it tries to run analysis over partially constructed IR; this is - // wrong, and not simply for SCEV. The trip count of the original loop - // simply happens to be prone to hitting this in practice. In theory, we - // can hit the same issue for any SCEV, or ValueTracking query done during - // mutation. See PR49900. - ILV.getOrCreateTripCount(OrigLoop->getLoopPreheader()); - if (!IsEpilogueVectorization) VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE); // Perform the actual loop transformation. + VPTransformState State{BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan}; + + // 0. Generate SCEV-dependent code into the preheader, including TripCount, + // before making any changes to the CFG. + if (!BestVPlan.getPreheader()->empty()) { + State.CFG.PrevBB = OrigLoop->getLoopPreheader(); + State.Builder.SetInsertPoint(OrigLoop->getLoopPreheader()->getTerminator()); + BestVPlan.getPreheader()->execute(&State); + } + if (!ILV.getTripCount()) + ILV.setTripCount(State.get(BestVPlan.getTripCount(), {0, 0})); // 1. Set up the skeleton for vectorization, including vector pre-header and // middle block. The vector loop is created during VPlan execution. - VPTransformState State{BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan}; Value *CanonicalIVStartValue; std::tie(State.CFG.PrevBB, CanonicalIVStartValue) = ILV.createVectorizedLoopSkeleton(); @@ -7755,10 +7731,9 @@ //===------------------------------------------------===// // 2. Copy and widen instructions from the old loop into the new loop. - BestVPlan.prepareToExecute(ILV.getOrCreateTripCount(nullptr), - ILV.getOrCreateVectorTripCount(nullptr), - CanonicalIVStartValue, State, - IsEpilogueVectorization); + BestVPlan.prepareToExecute( + ILV.getTripCount(), ILV.getOrCreateVectorTripCount(nullptr), + CanonicalIVStartValue, State, IsEpilogueVectorization); BestVPlan.execute(&State); @@ -7873,7 +7848,7 @@ assert(Bypass && "Expected valid bypass basic block."); ElementCount VFactor = ForEpilogue ? EPI.EpilogueVF : VF; unsigned UFactor = ForEpilogue ? EPI.EpilogueUF : UF; - Value *Count = getOrCreateTripCount(LoopVectorPreHeader); + Value *Count = getTripCount(); // Reuse existing vector loop preheader for TC checks. // Note that new preheader block is generated for vector loop. BasicBlock *const TCCheckBlock = LoopVectorPreHeader; @@ -8192,7 +8167,7 @@ VPBuilder::InsertPointGuard Guard(Builder); Builder.setInsertPoint(HeaderVPBB, NewInsertionPoint); if (useActiveLaneMask(TFStyle)) { - VPValue *TC = Plan.getOrCreateTripCount(); + VPValue *TC = Plan.getTripCount(); BlockMask = Builder.createNaryOp(VPInstruction::ActiveLaneMask, {IV, TC}, nullptr, "active.lane.mask"); } else { @@ -8769,7 +8744,7 @@ VecPreheader->appendRecipe(CanonicalIVIncrementParts); // Create the ActiveLaneMask instruction using the correct start values. - VPValue *TC = Plan.getOrCreateTripCount(); + VPValue *TC = Plan.getTripCount(); VPValue *TripCount, *IncrementValue; if (Style == TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck) { @@ -8858,6 +8833,21 @@ } } +static VPlanPtr createInitialVPlan(Type *InductionTy, + PredicatedScalarEvolution &PSE, Loop *L) { + VPBasicBlock *Preheader = new VPBasicBlock("ph"); + assert(InductionTy && "No type for induction"); + + VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph"); + auto Plan = std::make_unique(Preheader, VecPreheader); + const SCEV *TripCount = createTripCountSCEV(InductionTy, PSE, L); + + VPValue *TripCountVPV = + vputils::getOrCreateVPValueForSCEVExpr(*Plan, TripCount, *PSE.getSE()); + Plan->setTripCount(TripCountVPV); + return Plan; +} + std::optional LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( VFRange &Range, SmallPtrSetImpl &DeadInstructions) { @@ -8911,17 +8901,18 @@ // visit each basic block after having visited its predecessor basic blocks. // --------------------------------------------------------------------------- - // Create initial VPlan skeleton, starting with a block for the pre-header, - // followed by a region for the vector loop, followed by the middle block. The - // skeleton vector loop region contains a header and latch block. - VPBasicBlock *Preheader = new VPBasicBlock("vector.ph"); - auto Plan = std::make_unique(Preheader); - + // Create initial VPlan skeleton, having a basic block for the pre-header + // which contains SCEV expansions that need to happen before the CFG is + // modified; a basic block for the vector pre-header, followed by a region for + // the vector loop, followed by the middle basic block. The skeleton vector + // loop region contains a header and latch basic blocks. + VPlanPtr Plan = + createInitialVPlan(Legal->getWidestInductionType(), PSE, OrigLoop); VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body"); VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch"); VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB); auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop"); - VPBlockUtils::insertBlockAfter(TopRegion, Preheader); + VPBlockUtils::insertBlockAfter(TopRegion, Plan->getEntry()); VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block"); VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion); @@ -9109,7 +9100,8 @@ assert(EnableVPlanNativePath && "VPlan-native path is not enabled."); // Create new empty VPlan - auto Plan = std::make_unique(); + auto Plan = + createInitialVPlan(Legal->getWidestInductionType(), PSE, OrigLoop); // Build hierarchical CFG VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan); @@ -9832,7 +9824,8 @@ if (!hasScalarValue(Def, {Part, LastLane})) { // At the moment, VPWidenIntOrFpInductionRecipes and VPScalarIVStepsRecipes can also be uniform. assert((isa(Def->getDefiningRecipe()) || - isa(Def->getDefiningRecipe())) && + isa(Def->getDefiningRecipe()) || + isa(Def->getDefiningRecipe())) && "unexpected recipe found to be invariant"); IsUniform = true; LastLane = 0; @@ -10419,6 +10412,8 @@ VPBasicBlock *Header = VectorLoop->getEntryBasicBlock(); Header->setName("vec.epilog.vector.body"); + EpilogILV.setTripCount(MainILV.getTripCount()); + // Ensure that the start values for any VPWidenIntOrFpInductionRecipe, // VPWidenPointerInductionRecipe and VPReductionPHIRecipes are updated // before vectorizing the epilogue loop. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2218,6 +2218,10 @@ /// preheader of the vector loop. VPBasicBlock *Entry; + /// VPBasicBlock corresponding to the original preheader. Used to place + /// VPExpandSCEV recipes for expressions used during skeleton creation. + VPBasicBlock *Preheader; + /// Holds the VFs applicable to this VPlan. SmallSetVector VFs; @@ -2260,9 +2264,15 @@ DenseMap SCEVToExpansion; public: - VPlan(VPBasicBlock *Entry = nullptr) : Entry(Entry) { - if (Entry) - Entry->setPlan(this); + /// Construct an empty VPlan. + VPlan() {} + + /// Construct a VPlan with original preheader \p Preheader and \p Entry to + /// the plan. + VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry) + : Entry(Entry), Preheader(Preheader) { + Entry->setPlan(this); + Preheader->setPlan(this); } ~VPlan(); @@ -2285,12 +2295,16 @@ } /// The trip count of the original loop. - VPValue *getOrCreateTripCount() { - if (!TripCount) - TripCount = new VPValue(); + VPValue *getTripCount() const { + assert(TripCount && "trip count needs to be set before accessing it"); return TripCount; } + void setTripCount(VPValue *TC) { + assert(!TripCount && "trip count already set"); + TripCount = TC; + } + /// The backedge taken count of the original loop. VPValue *getOrCreateBackedgeTakenCount() { if (!BackedgeTakenCount) @@ -2435,6 +2449,10 @@ SCEVToExpansion[S] = V; } + /// \return The block corresponding to the original preheader. + VPBasicBlock *getPreheader() { return Preheader; } + const VPBasicBlock *getPreheader() const { return Preheader; } + private: /// Add to the given dominator tree the header block and every new basic block /// that was created between it and the latch block, inclusive. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -163,8 +163,9 @@ } void VPBlockBase::setPlan(VPlan *ParentPlan) { - assert(ParentPlan->getEntry() == this && - "Can only set plan on its entry block."); + assert( + (ParentPlan->getEntry() == this || ParentPlan->getPreheader() == this) && + "Can only set plan on its entry or preheader block."); Plan = ParentPlan; } @@ -593,8 +594,6 @@ } for (VPValue *VPV : VPLiveInsToFree) delete VPV; - if (TripCount) - delete TripCount; if (BackedgeTakenCount) delete BackedgeTakenCount; } @@ -612,13 +611,6 @@ Value *CanonicalIVStartValue, VPTransformState &State, bool IsEpilogueVectorization) { - - // Check if the trip count is needed, and if so build it. - if (TripCount && TripCount->getNumUsers()) { - for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) - State.set(TripCount, TripCountV, Part); - } - // Check if the backedge taken count is needed, and if so build it. if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) { IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); @@ -772,6 +764,12 @@ if (AnyLiveIn) O << "\n"; + if (!getPreheader()->empty()) { + + O << "\n"; + getPreheader()->print(O, "", SlotTracker); + } + for (const VPBlockBase *Block : vp_depth_first_shallow(getEntry())) { O << '\n'; Block->print(O, "", SlotTracker); @@ -1106,11 +1104,13 @@ } void VPSlotTracker::assignSlots(const VPlan &Plan) { + for (const VPRecipeBase &Recipe : *Plan.getPreheader()) + for (VPValue *Def : Recipe.definedValues()) + assignSlot(Def); + assignSlot(&Plan.VectorTripCount); if (Plan.BackedgeTakenCount) assignSlot(Plan.BackedgeTakenCount); - if (Plan.TripCount) - assignSlot(Plan.TripCount); ReversePostOrderTraversal> RPOT(VPBlockDeepTraversalWrapper(Plan.getEntry())); @@ -1136,9 +1136,8 @@ else if (auto *E = dyn_cast(Expr)) Expanded = Plan.getVPValueOrAddLiveIn(E->getValue()); else { - VPBasicBlock *Preheader = Plan.getEntry(); Expanded = new VPExpandSCEVRecipe(Expr, SE); - Preheader->appendRecipe(Expanded->getDefiningRecipe()); + Plan.getPreheader()->appendRecipe(Expanded->getDefiningRecipe()); } Plan.addSCEVExpansion(Expr, Expanded); return Expanded; diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -252,7 +252,7 @@ // Get first lane of vector induction variable. Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0)); // Get the original loop tripcount. - Value *ScalarTC = State.get(getOperand(1), Part); + Value *ScalarTC = State.get(getOperand(1), VPIteration(Part, 0)); auto *Int1Ty = Type::getInt1Ty(Builder.getContext()); auto *PredTy = VectorType::get(Int1Ty, State.VF); @@ -288,7 +288,7 @@ break; } case VPInstruction::CalculateTripCountMinusVF: { - Value *ScalarTC = State.get(getOperand(0), Part); + Value *ScalarTC = State.get(getOperand(0), {0, 0}); Value *Step = createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF); Value *Sub = Builder.CreateSub(ScalarTC, Step); @@ -1152,7 +1152,7 @@ &*State.Builder.GetInsertPoint()); for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) - State.set(this, Res, Part); + State.set(this, Res, {Part, 0}); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll @@ -158,13 +158,13 @@ ; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[DOTSPLAT]], ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND8:%.*]] = phi <2 x i64> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX7]], 0 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 ; CHECK-NEXT: store <2 x i64> [[VEC_IND8]], ptr [[TMP9]], align 4 -; CHECK-NEXT: [[INDEX_NEXT11]] = add nuw i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[INDEX_NEXT11]] = add nuw i64 [[INDEX7]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT10]] = add <2 x i64> [[VEC_IND8]], ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT11]], [[N_VEC4]] ; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}} @@ -238,6 +238,7 @@ ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[IND_END6:%.*]] = add i64 [[START]], [[N_VEC]] +; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[N]], [[START]] ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 2 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] @@ -255,14 +256,14 @@ ; CHECK-NEXT: [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT17:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND13:%.*]] = phi <2 x i64> [ [[INDUCTION12]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT15:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX16:%.*]] = add i64 [[START]], [[INDEX9]] -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX16]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP9]], i32 0 -; CHECK-NEXT: store <2 x i64> [[VEC_IND13]], ptr [[TMP10]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX16]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0 +; CHECK-NEXT: store <2 x i64> [[VEC_IND13]], ptr [[TMP11]], align 4 ; CHECK-NEXT: [[INDEX_NEXT17]] = add nuw i64 [[INDEX9]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT15]] = add <2 x i64> [[VEC_IND13]], -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT17]], [[N_VEC4]] -; CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}} +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT17]], [[N_VEC4]] +; CHECK-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N8:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC4]] ; CHECK-NEXT: br i1 [[CMP_N8]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -341,14 +342,14 @@ ; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[DOTSPLAT]], ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND10:%.*]] = phi <2 x i64> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX9]], 0 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = add <2 x i64> [[VEC_IND10]], ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0 ; CHECK-NEXT: store <2 x i64> [[TMP11]], ptr [[TMP12]], align 4 -; CHECK-NEXT: [[INDEX_NEXT13]] = add nuw i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[INDEX_NEXT13]] = add nuw i64 [[INDEX9]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT12]] = add <2 x i64> [[VEC_IND10]], ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT13]], [[IND_END]] ; CHECK-NEXT: br i1 [[TMP13]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}} @@ -517,13 +518,13 @@ ; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i8> [[DOTSPLAT]], ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND5:%.*]] = phi <2 x i8> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT7:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX4]], 0 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0 ; CHECK-NEXT: store <2 x i8> [[VEC_IND5]], ptr [[TMP10]], align 1 -; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[INDEX4]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT7]] = add <2 x i8> [[VEC_IND5]], ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT8]], 10000 ; CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}} diff --git a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll --- a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll @@ -38,6 +38,8 @@ ; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[IND_END5:%.*]] = add i64 3, [[N_VEC]] +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[N]], -3 +; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 16 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] @@ -47,8 +49,8 @@ ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP3]], 16 ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF2]] ; CHECK-NEXT: [[IND_END4:%.*]] = add i64 3, [[N_VEC3]] -; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[BC_RESUME_VAL]] to i8 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[TMP8]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = trunc i64 [[BC_RESUME_VAL]] to i8 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[TMP10]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <16 x i8> [[DOTSPLAT]], ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] @@ -56,14 +58,14 @@ ; CHECK-NEXT: [[INDEX8:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND9:%.*]] = phi <16 x i8> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX11:%.*]] = add i64 3, [[INDEX8]] -; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX11]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0 -; CHECK-NEXT: store <16 x i8> [[VEC_IND9]], ptr [[TMP11]], align 1 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX11]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 0 +; CHECK-NEXT: store <16 x i8> [[VEC_IND9]], ptr [[TMP13]], align 1 ; CHECK-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX8]], 16 ; CHECK-NEXT: [[VEC_IND_NEXT10]] = add <16 x i8> [[VEC_IND9]], -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT12]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT12]], [[N_VEC3]] +; CHECK-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N7]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -72,9 +74,9 @@ ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] ; CHECK: .lr.ph: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ] -; CHECK-NEXT: [[TMP13:%.*]] = trunc i64 [[INDVARS_IV]] to i8 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i8 [[TMP13]], ptr [[TMP14]], align 1 +; CHECK-NEXT: [[TMP15:%.*]] = trunc i64 [[INDVARS_IV]] to i8 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store i8 [[TMP15]], ptr [[TMP16]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll --- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll @@ -55,47 +55,52 @@ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[IND_END6:%.*]] = add i64 [[IV_START]], [[N_VEC]] +; CHECK-NEXT: [[IND_END7:%.*]] = add i64 [[IV_START]], [[N_VEC]] +; CHECK-NEXT: [[TMP20:%.*]] = trunc i64 [[IV_START]] to i32 +; CHECK-NEXT: [[SMAX3:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP20]], i32 92) +; CHECK-NEXT: [[TMP21:%.*]] = sub i32 [[SMAX3]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +; CHECK-NEXT: [[TMP23:%.*]] = add nuw nsw i64 [[TMP22]], 1 ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IV_START]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[N_MOD_VF3:%.*]] = urem i64 [[TMP3]], 8 -; CHECK-NEXT: [[N_VEC4:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF3]] -; CHECK-NEXT: [[IND_END5:%.*]] = add i64 [[IV_START]], [[N_VEC4]] -; CHECK-NEXT: [[TMP20:%.*]] = trunc i64 [[BC_RESUME_VAL]] to i32 -; CHECK-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <8 x i32> poison, i32 [[TMP20]], i64 0 -; CHECK-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT10]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION12:%.*]] = add <8 x i32> [[DOTSPLAT11]], +; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[TMP3]], 8 +; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF4]] +; CHECK-NEXT: [[IND_END6:%.*]] = add i64 [[IV_START]], [[N_VEC5]] +; CHECK-NEXT: [[TMP24:%.*]] = trunc i64 [[BC_RESUME_VAL]] to i32 +; CHECK-NEXT: [[DOTSPLATINSERT11:%.*]] = insertelement <8 x i32> poison, i32 [[TMP24]], i64 0 +; CHECK-NEXT: [[DOTSPLAT12:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT11]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION13:%.*]] = add <8 x i32> [[DOTSPLAT12]], ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT16:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND13:%.*]] = phi <8 x i32> [ [[INDUCTION12]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX15:%.*]] = add i64 [[IV_START]], [[INDEX9]] -; CHECK-NEXT: [[TMP21:%.*]] = trunc i64 [[OFFSET_IDX15]] to i32 -; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[TMP21]], 0 -; CHECK-NEXT: [[TMP23:%.*]] = add i32 [[TMP22]], -1 -; CHECK-NEXT: [[TMP24:%.*]] = mul <8 x i32> [[VEC_IND13]], -; CHECK-NEXT: [[TMP25:%.*]] = lshr exact <8 x i32> [[TMP24]], -; CHECK-NEXT: [[TMP26:%.*]] = trunc <8 x i32> [[TMP25]] to <8 x i16> -; CHECK-NEXT: [[TMP27:%.*]] = zext i32 [[TMP23]] to i64 -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[ARR]], i64 [[TMP27]] -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[TMP28]], i32 0 -; CHECK-NEXT: store <8 x i16> [[TMP26]], ptr [[TMP29]], align 2 -; CHECK-NEXT: [[INDEX_NEXT16]] = add nuw i64 [[INDEX9]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT14]] = add <8 x i32> [[VEC_IND13]], -; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT16]], [[N_VEC4]] -; CHECK-NEXT: br i1 [[TMP30]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: [[INDEX10:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT17:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND14:%.*]] = phi <8 x i32> [ [[INDUCTION13]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT15:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX16:%.*]] = add i64 [[IV_START]], [[INDEX10]] +; CHECK-NEXT: [[TMP25:%.*]] = trunc i64 [[OFFSET_IDX16]] to i32 +; CHECK-NEXT: [[TMP26:%.*]] = add i32 [[TMP25]], 0 +; CHECK-NEXT: [[TMP27:%.*]] = add i32 [[TMP26]], -1 +; CHECK-NEXT: [[TMP28:%.*]] = mul <8 x i32> [[VEC_IND14]], +; CHECK-NEXT: [[TMP29:%.*]] = lshr exact <8 x i32> [[TMP28]], +; CHECK-NEXT: [[TMP30:%.*]] = trunc <8 x i32> [[TMP29]] to <8 x i16> +; CHECK-NEXT: [[TMP31:%.*]] = zext i32 [[TMP27]] to i64 +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i16, ptr [[ARR]], i64 [[TMP31]] +; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i16, ptr [[TMP32]], i32 0 +; CHECK-NEXT: store <8 x i16> [[TMP30]], ptr [[TMP33]], align 2 +; CHECK-NEXT: [[INDEX_NEXT17]] = add nuw i64 [[INDEX10]], 8 +; CHECK-NEXT: [[VEC_IND_NEXT15]] = add <8 x i32> [[VEC_IND14]], +; CHECK-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT17]], [[N_VEC5]] +; CHECK-NEXT: br i1 [[TMP34]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N8:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC4]] -; CHECK-NEXT: br i1 [[CMP_N8]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK-NEXT: [[CMP_N9:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC5]] +; CHECK-NEXT: br i1 [[CMP_N9]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL7:%.*]] = phi i64 [ [[IND_END5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END6]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IV_START]], [[VECTOR_SCEVCHECK]] ], [ [[IV_START]], [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL8:%.*]] = phi i64 [ [[IND_END6]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END7]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IV_START]], [[VECTOR_SCEVCHECK]] ], [ [[IV_START]], [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL7]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL8]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32 ; CHECK-NEXT: [[STORE_IDX:%.*]] = add i32 [[IV_TRUNC]], -1 ; CHECK-NEXT: [[X:%.*]] = mul i32 [[IV_TRUNC]], 196608 diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll --- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll @@ -917,6 +917,10 @@ ; AVX512-NEXT: [[IND_END17:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP23]] ; AVX512-NEXT: [[TMP24:%.*]] = mul i64 [[N_VEC]], 4 ; AVX512-NEXT: [[IND_END14:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP24]] +; AVX512-NEXT: [[TMP25:%.*]] = shl nsw i64 [[IDX_EXT]], 2 +; AVX512-NEXT: [[TMP26:%.*]] = add nsw i64 [[TMP25]], -4 +; AVX512-NEXT: [[TMP27:%.*]] = lshr i64 [[TMP26]], 2 +; AVX512-NEXT: [[TMP28:%.*]] = add nuw nsw i64 [[TMP27]], 1 ; AVX512-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP3]], [[N_VEC]] ; AVX512-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8 ; AVX512-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] @@ -926,30 +930,30 @@ ; AVX512-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; AVX512-NEXT: [[N_MOD_VF11:%.*]] = urem i64 [[TMP3]], 8 ; AVX512-NEXT: [[N_VEC12:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF11]] -; AVX512-NEXT: [[TMP25:%.*]] = mul i64 [[N_VEC12]], 4 -; AVX512-NEXT: [[IND_END13:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP25]] -; AVX512-NEXT: [[TMP26:%.*]] = mul i64 [[N_VEC12]], 64 -; AVX512-NEXT: [[IND_END16:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP26]] +; AVX512-NEXT: [[TMP29:%.*]] = mul i64 [[N_VEC12]], 4 +; AVX512-NEXT: [[IND_END13:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP29]] +; AVX512-NEXT: [[TMP30:%.*]] = mul i64 [[N_VEC12]], 64 +; AVX512-NEXT: [[IND_END16:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP30]] ; AVX512-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; AVX512: vec.epilog.vector.body: ; AVX512-NEXT: [[POINTER_PHI22:%.*]] = phi ptr [ [[BC_RESUME_VAL10]], [[VEC_EPILOG_PH]] ], [ [[PTR_IND23:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; AVX512-NEXT: [[INDEX20:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT26:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; AVX512-NEXT: [[TMP27:%.*]] = add i64 [[INDEX20]], 0 -; AVX512-NEXT: [[TMP28:%.*]] = mul i64 [[TMP27]], 4 -; AVX512-NEXT: [[NEXT_GEP21:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP28]] -; AVX512-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[POINTER_PHI22]], <8 x i64> -; AVX512-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[NEXT_GEP21]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP31:%.*]] = getelementptr inbounds float, ptr [[TMP30]], i32 0 -; AVX512-NEXT: [[WIDE_LOAD24:%.*]] = load <8 x float>, ptr [[TMP31]], align 4, !alias.scope !23 -; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD24]], <8 x ptr> [[TMP29]], i32 4, <8 x i1> ), !alias.scope !26, !noalias !28 -; AVX512-NEXT: [[TMP32:%.*]] = getelementptr float, ptr [[NEXT_GEP21]], i32 0 -; AVX512-NEXT: [[WIDE_LOAD25:%.*]] = load <8 x float>, ptr [[TMP32]], align 4, !alias.scope !30 -; AVX512-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, <8 x ptr> [[TMP29]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD25]], <8 x ptr> [[TMP33]], i32 4, <8 x i1> ), !alias.scope !26, !noalias !28 +; AVX512-NEXT: [[TMP31:%.*]] = add i64 [[INDEX20]], 0 +; AVX512-NEXT: [[TMP32:%.*]] = mul i64 [[TMP31]], 4 +; AVX512-NEXT: [[NEXT_GEP21:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP32]] +; AVX512-NEXT: [[TMP33:%.*]] = getelementptr i8, ptr [[POINTER_PHI22]], <8 x i64> +; AVX512-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[NEXT_GEP21]], i64 [[IDXPROM]] +; AVX512-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP34]], i32 0 +; AVX512-NEXT: [[WIDE_LOAD24:%.*]] = load <8 x float>, ptr [[TMP35]], align 4, !alias.scope !23 +; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD24]], <8 x ptr> [[TMP33]], i32 4, <8 x i1> ), !alias.scope !26, !noalias !28 +; AVX512-NEXT: [[TMP36:%.*]] = getelementptr float, ptr [[NEXT_GEP21]], i32 0 +; AVX512-NEXT: [[WIDE_LOAD25:%.*]] = load <8 x float>, ptr [[TMP36]], align 4, !alias.scope !30 +; AVX512-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, <8 x ptr> [[TMP33]], i64 1 +; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD25]], <8 x ptr> [[TMP37]], i32 4, <8 x i1> ), !alias.scope !26, !noalias !28 ; AVX512-NEXT: [[INDEX_NEXT26]] = add nuw i64 [[INDEX20]], 8 ; AVX512-NEXT: [[PTR_IND23]] = getelementptr i8, ptr [[POINTER_PHI22]], i64 512 -; AVX512-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT26]], [[N_VEC12]] -; AVX512-NEXT: br i1 [[TMP34]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]] +; AVX512-NEXT: [[TMP38:%.*]] = icmp eq i64 [[INDEX_NEXT26]], [[N_VEC12]] +; AVX512-NEXT: br i1 [[TMP38]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]] ; AVX512: vec.epilog.middle.block: ; AVX512-NEXT: [[CMP_N19:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC12]] ; AVX512-NEXT: br i1 [[CMP_N19]], label [[FOR_END]], label [[VEC_EPILOG_SCALAR_PH]] @@ -961,11 +965,11 @@ ; AVX512-NEXT: [[PTR_ADDR_012:%.*]] = phi ptr [ [[BC_RESUME_VAL15]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ] ; AVX512-NEXT: [[DEST_ADDR_011:%.*]] = phi ptr [ [[BC_RESUME_VAL18]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD_PTR6:%.*]], [[FOR_BODY]] ] ; AVX512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[PTR_ADDR_012]], i64 [[IDXPROM]] -; AVX512-NEXT: [[TMP35:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; AVX512-NEXT: store float [[TMP35]], ptr [[DEST_ADDR_011]], align 4 -; AVX512-NEXT: [[TMP36:%.*]] = load float, ptr [[PTR_ADDR_012]], align 4 +; AVX512-NEXT: [[TMP39:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; AVX512-NEXT: store float [[TMP39]], ptr [[DEST_ADDR_011]], align 4 +; AVX512-NEXT: [[TMP40:%.*]] = load float, ptr [[PTR_ADDR_012]], align 4 ; AVX512-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[DEST_ADDR_011]], i64 1 -; AVX512-NEXT: store float [[TMP36]], ptr [[ARRAYIDX5]], align 4 +; AVX512-NEXT: store float [[TMP40]], ptr [[ARRAYIDX5]], align 4 ; AVX512-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, ptr [[PTR_ADDR_012]], i64 1 ; AVX512-NEXT: [[ADD_PTR6]] = getelementptr inbounds float, ptr [[DEST_ADDR_011]], i64 16 ; AVX512-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[ADD_PTR]] diff --git a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll --- a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll +++ b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll @@ -29,7 +29,7 @@ ; CHECK: L1.early.exit: ; CHECK-NEXT: ret void ; CHECK: L1.exit: -; CHECK-NEXT: [[INDUCTION_IV_LCSSA3:%.*]] = phi i32 [ [[INDUCTION_IV]], [[L1_BACKEDGE]] ] +; CHECK-NEXT: [[INDUCTION_IV_LCSSA2:%.*]] = phi i32 [ [[INDUCTION_IV]], [[L1_BACKEDGE]] ] ; CHECK-NEXT: [[INDUCTION_IV_LCSSA1:%.*]] = phi i32 [ [[INDUCTION_IV]], [[L1_BACKEDGE]] ] ; CHECK-NEXT: [[L1_EXIT_VAL:%.*]] = phi i32 [ [[L1_SUM_NEXT]], [[L1_BACKEDGE]] ] ; CHECK-NEXT: br label [[L2_HEADER:%.*]] @@ -45,7 +45,7 @@ ; CHECK: L2.Inner.header.preheader: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP3:%.*]] = mul i32 12, [[INDUCTION_IV_LCSSA1]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i32 12, [[INDUCTION_IV_LCSSA2]] ; CHECK-NEXT: [[IND_END:%.*]] = add i32 1, [[TMP3]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -58,16 +58,16 @@ ; CHECK-NEXT: br i1 [[CMP_N]], label [[L2_HEADER_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[L2_INNER_HEADER_PREHEADER]] ] -; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ 13, [[MIDDLE_BLOCK]] ], [ 1, [[L2_INNER_HEADER_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ 13, [[MIDDLE_BLOCK]] ], [ 1, [[L2_INNER_HEADER_PREHEADER]] ] ; CHECK-NEXT: br label [[L2_INNER_HEADER:%.*]] ; CHECK: L2.Inner.header: ; CHECK-NEXT: [[L2_ACCUM:%.*]] = phi i32 [ [[L2_ACCUM_NEXT:%.*]], [[L2_INNER_HEADER]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[L2_IV:%.*]] = phi i64 [ [[L2_IV_NEXT:%.*]], [[L2_INNER_HEADER]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[L2_IV:%.*]] = phi i64 [ [[L2_IV_NEXT:%.*]], [[L2_INNER_HEADER]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[L2_ACCUM_NEXT]] = sub i32 [[L2_ACCUM]], [[L1_EXIT_VAL]] ; CHECK-NEXT: [[L2_DUMMY_BUT_NEED_IT:%.*]] = sext i32 [[L2_ACCUM_NEXT]] to i64 ; CHECK-NEXT: [[L2_IV_NEXT]] = add nuw nsw i64 [[L2_IV]], 1 ; CHECK-NEXT: [[L2_EXIT_COND:%.*]] = icmp ugt i64 [[L2_IV]], 11 -; CHECK-NEXT: br i1 [[L2_EXIT_COND]], label [[L2_HEADER_LOOPEXIT]], label [[L2_INNER_HEADER]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[L2_EXIT_COND]], label [[L2_HEADER_LOOPEXIT]], label [[L2_INNER_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: L2.exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -12,6 +12,10 @@ ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-EMPTY: +; CHECK-NEXT: ph: +; CHECK-NEXT: EMIT vp<%0> = EXPAND SCEV (1 smax (1 + (sext i8 %y to i32))) +; CHECK-NEXT: No successors +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -895,15 +895,13 @@ ; UNROLL-NO-IC-NEXT: entry: ; UNROLL-NO-IC-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] ; UNROLL-NO-IC: for.cond1.preheader: -; UNROLL-NO-IC-NEXT: [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[FOR_COND_CLEANUP3:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; UNROLL-NO-IC-NEXT: [[I_016:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3]] ] +; UNROLL-NO-IC-NEXT: [[I_016:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ] ; UNROLL-NO-IC-NEXT: [[E_015:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ [[E_1_LCSSA:%.*]], [[FOR_COND_CLEANUP3]] ] -; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[INDVAR]], 1 -; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 8 +; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[I_016]], 8 ; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL-NO-IC: vector.ph: -; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 8 -; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]] +; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[I_016]], 8 +; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[I_016]], [[N_MOD_VF]] ; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = sub i32 [[I_016]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[E_015]], i32 3 ; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[I_016]], i64 0 @@ -915,14 +913,14 @@ ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], -; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VEC_IND]], <4 x i32> [[STEP_ADD]], <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VEC_IND]], <4 x i32> [[STEP_ADD]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; UNROLL-NO-IC: middle.block: -; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] +; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I_016]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 3 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 2 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]] @@ -943,38 +941,35 @@ ; UNROLL-NO-IC-NEXT: [[E_1_LCSSA]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_COND1]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-IC-NEXT: [[INC]] = add nuw nsw i32 [[I_016]], 1 ; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49 -; UNROLL-NO-IC-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1 ; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]] ; ; UNROLL-NO-VF-LABEL: @PR27246( ; UNROLL-NO-VF-NEXT: entry: ; UNROLL-NO-VF-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] ; UNROLL-NO-VF: for.cond1.preheader: -; UNROLL-NO-VF-NEXT: [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[FOR_COND_CLEANUP3:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; UNROLL-NO-VF-NEXT: [[I_016:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3]] ] +; UNROLL-NO-VF-NEXT: [[I_016:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ] ; UNROLL-NO-VF-NEXT: [[E_015:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ [[E_1_LCSSA:%.*]], [[FOR_COND_CLEANUP3]] ] -; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i32 [[INDVAR]], 1 -; UNROLL-NO-VF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2 +; UNROLL-NO-VF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[I_016]], 2 ; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL-NO-VF: vector.ph: -; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 -; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]] +; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[I_016]], 2 +; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i32 [[I_016]], [[N_MOD_VF]] ; UNROLL-NO-VF-NEXT: [[IND_END:%.*]] = sub i32 [[I_016]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-VF: vector.body: ; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ [[E_015]], [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ [[E_015]], [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[I_016]], [[INDEX]] -; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0 -; UNROLL-NO-VF-NEXT: [[TMP2]] = add i32 [[OFFSET_IDX]], -1 +; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 +; UNROLL-NO-VF-NEXT: [[TMP1]] = add i32 [[OFFSET_IDX]], -1 ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-VF-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; UNROLL-NO-VF: middle.block: -; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] +; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I_016]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[E_015]], [[FOR_COND1_PREHEADER]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[E_015]], [[FOR_COND1_PREHEADER]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ] ; UNROLL-NO-VF-NEXT: br label [[FOR_COND1:%.*]] ; UNROLL-NO-VF: for.cond.cleanup: @@ -987,25 +982,22 @@ ; UNROLL-NO-VF-NEXT: [[DEC]] = add nsw i32 [[K_0]], -1 ; UNROLL-NO-VF-NEXT: br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]], !llvm.loop [[LOOP9:![0-9]+]] ; UNROLL-NO-VF: for.cond.cleanup3: -; UNROLL-NO-VF-NEXT: [[E_1_LCSSA]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_COND1]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-VF-NEXT: [[E_1_LCSSA]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_COND1]] ], [ [[TMP0]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-VF-NEXT: [[INC]] = add nuw nsw i32 [[I_016]], 1 ; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49 -; UNROLL-NO-VF-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1 ; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]] ; ; SINK-AFTER-LABEL: @PR27246( ; SINK-AFTER-NEXT: entry: ; SINK-AFTER-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] ; SINK-AFTER: for.cond1.preheader: -; SINK-AFTER-NEXT: [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[FOR_COND_CLEANUP3:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; SINK-AFTER-NEXT: [[I_016:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3]] ] +; SINK-AFTER-NEXT: [[I_016:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ] ; SINK-AFTER-NEXT: [[E_015:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ [[E_1_LCSSA:%.*]], [[FOR_COND_CLEANUP3]] ] -; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i32 [[INDVAR]], 1 -; SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4 +; SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[I_016]], 4 ; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SINK-AFTER: vector.ph: -; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4 -; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]] +; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[I_016]], 4 +; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i32 [[I_016]], [[N_MOD_VF]] ; SINK-AFTER-NEXT: [[IND_END:%.*]] = sub i32 [[I_016]], [[N_VEC]] ; SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[E_015]], i32 3 ; SINK-AFTER-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[I_016]], i64 0 @@ -1016,13 +1008,13 @@ ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[VEC_IND:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VEC_IND]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> +; SINK-AFTER-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], -; SINK-AFTER-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; SINK-AFTER-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; SINK-AFTER-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; SINK-AFTER-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; SINK-AFTER: middle.block: -; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] +; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I_016]], [[N_VEC]] ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[VEC_IND]], i32 3 ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[VEC_IND]], i32 2 ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]] @@ -1043,7 +1035,6 @@ ; SINK-AFTER-NEXT: [[E_1_LCSSA]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_COND1]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] ; SINK-AFTER-NEXT: [[INC]] = add nuw nsw i32 [[I_016]], 1 ; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49 -; SINK-AFTER-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1 ; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction-unroll.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction-unroll.ll --- a/llvm/test/Transforms/LoopVectorize/pointer-induction-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction-unroll.ll @@ -24,26 +24,26 @@ ; STRIDED-LABEL: @non_constant_scalar_expansion( ; STRIDED-NEXT: entry: ; STRIDED-NEXT: [[MUL:%.*]] = shl i32 [[TMP0:%.*]], 1 +; STRIDED-NEXT: [[TMP1:%.*]] = sext i32 [[MUL]] to i64 ; STRIDED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; STRIDED: vector.ph: -; STRIDED-NEXT: [[TMP1:%.*]] = sext i32 [[MUL]] to i64 -; STRIDED-NEXT: [[TMP2:%.*]] = mul i64 4294967264, [[TMP1]] -; STRIDED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr null, i64 [[TMP2]] -; STRIDED-NEXT: [[TMP3:%.*]] = sext i32 [[MUL]] to i64 +; STRIDED-NEXT: [[TMP2:%.*]] = sext i32 [[MUL]] to i64 +; STRIDED-NEXT: [[TMP3:%.*]] = mul i64 4294967264, [[TMP2]] +; STRIDED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr null, i64 [[TMP3]] ; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; STRIDED: vector.body: ; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; STRIDED-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; STRIDED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], [[TMP3]] +; STRIDED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], [[TMP1]] ; STRIDED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP5]] ; STRIDED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 1 -; STRIDED-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], [[TMP3]] +; STRIDED-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], [[TMP1]] ; STRIDED-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP7]] ; STRIDED-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 2 -; STRIDED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], [[TMP3]] +; STRIDED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], [[TMP1]] ; STRIDED-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP9]] ; STRIDED-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 3 -; STRIDED-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], [[TMP3]] +; STRIDED-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], [[TMP1]] ; STRIDED-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]] ; STRIDED-NEXT: [[DOTCAST:%.*]] = trunc i64 [[INDEX]] to i32 ; STRIDED-NEXT: [[OFFSET_IDX:%.*]] = add i32 30, [[DOTCAST]] diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -228,20 +228,20 @@ ; STRIDED-LABEL: @non_constant_vector_expansion( ; STRIDED-NEXT: entry: ; STRIDED-NEXT: [[MUL:%.*]] = shl i32 [[TMP0:%.*]], 1 +; STRIDED-NEXT: [[TMP1:%.*]] = sext i32 [[MUL]] to i64 ; STRIDED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] ; STRIDED: vector.scevcheck: ; STRIDED-NEXT: br i1 true, label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; STRIDED: vector.ph: -; STRIDED-NEXT: [[TMP1:%.*]] = sext i32 [[MUL]] to i64 -; STRIDED-NEXT: [[TMP2:%.*]] = mul i64 4294967264, [[TMP1]] -; STRIDED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr null, i64 [[TMP2]] -; STRIDED-NEXT: [[TMP3:%.*]] = sext i32 [[MUL]] to i64 +; STRIDED-NEXT: [[TMP2:%.*]] = sext i32 [[MUL]] to i64 +; STRIDED-NEXT: [[TMP3:%.*]] = mul i64 4294967264, [[TMP2]] +; STRIDED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr null, i64 [[TMP3]] ; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; STRIDED: vector.body: ; STRIDED-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ null, [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; STRIDED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 -; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP3]], i64 0 +; STRIDED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP1]], 4 +; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i64 0 ; STRIDED-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = mul <4 x i64> , [[DOTSPLAT]] ; STRIDED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> [[VECTOR_GEP]] diff --git a/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll b/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll --- a/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll +++ b/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll @@ -19,38 +19,38 @@ ; CHECK-NEXT: [[INDUCTION_IV_NEXT]] = add i32 [[INDUCTION_IV]], [[TMP1]] ; CHECK-NEXT: br i1 false, label [[LOOP_1]], label [[LOOP_2_PREHEADER:%.*]] ; CHECK: loop.2.preheader: -; CHECK-NEXT: [[INDUCTION_IV_LCSSA2:%.*]] = phi i32 [ [[INDUCTION_IV]], [[LOOP_1]] ] +; CHECK-NEXT: [[INDUCTION_IV_LCSSA1:%.*]] = phi i32 [ [[INDUCTION_IV]], [[LOOP_1]] ] ; CHECK-NEXT: [[INDUCTION_IV_LCSSA:%.*]] = phi i32 [ [[INDUCTION_IV]], [[LOOP_1]] ] ; CHECK-NEXT: [[IV_1_LCSSA:%.*]] = phi i32 [ [[IV_1]], [[LOOP_1]] ] ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[IND_END:%.*]] = mul i32 196, [[INDUCTION_IV_LCSSA]] +; CHECK-NEXT: [[IND_END:%.*]] = mul i32 196, [[INDUCTION_IV_LCSSA1]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], [[INDUCTION_IV_LCSSA2]] -; CHECK-NEXT: [[TMP2:%.*]] = mul i32 0, [[INDUCTION_IV_LCSSA2]] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], [[INDUCTION_IV_LCSSA]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i32 0, [[INDUCTION_IV_LCSSA]] ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = mul i32 1, [[INDUCTION_IV_LCSSA2]] +; CHECK-NEXT: [[TMP4:%.*]] = mul i32 1, [[INDUCTION_IV_LCSSA]] ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = mul i32 2, [[INDUCTION_IV_LCSSA2]] +; CHECK-NEXT: [[TMP6:%.*]] = mul i32 2, [[INDUCTION_IV_LCSSA]] ; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = mul i32 3, [[INDUCTION_IV_LCSSA2]] +; CHECK-NEXT: [[TMP8:%.*]] = mul i32 3, [[INDUCTION_IV_LCSSA]] ; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[OFFSET_IDX]], [[TMP8]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 199, 196 -; CHECK-NEXT: [[IND_ESCAPE:%.*]] = mul i32 195, [[INDUCTION_IV_LCSSA2]] +; CHECK-NEXT: [[IND_ESCAPE:%.*]] = mul i32 195, [[INDUCTION_IV_LCSSA]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_3_PREHEADER:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 196, [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_2_PREHEADER]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_2_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_2_PREHEADER]] ] ; CHECK-NEXT: br label [[LOOP_2:%.*]] ; CHECK: loop.2: ; CHECK-NEXT: [[IV_3:%.*]] = phi i16 [ [[IV_3_NEXT:%.*]], [[LOOP_2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[IV_4:%.*]] = phi i32 [ [[IV_4_NEXT:%.*]], [[LOOP_2]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[IV_4:%.*]] = phi i32 [ [[IV_4_NEXT:%.*]], [[LOOP_2]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[IV_4_NEXT]] = sub i32 [[IV_4]], [[IV_1_LCSSA]] ; CHECK-NEXT: [[IV_3_NEXT]] = add i16 [[IV_3]], 1 ; CHECK-NEXT: [[CMP88_1:%.*]] = icmp ult i16 [[IV_3]], 198 diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll @@ -8,6 +8,8 @@ define void @foo(i64 %n) { ; CHECK: VPlan 'HCFGBuilder: Plain CFG ; CHECK-NEXT: { +; CHECK-NEXT: Live-in ir<8> = original trip-count +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): outer.header ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll @@ -9,6 +9,7 @@ ; CHECK-LABEL: sink_with_sideeffects ; CHECK: VPlan 'Initial VPlan for VF={1},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<0> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -14,6 +14,10 @@ ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-EMPTY: +; CHECK-NEXT: ph: +; CHECK-NEXT: EMIT vp<{{.*}}> = EXPAND SCEV (1 + (8 umin %k)) +; CHECK-NEXT: No successors +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -77,6 +81,10 @@ ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-EMPTY: +; CHECK-NEXT: ph: +; CHECK-NEXT: EMIT vp<{{.*}}> = EXPAND SCEV (1 + (8 umin %k)) +; CHECK-NEXT: No successors +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -155,6 +163,10 @@ ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-EMPTY: +; CHECK-NEXT: ph: +; CHECK-NEXT: EMIT vp<{{.*}}> = EXPAND SCEV (1 + (8 umin %k)) +; CHECK-NEXT: No successors +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -304,6 +316,10 @@ ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-EMPTY: +; CHECK-NEXT: ph: +; CHECK-NEXT: EMIT vp<{{.*}}> = EXPAND SCEV (1 + (8 umin %k)) +; CHECK-NEXT: No successors +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -398,6 +414,10 @@ ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-EMPTY: +; CHECK-NEXT: ph: +; CHECK-NEXT: EMIT vp<{{.*}}> = EXPAND SCEV (1 + (8 umin %k)) +; CHECK-NEXT: No successors +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -501,6 +521,10 @@ ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-EMPTY: +; CHECK-NEXT: ph: +; CHECK-NEXT: EMIT vp<{{.*}}> = EXPAND SCEV (1 + (8 umin %k)) +; CHECK-NEXT: No successors +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -604,6 +628,10 @@ ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-EMPTY: +; CHECK-NEXT: ph: +; CHECK-NEXT: EMIT vp<{{.*}}> = EXPAND SCEV (1 + (8 umin %k)) +; CHECK-NEXT: No successors +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -703,6 +731,10 @@ ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-EMPTY: +; CHECK-NEXT: ph: +; CHECK-NEXT: EMIT vp<{{.*}}> = EXPAND SCEV (1 + (8 umin %k)) +; CHECK-NEXT: No successors +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -764,6 +796,10 @@ ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-EMPTY: +; CHECK-NEXT: ph: +; CHECK-NEXT: EMIT vp<{{.*}}> = EXPAND SCEV (1 + (8 umin %k)) +; CHECK-NEXT: No successors +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: @@ -972,6 +1008,7 @@ ; CHECK-LABEL: LV: Checking a loop in 'merge_with_dead_gep_between_regions' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<[[ORIG_TC:%.+]]> = original trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: @@ -1038,6 +1075,10 @@ ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: +; CHECK-NEXT: ph: +; CHECK-NEXT: EMIT vp<%0> = EXPAND SCEV ((-1 * (ptrtoint ptr %end to i64)) + (ptrtoint ptr %start to i64)) +; CHECK-NEXT: No successors +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: