diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -564,6 +564,14 @@ ArrayRef BypassBlocks, std::pair AdditionalBypass = {nullptr, nullptr}); + /// Returns the original loop trip count. + Value *getTripCount() const { return TripCount; } + + /// Used to set the trip count after ILV's construction and after the + /// preheader block has been executed. Note that this always holds the trip + /// count of the original loop for both main loop and epilogue vectorization. + void setTripCount(Value *TC) { TripCount = TC; } + protected: friend class LoopVectorizationPlanner; @@ -605,9 +613,6 @@ /// represented as. void truncateToMinimalBitwidths(VPTransformState &State); - /// Returns (and creates if needed) the original loop trip count. - Value *getOrCreateTripCount(BasicBlock *InsertBlock); - /// Returns (and creates if needed) the trip count of the widened loop. Value *getOrCreateVectorTripCount(BasicBlock *InsertBlock); @@ -2869,41 +2874,12 @@ PredicatedInstructions.push_back(Cloned); } -Value *InnerLoopVectorizer::getOrCreateTripCount(BasicBlock *InsertBlock) { - if (TripCount) - return TripCount; - - assert(InsertBlock); - IRBuilder<> Builder(InsertBlock->getTerminator()); - // Find the loop boundaries. - Type *IdxTy = Legal->getWidestInductionType(); - assert(IdxTy && "No type for induction"); - const SCEV *ExitCount = createTripCountSCEV(IdxTy, PSE, OrigLoop); - - const DataLayout &DL = InsertBlock->getModule()->getDataLayout(); - - // Expand the trip count and place the new instructions in the preheader. - // Notice that the pre-header does not change, only the loop body. - SCEVExpander Exp(*PSE.getSE(), DL, "induction"); - - // Count holds the overall loop count (N). 
- TripCount = Exp.expandCodeFor(ExitCount, ExitCount->getType(), - InsertBlock->getTerminator()); - - if (TripCount->getType()->isPointerTy()) - TripCount = - CastInst::CreatePointerCast(TripCount, IdxTy, "exitcount.ptrcnt.to.int", - InsertBlock->getTerminator()); - - return TripCount; -} - Value * InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) { if (VectorTripCount) return VectorTripCount; - Value *TC = getOrCreateTripCount(InsertBlock); + Value *TC = getTripCount(); IRBuilder<> Builder(InsertBlock->getTerminator()); Type *Ty = TC->getType(); @@ -2981,7 +2957,7 @@ } void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) { - Value *Count = getOrCreateTripCount(LoopVectorPreHeader); + Value *Count = getTripCount(); // Reuse existing vector loop preheader for TC checks. // Note that new preheader block is generated for vector loop. BasicBlock *const TCCheckBlock = LoopVectorPreHeader; @@ -3241,7 +3217,7 @@ BasicBlock *InnerLoopVectorizer::completeLoopSkeleton() { // The trip counts should be cached by now. - Value *Count = getOrCreateTripCount(LoopVectorPreHeader); + Value *Count = getTripCount(); Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader); auto *ScalarLatchTerm = OrigLoop->getLoopLatch()->getTerminator(); @@ -3281,8 +3257,9 @@ the vectorized instructions while the old loop will continue to run the scalar remainder. - [ ] <-- loop iteration number check. - / | + [ ] <-- old preheader - loop iteration number check and SCEVs in Plan's + / | preheader are expanded here. Eventually all required SCEV + / | expansion should happen here. / v | [ ] <-- vector loop bypass (may consist of multiple blocks). | / | @@ -3384,7 +3361,7 @@ VPValue *StepVPV = Plan.getSCEVExpansion(II.getStep()); assert(StepVPV && "step must have been expanded during VPlan execution"); Value *Step = StepVPV->isLiveIn() ? 
StepVPV->getLiveInIRValue() - : State.get(StepVPV, 0); + : State.get(StepVPV, {0, 0}); Value *Escape = emitTransformedIndex(B, CountMinusOne, II.getStartValue(), Step, II); Escape->setName("ind.escape"); @@ -7704,23 +7681,27 @@ LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << BestVF << ", UF=" << BestUF << '\n'); - // Workaround! Compute the trip count of the original loop and cache it - // before we start modifying the CFG. This code has a systemic problem - // wherein it tries to run analysis over partially constructed IR; this is - // wrong, and not simply for SCEV. The trip count of the original loop - // simply happens to be prone to hitting this in practice. In theory, we - // can hit the same issue for any SCEV, or ValueTracking query done during - // mutation. See PR49900. - ILV.getOrCreateTripCount(OrigLoop->getLoopPreheader()); - if (!IsEpilogueVectorization) VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE); // Perform the actual loop transformation. + VPTransformState State{BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan}; + + // 0. Generate SCEV-dependent code into the preheader, including TripCount, + // before making any changes to the CFG. + if (!BestVPlan.getPreheader()->empty()) { + State.CFG.PrevBB = OrigLoop->getLoopPreheader(); + State.Builder.SetInsertPoint(OrigLoop->getLoopPreheader()->getTerminator()); + BestVPlan.getPreheader()->execute(&State); + } + if (!ILV.getTripCount()) + ILV.setTripCount(State.get(BestVPlan.getTripCount(), {0, 0})); + else + assert(IsEpilogueVectorization && "should only re-use the existing trip " + "count during epilogue vectorization"); // 1. Set up the skeleton for vectorization, including vector pre-header and // middle block. The vector loop is created during VPlan execution. 
- VPTransformState State{BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan}; Value *CanonicalIVStartValue; std::tie(State.CFG.PrevBB, CanonicalIVStartValue) = ILV.createVectorizedLoopSkeleton(); @@ -7756,10 +7737,9 @@ //===------------------------------------------------===// // 2. Copy and widen instructions from the old loop into the new loop. - BestVPlan.prepareToExecute(ILV.getOrCreateTripCount(nullptr), - ILV.getOrCreateVectorTripCount(nullptr), - CanonicalIVStartValue, State, - IsEpilogueVectorization); + BestVPlan.prepareToExecute( + ILV.getTripCount(), ILV.getOrCreateVectorTripCount(nullptr), + CanonicalIVStartValue, State, IsEpilogueVectorization); BestVPlan.execute(&State); @@ -7874,7 +7854,7 @@ assert(Bypass && "Expected valid bypass basic block."); ElementCount VFactor = ForEpilogue ? EPI.EpilogueVF : VF; unsigned UFactor = ForEpilogue ? EPI.EpilogueUF : UF; - Value *Count = getOrCreateTripCount(LoopVectorPreHeader); + Value *Count = getTripCount(); // Reuse existing vector loop preheader for TC checks. // Note that new preheader block is generated for vector loop. BasicBlock *const TCCheckBlock = LoopVectorPreHeader; @@ -8193,7 +8173,7 @@ VPBuilder::InsertPointGuard Guard(Builder); Builder.setInsertPoint(HeaderVPBB, NewInsertionPoint); if (useActiveLaneMask(TFStyle)) { - VPValue *TC = Plan.getOrCreateTripCount(); + VPValue *TC = Plan.getTripCount(); BlockMask = Builder.createNaryOp(VPInstruction::ActiveLaneMask, {IV, TC}, nullptr, "active.lane.mask"); } else { @@ -8770,7 +8750,7 @@ VecPreheader->appendRecipe(CanonicalIVIncrementParts); // Create the ActiveLaneMask instruction using the correct start values. - VPValue *TC = Plan.getOrCreateTripCount(); + VPValue *TC = Plan.getTripCount(); VPValue *TripCount, *IncrementValue; if (Style == TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck) { @@ -8912,17 +8892,19 @@ // visit each basic block after having visited its predecessor basic blocks. 
// --------------------------------------------------------------------------- - // Create initial VPlan skeleton, starting with a block for the pre-header, - // followed by a region for the vector loop, followed by the middle block. The - // skeleton vector loop region contains a header and latch block. - VPBasicBlock *Preheader = new VPBasicBlock("vector.ph"); - auto Plan = std::make_unique(Preheader); - + // Create initial VPlan skeleton, having a basic block for the pre-header + // which contains SCEV expansions that need to happen before the CFG is + // modified; a basic block for the vector pre-header, followed by a region for + // the vector loop, followed by the middle basic block. The skeleton vector + // loop region contains a header and latch basic blocks. + VPlanPtr Plan = VPlan::createInitialVPlan( + createTripCountSCEV(Legal->getWidestInductionType(), PSE, OrigLoop), + *PSE.getSE()); VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body"); VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch"); VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB); auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop"); - VPBlockUtils::insertBlockAfter(TopRegion, Preheader); + VPBlockUtils::insertBlockAfter(TopRegion, Plan->getEntry()); VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block"); VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion); @@ -9110,7 +9092,9 @@ assert(EnableVPlanNativePath && "VPlan-native path is not enabled."); // Create new empty VPlan - auto Plan = std::make_unique(); + auto Plan = VPlan::createInitialVPlan( + createTripCountSCEV(Legal->getWidestInductionType(), PSE, OrigLoop), + *PSE.getSE()); // Build hierarchical CFG VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan); @@ -9831,9 +9815,11 @@ unsigned LastLane = IsUniform ? 0 : VF.getKnownMinValue() - 1; // Check if there is a scalar value for the selected lane. 
if (!hasScalarValue(Def, {Part, LastLane})) { - // At the moment, VPWidenIntOrFpInductionRecipes and VPScalarIVStepsRecipes can also be uniform. + // At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and + // VPExpandSCEVRecipes can also be uniform. assert((isa(Def->getDefiningRecipe()) || - isa(Def->getDefiningRecipe())) && + isa(Def->getDefiningRecipe()) || + isa(Def->getDefiningRecipe())) && "unexpected recipe found to be invariant"); IsUniform = true; LastLane = 0; @@ -10420,6 +10406,16 @@ VPBasicBlock *Header = VectorLoop->getEntryBasicBlock(); Header->setName("vec.epilog.vector.body"); + // Re-use the trip count expanded for the main loop, as skeleton + // creation needs it as a value that dominates both the scalar and + // vector epilogue loops + EpilogILV.setTripCount(MainILV.getTripCount()); + if (auto *R = BestEpiPlan.getTripCount()->getDefiningRecipe()) { + assert(BestEpiPlan.getTripCount()->getNumUsers() == 0 && + "trip count VPValue cannot be used in epilogue plan"); + R->eraseFromParent(); + } + // Ensure that the start values for any VPWidenIntOrFpInductionRecipe, // VPWidenPointerInductionRecipe and VPReductionPHIRecipes are updated // before vectorizing the epilogue loop. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2218,6 +2218,11 @@ /// preheader of the vector loop. VPBasicBlock *Entry; + /// VPBasicBlock corresponding to the original preheader. Used to place + /// VPExpandSCEV recipes for expressions used during skeleton creation and the + /// rest of VPlan execution. + VPBasicBlock *Preheader; + /// Holds the VFs applicable to this VPlan. 
SmallSetVector VFs; @@ -2260,13 +2265,35 @@ DenseMap SCEVToExpansion; public: - VPlan(VPBasicBlock *Entry = nullptr) : Entry(Entry) { - if (Entry) - Entry->setPlan(this); + /// Construct a VPlan with original preheader \p Preheader, trip count \p TC + /// and \p Entry to the plan. At the moment, \p Preheader and \p Entry need to + /// be disconnected, as the bypass blocks between them are not yet modeled in + /// VPlan. + VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry) + : VPlan(Preheader, Entry) { + TripCount = TC; + } + + /// Construct a VPlan with original preheader \p Preheader and \p Entry to + /// the plan. At the moment, \p Preheader and \p Entry need to be + /// disconnected, as the bypass blocks between them are not yet modeled in + /// VPlan. + VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry) + : Entry(Entry), Preheader(Preheader) { + Entry->setPlan(this); + Preheader->setPlan(this); + assert(Preheader->getNumSuccessors() == 0 && + Preheader->getNumPredecessors() == 0 && + "preheader must be disconnected"); } ~VPlan(); + /// Create an initial VPlan with preheader and entry blocks. Creates a + /// VPExpandSCEVRecipe for \p TripCount and uses it as plan's trip count. + static VPlanPtr createInitialVPlan(const SCEV *TripCount, + ScalarEvolution &PSE); + /// Prepare the plan for execution, setting up the required live-in values. void prepareToExecute(Value *TripCount, Value *VectorTripCount, Value *CanonicalIVStartValue, VPTransformState &State, @@ -2285,9 +2312,8 @@ } /// The trip count of the original loop. - VPValue *getOrCreateTripCount() { - if (!TripCount) - TripCount = new VPValue(); + VPValue *getTripCount() const { + assert(TripCount && "trip count needs to be set before accessing it"); return TripCount; } @@ -2435,6 +2461,10 @@ SCEVToExpansion[S] = V; } + /// \return The block corresponding to the original preheader. 
+ VPBasicBlock *getPreheader() { return Preheader; } + const VPBasicBlock *getPreheader() const { return Preheader; } + private: /// Add to the given dominator tree the header block and every new basic block /// that was created between it and the latch block, inclusive. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -163,8 +163,9 @@ } void VPBlockBase::setPlan(VPlan *ParentPlan) { - assert(ParentPlan->getEntry() == this && - "Can only set plan on its entry block."); + assert( + (ParentPlan->getEntry() == this || ParentPlan->getPreheader() == this) && + "Can only set plan on its entry or preheader block."); Plan = ParentPlan; } @@ -593,12 +594,19 @@ } for (VPValue *VPV : VPLiveInsToFree) delete VPV; - if (TripCount) - delete TripCount; if (BackedgeTakenCount) delete BackedgeTakenCount; } +VPlanPtr VPlan::createInitialVPlan(const SCEV *TripCount, ScalarEvolution &SE) { + VPBasicBlock *Preheader = new VPBasicBlock("ph"); + VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph"); + auto Plan = std::make_unique(Preheader, VecPreheader); + Plan->TripCount = + vputils::getOrCreateVPValueForSCEVExpr(*Plan, TripCount, SE); + return Plan; +} + VPActiveLaneMaskPHIRecipe *VPlan::getActiveLaneMaskPhi() { VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock(); for (VPRecipeBase &R : Header->phis()) { @@ -612,13 +620,6 @@ Value *CanonicalIVStartValue, VPTransformState &State, bool IsEpilogueVectorization) { - - // Check if the trip count is needed, and if so build it. - if (TripCount && TripCount->getNumUsers()) { - for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) - State.set(TripCount, TripCountV, Part); - } - // Check if the backedge taken count is needed, and if so build it. 
if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) { IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); @@ -747,30 +748,29 @@ O << "VPlan '" << getName() << "' {"; - bool AnyLiveIn = false; if (VectorTripCount.getNumUsers() > 0) { O << "\nLive-in "; VectorTripCount.printAsOperand(O, SlotTracker); O << " = vector-trip-count"; - AnyLiveIn = true; - } - - if (TripCount && TripCount->getNumUsers() > 0) { - O << "\nLive-in "; - TripCount->printAsOperand(O, SlotTracker); - O << " = original trip-count"; - AnyLiveIn = true; } if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) { O << "\nLive-in "; BackedgeTakenCount->printAsOperand(O, SlotTracker); O << " = backedge-taken count"; - AnyLiveIn = true; } - if (AnyLiveIn) + O << "\n"; + if (TripCount->isLiveIn()) + O << "Live-in "; + TripCount->printAsOperand(O, SlotTracker); + O << " = original trip-count"; + O << "\n"; + + if (!getPreheader()->empty()) { O << "\n"; + getPreheader()->print(O, "", SlotTracker); + } for (const VPBlockBase *Block : vp_depth_first_shallow(getEntry())) { O << '\n'; @@ -897,6 +897,8 @@ OS << "edge [fontname=Courier, fontsize=30]\n"; OS << "compound=true\n"; + dumpBlock(Plan.getPreheader()); + for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry())) dumpBlock(Block); @@ -1109,8 +1111,7 @@ assignSlot(&Plan.VectorTripCount); if (Plan.BackedgeTakenCount) assignSlot(Plan.BackedgeTakenCount); - if (Plan.TripCount) - assignSlot(Plan.TripCount); + assignSlots(Plan.getPreheader()); ReversePostOrderTraversal> RPOT(VPBlockDeepTraversalWrapper(Plan.getEntry())); @@ -1140,10 +1141,8 @@ else if (auto *E = dyn_cast(Expr)) Expanded = Plan.getVPValueOrAddLiveIn(E->getValue()); else { - - VPBasicBlock *Preheader = Plan.getEntry(); Expanded = new VPExpandSCEVRecipe(Expr, SE); - Preheader->appendRecipe(Expanded->getDefiningRecipe()); + Plan.getPreheader()->appendRecipe(Expanded->getDefiningRecipe()); } Plan.addSCEVExpansion(Expr, Expanded); return Expanded; diff --git 
a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -252,7 +252,7 @@ // Get first lane of vector induction variable. Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0)); // Get the original loop tripcount. - Value *ScalarTC = State.get(getOperand(1), Part); + Value *ScalarTC = State.get(getOperand(1), VPIteration(Part, 0)); auto *Int1Ty = Type::getInt1Ty(Builder.getContext()); auto *PredTy = VectorType::get(Int1Ty, State.VF); @@ -288,7 +288,7 @@ break; } case VPInstruction::CalculateTripCountMinusVF: { - Value *ScalarTC = State.get(getOperand(0), Part); + Value *ScalarTC = State.get(getOperand(0), {0, 0}); Value *Step = createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF); Value *Sub = Builder.CreateSub(ScalarTC, Step); @@ -1152,7 +1152,7 @@ &*State.Builder.GetInsertPoint()); for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) - State.set(this, Res, Part); + State.set(this, Res, {Part, 0}); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll @@ -10,7 +10,11 @@ ; VPLANS-LABEL: Checking a loop in 'simple_memset' ; VPLANS: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' { -; VPLANS-NEXT: Live-in vp<[[TC:%[0-9]+]]> = original trip-count +; VPLANS-NEXT: vp<[[TC:%[0-9]+]]> = original trip-count +; VPLANS-EMPTY: +; VPLANS-NEXT: ph: +; VPLANS-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 umax %n) +; VPLANS-NEXT: No successors ; VPLANS-EMPTY: ; VPLANS-NEXT: vector.ph: ; VPLANS-NEXT: EMIT vp<[[VF:%[0-9]+]]> = VF * Part + ir<0> diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll @@ -10,6 +10,7 @@ ; ; CHECK: VPlan 'Initial VPlan for VF={vscale x 2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<%N> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll @@ -11,6 +11,7 @@ ; CHECK-LABEL: LV: Checking a loop in 'test_v4_v4m' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -36,6 +37,7 @@ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -66,6 +68,7 @@ ; CHECK-LABEL: LV: Checking a loop in 'test_v2_v4m' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -91,6 +94,7 @@ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -120,6 +124,7 
@@ ; CHECK-LABEL: LV: Checking a loop in 'test_v2_v4' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -145,6 +150,7 @@ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll b/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll @@ -8,6 +8,7 @@ ; CHECK-LABEL: LV: Checking a loop in 'test' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -34,6 +35,7 @@ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<1024> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop diff --git a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll --- a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll +++ b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll @@ -29,7 +29,7 @@ ; CHECK: L1.early.exit: ; CHECK-NEXT: ret void ; CHECK: L1.exit: -; CHECK-NEXT: [[INDUCTION_IV_LCSSA3:%.*]] = phi i32 [ [[INDUCTION_IV]], [[L1_BACKEDGE]] ] +; CHECK-NEXT: [[INDUCTION_IV_LCSSA2:%.*]] = phi i32 [ 
[[INDUCTION_IV]], [[L1_BACKEDGE]] ] ; CHECK-NEXT: [[INDUCTION_IV_LCSSA1:%.*]] = phi i32 [ [[INDUCTION_IV]], [[L1_BACKEDGE]] ] ; CHECK-NEXT: [[L1_EXIT_VAL:%.*]] = phi i32 [ [[L1_SUM_NEXT]], [[L1_BACKEDGE]] ] ; CHECK-NEXT: br label [[L2_HEADER:%.*]] @@ -45,7 +45,7 @@ ; CHECK: L2.Inner.header.preheader: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP3:%.*]] = mul i32 12, [[INDUCTION_IV_LCSSA1]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i32 12, [[INDUCTION_IV_LCSSA2]] ; CHECK-NEXT: [[IND_END:%.*]] = add i32 1, [[TMP3]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: @@ -58,16 +58,16 @@ ; CHECK-NEXT: br i1 [[CMP_N]], label [[L2_HEADER_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[L2_INNER_HEADER_PREHEADER]] ] -; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ 13, [[MIDDLE_BLOCK]] ], [ 1, [[L2_INNER_HEADER_PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ 13, [[MIDDLE_BLOCK]] ], [ 1, [[L2_INNER_HEADER_PREHEADER]] ] ; CHECK-NEXT: br label [[L2_INNER_HEADER:%.*]] ; CHECK: L2.Inner.header: ; CHECK-NEXT: [[L2_ACCUM:%.*]] = phi i32 [ [[L2_ACCUM_NEXT:%.*]], [[L2_INNER_HEADER]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[L2_IV:%.*]] = phi i64 [ [[L2_IV_NEXT:%.*]], [[L2_INNER_HEADER]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[L2_IV:%.*]] = phi i64 [ [[L2_IV_NEXT:%.*]], [[L2_INNER_HEADER]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[L2_ACCUM_NEXT]] = sub i32 [[L2_ACCUM]], [[L1_EXIT_VAL]] ; CHECK-NEXT: [[L2_DUMMY_BUT_NEED_IT:%.*]] = sext i32 [[L2_ACCUM_NEXT]] to i64 ; CHECK-NEXT: [[L2_IV_NEXT]] = add nuw nsw i64 [[L2_IV]], 1 ; CHECK-NEXT: [[L2_EXIT_COND:%.*]] = icmp ugt i64 [[L2_IV]], 11 -; CHECK-NEXT: br i1 [[L2_EXIT_COND]], label [[L2_HEADER_LOOPEXIT]], label [[L2_INNER_HEADER]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[L2_EXIT_COND]], label 
[[L2_HEADER_LOOPEXIT]], label [[L2_INNER_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: L2.exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll @@ -6,6 +6,7 @@ ; CHECK-LABEL: 'test_chained_first_order_recurrences_1' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<1000> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -55,6 +56,7 @@ ; CHECK-LABEL: 'test_chained_first_order_recurrences_3' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<1000> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -11,6 +11,7 @@ ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: Live-in ir<20001> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -100,6 +101,7 @@ ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: Live-in ir<20001> = original 
trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -168,6 +170,7 @@ ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: Live-in ir<20001> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -241,6 +244,7 @@ ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: Live-in ir<20001> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -338,6 +342,11 @@ ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ph: +; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 smax (1 + (sext i8 %y to i32))) +; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -411,6 +420,7 @@ ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: Live-in ir<3> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -895,15 +895,13 @@ ; UNROLL-NO-IC-NEXT: entry: ; UNROLL-NO-IC-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] ; UNROLL-NO-IC: for.cond1.preheader: -; UNROLL-NO-IC-NEXT: [[INDVAR:%.*]] = phi i32 
[ [[INDVAR_NEXT:%.*]], [[FOR_COND_CLEANUP3:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; UNROLL-NO-IC-NEXT: [[I_016:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3]] ] +; UNROLL-NO-IC-NEXT: [[I_016:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ] ; UNROLL-NO-IC-NEXT: [[E_015:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ [[E_1_LCSSA:%.*]], [[FOR_COND_CLEANUP3]] ] -; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[INDVAR]], 1 -; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 8 +; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[I_016]], 8 ; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL-NO-IC: vector.ph: -; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 8 -; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]] +; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[I_016]], 8 +; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[I_016]], [[N_MOD_VF]] ; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = sub i32 [[I_016]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[E_015]], i32 3 ; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[I_016]], i64 0 @@ -915,14 +913,14 @@ ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], -; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VEC_IND]], <4 x i32> [[STEP_ADD]], <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = shufflevector 
<4 x i32> [[VEC_IND]], <4 x i32> [[STEP_ADD]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; UNROLL-NO-IC: middle.block: -; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] +; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I_016]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 3 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 2 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]] @@ -943,38 +941,35 @@ ; UNROLL-NO-IC-NEXT: [[E_1_LCSSA]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_COND1]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-IC-NEXT: [[INC]] = add nuw nsw i32 [[I_016]], 1 ; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49 -; UNROLL-NO-IC-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1 ; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]] ; ; UNROLL-NO-VF-LABEL: @PR27246( ; UNROLL-NO-VF-NEXT: entry: ; UNROLL-NO-VF-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] ; UNROLL-NO-VF: for.cond1.preheader: -; UNROLL-NO-VF-NEXT: [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[FOR_COND_CLEANUP3:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; UNROLL-NO-VF-NEXT: [[I_016:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3]] ] +; UNROLL-NO-VF-NEXT: [[I_016:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ] ; UNROLL-NO-VF-NEXT: 
[[E_015:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ [[E_1_LCSSA:%.*]], [[FOR_COND_CLEANUP3]] ] -; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i32 [[INDVAR]], 1 -; UNROLL-NO-VF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2 +; UNROLL-NO-VF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[I_016]], 2 ; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL-NO-VF: vector.ph: -; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2 -; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]] +; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[I_016]], 2 +; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i32 [[I_016]], [[N_MOD_VF]] ; UNROLL-NO-VF-NEXT: [[IND_END:%.*]] = sub i32 [[I_016]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-VF: vector.body: ; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ [[E_015]], [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ [[E_015]], [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[I_016]], [[INDEX]] -; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0 -; UNROLL-NO-VF-NEXT: [[TMP2]] = add i32 [[OFFSET_IDX]], -1 +; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 +; UNROLL-NO-VF-NEXT: [[TMP1]] = add i32 [[OFFSET_IDX]], -1 ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-VF-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; UNROLL-NO-VF: middle.block: -; UNROLL-NO-VF-NEXT: 
[[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] +; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I_016]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[E_015]], [[FOR_COND1_PREHEADER]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[E_015]], [[FOR_COND1_PREHEADER]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ] ; UNROLL-NO-VF-NEXT: br label [[FOR_COND1:%.*]] ; UNROLL-NO-VF: for.cond.cleanup: @@ -987,25 +982,22 @@ ; UNROLL-NO-VF-NEXT: [[DEC]] = add nsw i32 [[K_0]], -1 ; UNROLL-NO-VF-NEXT: br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]], !llvm.loop [[LOOP9:![0-9]+]] ; UNROLL-NO-VF: for.cond.cleanup3: -; UNROLL-NO-VF-NEXT: [[E_1_LCSSA]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_COND1]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-VF-NEXT: [[E_1_LCSSA]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_COND1]] ], [ [[TMP0]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-VF-NEXT: [[INC]] = add nuw nsw i32 [[I_016]], 1 ; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49 -; UNROLL-NO-VF-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1 ; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]] ; ; SINK-AFTER-LABEL: @PR27246( ; SINK-AFTER-NEXT: entry: ; SINK-AFTER-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] ; SINK-AFTER: for.cond1.preheader: -; SINK-AFTER-NEXT: [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[FOR_COND_CLEANUP3:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; SINK-AFTER-NEXT: [[I_016:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3]] ] +; SINK-AFTER-NEXT: [[I_016:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ] ; SINK-AFTER-NEXT: [[E_015:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ 
[[E_1_LCSSA:%.*]], [[FOR_COND_CLEANUP3]] ] -; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i32 [[INDVAR]], 1 -; SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4 +; SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[I_016]], 4 ; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SINK-AFTER: vector.ph: -; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4 -; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]] +; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[I_016]], 4 +; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i32 [[I_016]], [[N_MOD_VF]] ; SINK-AFTER-NEXT: [[IND_END:%.*]] = sub i32 [[I_016]], [[N_VEC]] ; SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[E_015]], i32 3 ; SINK-AFTER-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[I_016]], i64 0 @@ -1016,13 +1008,13 @@ ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[VEC_IND:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VEC_IND]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> +; SINK-AFTER-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], -; SINK-AFTER-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; SINK-AFTER-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; SINK-AFTER-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; SINK-AFTER-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; 
SINK-AFTER: middle.block: -; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] +; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I_016]], [[N_VEC]] ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[VEC_IND]], i32 3 ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[VEC_IND]], i32 2 ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]] @@ -1043,7 +1035,6 @@ ; SINK-AFTER-NEXT: [[E_1_LCSSA]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_COND1]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] ; SINK-AFTER-NEXT: [[INC]] = add nuw nsw i32 [[I_016]], 1 ; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49 -; SINK-AFTER-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1 ; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll --- a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll @@ -39,6 +39,7 @@ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: Live-in ir<14> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll --- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll @@ -6,6 +6,11 @@ ; DBG-LABEL: 'test_scalarize_call' ; DBG: VPlan 'Initial VPlan for VF={1},UF>=1' { ; DBG-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; DBG-NEXT: vp<[[TC:%.+]]> = original trip-count +; DBG-EMPTY: +; DBG-NEXT: ph: +; DBG-NEXT: 
EMIT vp<[[TC]]> = EXPAND SCEV (1000 + (-1 * %start)) +; DBG-NEXT: No successors ; DBG-EMPTY: ; DBG-NEXT: vector.ph: ; DBG-NEXT: Successor(s): vector loop @@ -63,6 +68,7 @@ ; DBG-LABEL: 'test_scalarize_with_branch_cond' ; DBG: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; DBG-NEXT: Live-in ir<1000> = original trip-count ; DBG-EMPTY: ; DBG-NEXT: vector.ph: ; DBG-NEXT: Successor(s): vector loop @@ -168,6 +174,11 @@ ; DBG-LABEL: 'first_order_recurrence_using_induction' ; DBG: VPlan 'Initial VPlan for VF={1},UF>=1' { ; DBG-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; DBG-NEXT: vp<[[TC:%.+]]> = original trip-count +; DBG-EMPTY: +; DBG-NEXT: ph: +; DBG-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (zext i32 (1 smax %n) to i64) +; DBG-NEXT: No successors ; DBG-EMPTY: ; DBG-NEXT: vector.ph: ; DBG-NEXT: Successor(s): vector loop diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction-unroll.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction-unroll.ll --- a/llvm/test/Transforms/LoopVectorize/pointer-induction-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction-unroll.ll @@ -24,26 +24,26 @@ ; STRIDED-LABEL: @non_constant_scalar_expansion( ; STRIDED-NEXT: entry: ; STRIDED-NEXT: [[MUL:%.*]] = shl i32 [[TMP0:%.*]], 1 +; STRIDED-NEXT: [[TMP1:%.*]] = sext i32 [[MUL]] to i64 ; STRIDED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; STRIDED: vector.ph: -; STRIDED-NEXT: [[TMP1:%.*]] = sext i32 [[MUL]] to i64 -; STRIDED-NEXT: [[TMP2:%.*]] = mul i64 4294967264, [[TMP1]] -; STRIDED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr null, i64 [[TMP2]] -; STRIDED-NEXT: [[TMP3:%.*]] = sext i32 [[MUL]] to i64 +; STRIDED-NEXT: [[TMP2:%.*]] = sext i32 [[MUL]] to i64 +; STRIDED-NEXT: [[TMP3:%.*]] = mul i64 4294967264, [[TMP2]] +; STRIDED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr null, i64 [[TMP3]] ; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; STRIDED: vector.body: ; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ 
[[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; STRIDED-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; STRIDED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], [[TMP3]] +; STRIDED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], [[TMP1]] ; STRIDED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP5]] ; STRIDED-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 1 -; STRIDED-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], [[TMP3]] +; STRIDED-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], [[TMP1]] ; STRIDED-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP7]] ; STRIDED-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 2 -; STRIDED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], [[TMP3]] +; STRIDED-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], [[TMP1]] ; STRIDED-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP9]] ; STRIDED-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 3 -; STRIDED-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], [[TMP3]] +; STRIDED-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], [[TMP1]] ; STRIDED-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]] ; STRIDED-NEXT: [[DOTCAST:%.*]] = trunc i64 [[INDEX]] to i32 ; STRIDED-NEXT: [[OFFSET_IDX:%.*]] = add i32 30, [[DOTCAST]] diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -228,20 +228,20 @@ ; STRIDED-LABEL: @non_constant_vector_expansion( ; STRIDED-NEXT: entry: ; STRIDED-NEXT: [[MUL:%.*]] = shl i32 [[TMP0:%.*]], 1 +; STRIDED-NEXT: [[TMP1:%.*]] = sext i32 [[MUL]] to i64 ; STRIDED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] ; STRIDED: vector.scevcheck: ; STRIDED-NEXT: br i1 true, label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; STRIDED: vector.ph: -; STRIDED-NEXT: [[TMP1:%.*]] = sext i32 [[MUL]] to i64 -; STRIDED-NEXT: [[TMP2:%.*]] = mul i64 4294967264, [[TMP1]] -; STRIDED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr null, 
i64 [[TMP2]] -; STRIDED-NEXT: [[TMP3:%.*]] = sext i32 [[MUL]] to i64 +; STRIDED-NEXT: [[TMP2:%.*]] = sext i32 [[MUL]] to i64 +; STRIDED-NEXT: [[TMP3:%.*]] = mul i64 4294967264, [[TMP2]] +; STRIDED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr null, i64 [[TMP3]] ; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; STRIDED: vector.body: ; STRIDED-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ null, [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; STRIDED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 -; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP3]], i64 0 +; STRIDED-NEXT: [[TMP4:%.*]] = mul i64 [[TMP1]], 4 +; STRIDED-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i64 0 ; STRIDED-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = mul <4 x i64> , [[DOTSPLAT]] ; STRIDED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> [[VECTOR_GEP]] diff --git a/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll b/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll --- a/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll +++ b/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll @@ -19,38 +19,38 @@ ; CHECK-NEXT: [[INDUCTION_IV_NEXT]] = add i32 [[INDUCTION_IV]], [[TMP1]] ; CHECK-NEXT: br i1 false, label [[LOOP_1]], label [[LOOP_2_PREHEADER:%.*]] ; CHECK: loop.2.preheader: -; CHECK-NEXT: [[INDUCTION_IV_LCSSA2:%.*]] = phi i32 [ [[INDUCTION_IV]], [[LOOP_1]] ] +; CHECK-NEXT: [[INDUCTION_IV_LCSSA1:%.*]] = phi i32 [ [[INDUCTION_IV]], [[LOOP_1]] ] ; CHECK-NEXT: [[INDUCTION_IV_LCSSA:%.*]] = phi i32 [ [[INDUCTION_IV]], [[LOOP_1]] ] ; CHECK-NEXT: [[IV_1_LCSSA:%.*]] = phi i32 [ [[IV_1]], [[LOOP_1]] ] ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: 
vector.ph: -; CHECK-NEXT: [[IND_END:%.*]] = mul i32 196, [[INDUCTION_IV_LCSSA]] +; CHECK-NEXT: [[IND_END:%.*]] = mul i32 196, [[INDUCTION_IV_LCSSA1]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], [[INDUCTION_IV_LCSSA2]] -; CHECK-NEXT: [[TMP2:%.*]] = mul i32 0, [[INDUCTION_IV_LCSSA2]] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], [[INDUCTION_IV_LCSSA]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i32 0, [[INDUCTION_IV_LCSSA]] ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = mul i32 1, [[INDUCTION_IV_LCSSA2]] +; CHECK-NEXT: [[TMP4:%.*]] = mul i32 1, [[INDUCTION_IV_LCSSA]] ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = mul i32 2, [[INDUCTION_IV_LCSSA2]] +; CHECK-NEXT: [[TMP6:%.*]] = mul i32 2, [[INDUCTION_IV_LCSSA]] ; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = mul i32 3, [[INDUCTION_IV_LCSSA2]] +; CHECK-NEXT: [[TMP8:%.*]] = mul i32 3, [[INDUCTION_IV_LCSSA]] ; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[OFFSET_IDX]], [[TMP8]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196 ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 199, 196 -; CHECK-NEXT: [[IND_ESCAPE:%.*]] = mul i32 195, [[INDUCTION_IV_LCSSA2]] +; CHECK-NEXT: [[IND_ESCAPE:%.*]] = mul i32 195, [[INDUCTION_IV_LCSSA]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_3_PREHEADER:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 196, [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_2_PREHEADER]] ] -; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_2_PREHEADER]] ] 
+; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_2_PREHEADER]] ] ; CHECK-NEXT: br label [[LOOP_2:%.*]] ; CHECK: loop.2: ; CHECK-NEXT: [[IV_3:%.*]] = phi i16 [ [[IV_3_NEXT:%.*]], [[LOOP_2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[IV_4:%.*]] = phi i32 [ [[IV_4_NEXT:%.*]], [[LOOP_2]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[IV_4:%.*]] = phi i32 [ [[IV_4_NEXT:%.*]], [[LOOP_2]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[IV_4_NEXT]] = sub i32 [[IV_4]], [[IV_1_LCSSA]] ; CHECK-NEXT: [[IV_3_NEXT]] = add i16 [[IV_3]], 1 ; CHECK-NEXT: [[CMP88_1:%.*]] = icmp ult i16 [[IV_3]], 198 diff --git a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll @@ -13,14 +13,18 @@ ; CHECK-NEXT: edge [fontname=Courier, fontsize=30] ; CHECK-NEXT: compound=true ; CHECK-NEXT: N0 [label = +; CHECK-NEXT: "ph:\l" + +; CHECK-NEXT: "No successors\l" +; CHECK-NEXT: ] +; CHECK-NEXT: N1 [label = ; CHECK-NEXT: "vector.ph:\l" + ; CHECK-NEXT: "Successor(s): vector loop\l" ; CHECK-NEXT: ] -; CHECK-NEXT: N0 -> N1 [ label="" lhead=cluster_N2] -; CHECK-NEXT: subgraph cluster_N2 { +; CHECK-NEXT: N1 -> N2 [ label="" lhead=cluster_N3] +; CHECK-NEXT: subgraph cluster_N3 { ; CHECK-NEXT: fontname=Courier ; CHECK-NEXT: label="\ vector loop" -; CHECK-NEXT: N1 [label = +; CHECK-NEXT: N2 [label = ; CHECK-NEXT: "vector.body:\l" + ; CHECK-NEXT: " EMIT vp\<[[CAN_IV:%.+]]\> = CANONICAL-INDUCTION\l" + ; CHECK-NEXT: " vp\<[[STEPS:%.+]]\> = SCALAR-STEPS vp\<[[CAN_IV]]\>, ir\<1\>\l" + diff --git a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll @@ -5,6 +5,7 
@@ ; CHECK-LABEL: LV: Checking a loop in 'iv_no_binary_op_in_descriptor' ; CHECK: VPlan 'Initial VPlan for VF={8},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<1000> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll @@ -8,6 +8,8 @@ define void @foo(i64 %n) { ; CHECK: VPlan 'HCFGBuilder: Plain CFG ; CHECK-NEXT: { +; CHECK-NEXT: ir<8> = original trip-count +; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): outer.header ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -10,6 +10,7 @@ ; CHECK-LABEL: Checking a loop in 'print_call_and_memory' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -56,6 +57,7 @@ ; CHECK-LABEL: Checking a loop in 'print_widen_gep_and_select' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -107,6 +109,7 @@ ; CHECK-LABEL: Checking a loop in 'print_reduction' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; 
CHECK-NEXT: Successor(s): vector loop @@ -152,6 +155,7 @@ ; CHECK-LABEL: Checking a loop in 'print_reduction_with_invariant_store' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -196,6 +200,11 @@ ; CHECK-LABEL: Checking a loop in 'print_replicate_predicated_phi' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ph: +; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 smax %n) +; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -269,6 +278,7 @@ ; CHECK-LABEL: Checking a loop in 'print_interleave_groups' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<256> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -336,6 +346,7 @@ ; CHECK-LABEL: Checking a loop in 'print_fmuladd_strict' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -386,6 +397,7 @@ ; CHECK-LABEL: Checking a loop in 'debug_loc_vpinstruction' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<128> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -471,9 +483,14 @@ ; CHECK-LABEL: Checking a loop in 'print_expand_scev' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count +; CHECK-NEXT: 
vp<[[TC:%.+]]> = original trip-count ; CHECK-EMPTY: -; CHECK-NEXT: vector.ph: +; CHECK-NEXT: ph: +; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + ((15 + (%y /u 492802768830814060)) /u (1 + (%y /u 492802768830814060)))) ; CHECK-NEXT: EMIT vp<[[EXP_SCEV:%.+]]> = EXPAND SCEV (1 + (%y /u 492802768830814060)) +; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { @@ -521,6 +538,7 @@ ; CHECK-LABEL: Checking a loop in 'print_exit_value' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<1000> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll @@ -9,6 +9,7 @@ ; CHECK-LABEL: sink_with_sideeffects ; CHECK: VPlan 'Initial VPlan for VF={1},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: ir<0> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -13,6 +13,11 @@ ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ph: +; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 
+ (8 umin %k)) +; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -76,6 +81,11 @@ ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ph: +; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) +; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -154,6 +164,11 @@ ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ph: +; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) +; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -234,6 +249,7 @@ ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: Live-in ir<11> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -303,6 +319,11 @@ ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ph: +; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) +; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -397,6 +418,11 @@ ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; 
CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ph: +; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) +; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -500,6 +526,11 @@ ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ph: +; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) +; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -603,6 +634,11 @@ ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ph: +; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) +; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -702,6 +738,11 @@ ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ph: +; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) +; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -763,6 +804,11 @@ ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ph: +; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV (1 + (8 umin %k)) +; CHECK-NEXT: No successors ; CHECK-EMPTY: 
; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -842,6 +888,7 @@ ; CHECK-LABEL: LV: Checking a loop in 'update_multiple_users' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<999> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -905,6 +952,7 @@ ; CHECK-LABEL: LV: Checking a loop in 'sinking_requires_duplication' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<201> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -973,6 +1021,7 @@ ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: Live-in vp<[[BTC:%.+]]> = backedge-taken count +; CHECK-NEXT: Live-in ir<%n> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop @@ -1037,6 +1086,11 @@ ; CHECK-LABEL: LV: Checking a loop in 'ptr_induction_remove_dead_recipe' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count +; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: ph: +; CHECK-NEXT: EMIT vp<[[TC]]> = EXPAND SCEV ((-1 * (ptrtoint ptr %end to i64)) + (ptrtoint ptr %start to i64)) +; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: ; CHECK-NEXT: Successor(s): vector loop diff --git a/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp b/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp --- a/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp @@ -24,6 +24,7 @@ // \ / // VPBB4 // } + VPBasicBlock *VPPH = new VPBasicBlock("ph"); VPBasicBlock *VPBB0 = new VPBasicBlock("VPBB0"); VPBasicBlock *VPBB1 = new VPBasicBlock("VPBB1"); 
VPBasicBlock *VPBB2 = new VPBasicBlock("VPBB2"); @@ -39,8 +40,8 @@ VPBlockUtils::connectBlocks(VPBB2, VPBB4); VPBlockUtils::connectBlocks(VPBB3, VPBB4); - VPlan Plan; - Plan.setEntry(VPBB0); + auto TC = std::make_unique(); + VPlan Plan(VPPH, &*TC, VPBB0); VPDominatorTree VPDT; VPDT.recalculate(Plan); @@ -90,6 +91,7 @@ // R2BB2 // } // + VPBasicBlock *VPPH = new VPBasicBlock("ph"); VPBasicBlock *VPBB0 = new VPBasicBlock("VPBB0"); VPBasicBlock *R1BB1 = new VPBasicBlock(); VPBasicBlock *R1BB2 = new VPBasicBlock(); @@ -112,8 +114,8 @@ VPBlockUtils::connectBlocks(R2BB1, R2BB2); VPBlockUtils::connectBlocks(R1, R2); - VPlan Plan; - Plan.setEntry(VPBB0); + auto TC = std::make_unique(); + VPlan Plan(VPPH, &*TC, VPBB0); VPDominatorTree VPDT; VPDT.recalculate(Plan); @@ -165,6 +167,7 @@ // | // VPBB2 // + VPBasicBlock *VPPH = new VPBasicBlock("ph"); VPBasicBlock *R1BB1 = new VPBasicBlock("R1BB1"); VPBasicBlock *R1BB2 = new VPBasicBlock("R1BB2"); VPBasicBlock *R1BB3 = new VPBasicBlock("R1BB3"); @@ -191,8 +194,8 @@ VPBasicBlock *VPBB2 = new VPBasicBlock("VPBB2"); VPBlockUtils::connectBlocks(R1, VPBB2); - VPlan Plan; - Plan.setEntry(VPBB1); + auto TC = std::make_unique(); + VPlan Plan(VPPH, &*TC, VPBB1); VPDominatorTree VPDT; VPDT.recalculate(Plan); diff --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp --- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp @@ -106,14 +106,19 @@ edge [fontname=Courier, fontsize=30] compound=true N0 [label = + "ph:\l" + + " EMIT vp\<%1\> = EXPAND SCEV (-1 + %N)\l" + + "No successors\l" + ] + N1 [label = "vector.ph:\l" + "Successor(s): for.body\l" ] - N0 -> N1 [ label="" lhead=cluster_N2] - subgraph cluster_N2 { + N1 -> N2 [ label="" lhead=cluster_N3] + subgraph cluster_N3 { fontname=Courier label="\ for.body" - N1 [label = + N2 [label = "vector.body:\l" + " WIDEN-PHI ir\<%indvars.iv\> = phi ir\<0\>, 
ir\<%indvars.iv.next\>\l" + " EMIT ir\<%arr.idx\> = getelementptr ir\<%A\> ir\<%indvars.iv\>\l" + @@ -126,8 +131,8 @@ "No successors\l" ] } - N1 -> N3 [ label="" ltail=cluster_N2] - N3 [label = + N2 -> N4 [ label="" ltail=cluster_N3] + N4 [label = "for.end:\l" + "No successors\l" ] diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -239,6 +239,7 @@ } TEST(VPBasicBlockTest, getPlan) { { + VPBasicBlock *VPPH = new VPBasicBlock("ph"); VPBasicBlock *VPBB1 = new VPBasicBlock(); VPBasicBlock *VPBB2 = new VPBasicBlock(); VPBasicBlock *VPBB3 = new VPBasicBlock(); @@ -254,8 +255,8 @@ VPBlockUtils::connectBlocks(VPBB2, VPBB4); VPBlockUtils::connectBlocks(VPBB3, VPBB4); - VPlan Plan; - Plan.setEntry(VPBB1); + auto TC = std::make_unique(); + VPlan Plan(VPPH, &*TC, VPBB1); EXPECT_EQ(&Plan, VPBB1->getPlan()); EXPECT_EQ(&Plan, VPBB2->getPlan()); @@ -264,6 +265,7 @@ } { + VPBasicBlock *VPPH = new VPBasicBlock("ph"); // VPBasicBlock is the entry into the VPlan, followed by a region. 
VPBasicBlock *R1BB1 = new VPBasicBlock(); VPBasicBlock *R1BB2 = new VPBasicBlock(); @@ -273,8 +275,9 @@ VPBasicBlock *VPBB1 = new VPBasicBlock(); VPBlockUtils::connectBlocks(VPBB1, R1); - VPlan Plan; - Plan.setEntry(VPBB1); + auto TC = std::make_unique<VPValue>(); + VPlan Plan(VPPH, &*TC, VPBB1); + EXPECT_EQ(&Plan, VPBB1->getPlan()); EXPECT_EQ(&Plan, R1->getPlan()); EXPECT_EQ(&Plan, R1BB1->getPlan()); @@ -282,6 +285,8 @@ } { + VPBasicBlock *VPPH = new VPBasicBlock("ph"); + VPBasicBlock *R1BB1 = new VPBasicBlock(); VPBasicBlock *R1BB2 = new VPBasicBlock(); VPRegionBlock *R1 = new VPRegionBlock(R1BB1, R1BB2, "R1"); @@ -300,8 +305,9 @@ VPBlockUtils::connectBlocks(R1, VPBB2); VPBlockUtils::connectBlocks(R2, VPBB2); - VPlan Plan; - Plan.setEntry(VPBB1); + auto TC = std::make_unique<VPValue>(); + VPlan Plan(VPPH, &*TC, VPBB1); + EXPECT_EQ(&Plan, VPBB1->getPlan()); EXPECT_EQ(&Plan, R1->getPlan()); EXPECT_EQ(&Plan, R1BB1->getPlan()); @@ -322,6 +328,7 @@ // \ / // VPBB4 // + VPBasicBlock *VPPH = new VPBasicBlock("ph"); VPBasicBlock *VPBB1 = new VPBasicBlock(); VPBasicBlock *VPBB2 = new VPBasicBlock(); VPBasicBlock *VPBB3 = new VPBasicBlock(); @@ -339,8 +346,8 @@ EXPECT_EQ(VPBB2, FromIterator[1]); // Use Plan to properly clean up created blocks. - VPlan Plan; - Plan.setEntry(VPBB1); + auto TC = std::make_unique<VPValue>(); + VPlan Plan(VPPH, &*TC, VPBB1); } { @@ -362,6 +369,7 @@ // | // R2BB2 // + VPBasicBlock *VPPH = new VPBasicBlock("ph"); VPBasicBlock *VPBB0 = new VPBasicBlock("VPBB0"); VPBasicBlock *R1BB1 = new VPBasicBlock(); VPBasicBlock *R1BB2 = new VPBasicBlock(); @@ -438,8 +446,8 @@ EXPECT_EQ(R1, FromIterator[7]); // Use Plan to properly clean up created blocks. 
- VPlan Plan; - Plan.setEntry(VPBB0); + auto TC = std::make_unique<VPValue>(); + VPlan Plan(VPPH, &*TC, VPBB0); } { @@ -463,6 +471,7 @@ // | // VPBB2 // + VPBasicBlock *VPPH = new VPBasicBlock("ph"); VPBasicBlock *R1BB1 = new VPBasicBlock("R1BB1"); VPBasicBlock *R1BB2 = new VPBasicBlock("R1BB2"); VPBasicBlock *R1BB3 = new VPBasicBlock("R1BB3"); @@ -520,8 +529,8 @@ EXPECT_EQ(VPBB1, FromIterator[9]); // Use Plan to properly clean up created blocks. - VPlan Plan; - Plan.setEntry(VPBB1); + auto TC = std::make_unique<VPValue>(); + VPlan Plan(VPPH, &*TC, VPBB1); } { @@ -535,6 +544,7 @@ // R2BB2 // } // + VPBasicBlock *VPPH = new VPBasicBlock("ph"); VPBasicBlock *R2BB1 = new VPBasicBlock("R2BB1"); VPBasicBlock *R2BB2 = new VPBasicBlock("R2BB2"); VPRegionBlock *R2 = new VPRegionBlock(R2BB1, R2BB2, "R2"); @@ -567,8 +577,8 @@ EXPECT_EQ(VPBB1, FromIterator[4]); // Use Plan to properly clean up created blocks. - VPlan Plan; - Plan.setEntry(VPBB1); + auto TC = std::make_unique<VPValue>(); + VPlan Plan(VPPH, &*TC, VPBB1); } { @@ -590,6 +600,7 @@ // | // VPBB2 // + VPBasicBlock *VPPH = new VPBasicBlock("ph"); VPBasicBlock *R3BB1 = new VPBasicBlock("R3BB1"); VPRegionBlock *R3 = new VPRegionBlock(R3BB1, R3BB1, "R3"); @@ -658,13 +669,17 @@ EXPECT_EQ(VPBB1, FromIterator[3]); // Use Plan to properly clean up created blocks. 
- VPlan Plan; - Plan.setEntry(VPBB1); + auto TC = std::make_unique<VPValue>(); + VPlan Plan(VPPH, &*TC, VPBB1); } } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) TEST(VPBasicBlockTest, print) { + VPInstruction *TC = new VPInstruction(Instruction::Add, {}); + VPBasicBlock *VPBB0 = new VPBasicBlock("preheader"); + VPBB0->appendRecipe(TC); + VPInstruction *I1 = new VPInstruction(Instruction::Add, {}); VPInstruction *I2 = new VPInstruction(Instruction::Sub, {I1}); VPInstruction *I3 = new VPInstruction(Instruction::Br, {I1, I2}); @@ -694,8 +709,7 @@ EXPECT_EQ("EMIT br <badref> <badref>", I3Dump); } - VPlan Plan; - Plan.setEntry(VPBB1); + VPlan Plan(VPBB0, TC, VPBB1); std::string FullDump; raw_string_ostream OS(FullDump); Plan.printDOT(OS); @@ -706,17 +720,22 @@ edge [fontname=Courier, fontsize=30] compound=true N0 [label = - "bb1:\l" + + "preheader:\l" + " EMIT vp\<%1\> = add\l" + - " EMIT vp\<%2\> = sub vp\<%1\>\l" + - " EMIT br vp\<%1\> vp\<%2\>\l" + - "Successor(s): bb2\l" + "No successors\l" ] - N0 -> N1 [ label=""] N1 [label = + "bb1:\l" + + " EMIT vp\<%2\> = add\l" + + " EMIT vp\<%3\> = sub vp\<%2\>\l" + + " EMIT br vp\<%2\> vp\<%3\>\l" + + "Successor(s): bb2\l" + ] + N1 -> N2 [ label=""] + N2 [label = "bb2:\l" + - " EMIT vp\<%4\> = mul vp\<%2\> vp\<%1\>\l" + - " EMIT ret vp\<%4\>\l" + + " EMIT vp\<%5\> = mul vp\<%3\> vp\<%2\>\l" + + " EMIT ret vp\<%5\>\l" + "No successors\l" ] } @@ -724,9 +743,9 @@ EXPECT_EQ(ExpectedStr, FullDump); const char *ExpectedBlock1Str = R"(bb1: - EMIT vp<%1> = add - EMIT vp<%2> = sub vp<%1> - EMIT br vp<%1> vp<%2> + EMIT vp<%2> = add + EMIT vp<%3> = sub vp<%2> + EMIT br vp<%2> vp<%3> Successor(s): bb2 )"; std::string Block1Dump; @@ -736,8 +755,8 @@ // Ensure that numbering is good when dumping the second block in isolation. 
const char *ExpectedBlock2Str = R"(bb2: - EMIT vp<%4> = mul vp<%2> vp<%1> - EMIT ret vp<%4> + EMIT vp<%5> = mul vp<%3> vp<%2> + EMIT ret vp<%5> No successors )"; std::string Block2Dump; @@ -751,7 +770,7 @@ VPSlotTracker SlotTracker(&Plan); I3->print(OS, "", SlotTracker); OS.flush(); - EXPECT_EQ("EMIT br vp<%1> vp<%2>", I3Dump); + EXPECT_EQ("EMIT br vp<%2> vp<%3>", I3Dump); } { @@ -759,21 +778,24 @@ raw_string_ostream OS(I4Dump); OS << *I4; OS.flush(); - EXPECT_EQ("EMIT vp<%4> = mul vp<%2> vp<%1>", I4Dump); + EXPECT_EQ("EMIT vp<%5> = mul vp<%3> vp<%2>", I4Dump); } } TEST(VPBasicBlockTest, printPlanWithVFsAndUFs) { - VPInstruction *I1 = new VPInstruction(Instruction::Add, {}); + VPInstruction *TC = new VPInstruction(Instruction::Sub, {}); + VPBasicBlock *VPBB0 = new VPBasicBlock("preheader"); + VPBB0->appendRecipe(TC); + + VPInstruction *I1 = new VPInstruction(Instruction::Add, {}); VPBasicBlock *VPBB1 = new VPBasicBlock(); VPBB1->appendRecipe(I1); VPBB1->setName("bb1"); - VPlan Plan; + VPlan Plan(VPBB0, TC, VPBB1); Plan.setName("TestPlan"); Plan.addVF(ElementCount::getFixed(4)); - Plan.setEntry(VPBB1); { std::string FullDump; @@ -781,8 +803,14 @@ Plan.print(OS); const char *ExpectedStr = R"(VPlan 'TestPlan for VF={4},UF>=1' { +vp<%1> = original trip-count + +preheader: + EMIT vp<%1> = sub +No successors + bb1: - EMIT vp<%1> = add + EMIT vp<%2> = add No successors } )"; @@ -796,8 +824,14 @@ Plan.print(OS); const char *ExpectedStr = R"(VPlan 'TestPlan for VF={4,vscale x 8},UF>=1' { +vp<%1> = original trip-count + +preheader: + EMIT vp<%1> = sub +No successors + bb1: - EMIT vp<%1> = add + EMIT vp<%2> = add No successors } )"; @@ -811,8 +845,14 @@ Plan.print(OS); const char *ExpectedStr = R"(VPlan 'TestPlan for VF={4,vscale x 8},UF={4}' { +vp<%1> = original trip-count + +preheader: + EMIT vp<%1> = sub +No successors + bb1: - EMIT vp<%1> = add + EMIT vp<%2> = add No successors } )"; @@ -1182,9 +1222,9 @@ #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 
TEST(VPRecipeTest, dump) { - VPlan Plan; + VPBasicBlock *VPBB0 = new VPBasicBlock("preheader"); VPBasicBlock *VPBB1 = new VPBasicBlock(); - Plan.setEntry(VPBB1); + VPlan Plan(VPBB0, VPBB1); LLVMContext C; diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h --- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h +++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h @@ -64,7 +64,8 @@ VPlanPtr buildHCFG(BasicBlock *LoopHeader) { doAnalysis(*LoopHeader->getParent()); - auto Plan = std::make_unique<VPlan>(); + auto Plan = VPlan::createInitialVPlan( + SE->getBackedgeTakenCount(LI->getLoopFor(LoopHeader)), *SE); VPlanHCFGBuilder HCFGBuilder(LI->getLoopFor(LoopHeader), LI.get(), *Plan); HCFGBuilder.buildHierarchicalCFG(); return Plan; @@ -74,7 +75,8 @@ VPlanPtr buildPlainCFG(BasicBlock *LoopHeader) { doAnalysis(*LoopHeader->getParent()); - auto Plan = std::make_unique<VPlan>(); + auto Plan = VPlan::createInitialVPlan( + SE->getBackedgeTakenCount(LI->getLoopFor(LoopHeader)), *SE); VPlanHCFGBuilder HCFGBuilder(LI->getLoopFor(LoopHeader), LI.get(), *Plan); VPBasicBlock *EntryVPBB = HCFGBuilder.buildPlainCFG(); Plan->setEntry(EntryVPBB); diff --git a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp --- a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp @@ -19,12 +19,13 @@ VPInstruction *DefI = new VPInstruction(Instruction::Add, {}); VPInstruction *UseI = new VPInstruction(Instruction::Sub, {DefI}); + VPBasicBlock *VPPH = new VPBasicBlock("ph"); VPBasicBlock *VPBB1 = new VPBasicBlock(); VPBB1->appendRecipe(UseI); VPBB1->appendRecipe(DefI); - VPlan Plan; - Plan.setEntry(VPBB1); + auto TC = std::make_unique<VPValue>(); + VPlan Plan(VPPH, &*TC, VPBB1); #if GTEST_HAS_STREAM_REDIRECTION ::testing::internal::CaptureStderr(); @@ -43,6 +44,7 @@ VPInstruction *BranchOnCond = new 
VPInstruction(VPInstruction::BranchOnCond, {CanIV}); + VPBasicBlock *VPPH = new VPBasicBlock("ph"); VPBasicBlock *VPBB1 = new VPBasicBlock(); VPBasicBlock *VPBB2 = new VPBasicBlock(); @@ -54,8 +56,8 @@ VPRegionBlock *R1 = new VPRegionBlock(VPBB2, VPBB2, "R1"); VPBlockUtils::connectBlocks(VPBB1, R1); - VPlan Plan; - Plan.setEntry(VPBB1); + auto TC = std::make_unique<VPValue>(); + VPlan Plan(VPPH, &*TC, VPBB1); #if GTEST_HAS_STREAM_REDIRECTION ::testing::internal::CaptureStderr(); @@ -79,6 +81,7 @@ new VPInstruction(VPInstruction::BranchOnCond, {CanIV}); auto *Blend = new VPBlendRecipe(Phi, {DefI}); + VPBasicBlock *VPPH = new VPBasicBlock("ph"); VPBasicBlock *VPBB1 = new VPBasicBlock(); VPBasicBlock *VPBB2 = new VPBasicBlock(); VPBasicBlock *VPBB3 = new VPBasicBlock(); @@ -95,8 +98,8 @@ VPRegionBlock *R1 = new VPRegionBlock(VPBB2, VPBB4, "R1"); VPBlockUtils::connectBlocks(VPBB1, R1); - VPlan Plan; - Plan.setEntry(VPBB1); + auto TC = std::make_unique<VPValue>(); + VPlan Plan(VPPH, &*TC, VPBB1); #if GTEST_HAS_STREAM_REDIRECTION ::testing::internal::CaptureStderr();