diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -523,7 +523,8 @@ /// the corresponding type. void widenIntOrFpInduction(PHINode *IV, Value *Start, TruncInst *Trunc, VPValue *Def, VPValue *CastDef, VPValue *StepDef, - VPValue *PhiDef, VPTransformState &State); + VPValue *PhiDef, VPTransformState &State, + Value *PrimInd); /// Construct the vector value of a scalarized value \p V one lane at a time. void packScalarIntoVectorValue(VPValue *Def, const VPIteration &Instance, @@ -584,9 +585,7 @@ Value *CountRoundDown, Value *EndValue, BasicBlock *MiddleBlock); - /// Create a new induction variable inside L. - PHINode *createInductionVariable(Loop *L, Value *Start, Value *End, - Value *Step, Instruction *DL); + void createLatchTerminator(Loop *L); /// Handle all cross-iteration phis in the header. void fixCrossIterationPHIs(VPTransformState &State); @@ -841,12 +840,11 @@ /// A list of all bypass blocks. The first block is the entry of the loop. SmallVector LoopBypassBlocks; - /// The new Induction variable which was added to the new block. - PHINode *Induction = nullptr; - /// The induction variable of the old basic block. PHINode *OldInduction = nullptr; + Value *IVStartValue = nullptr; + /// Store instructions that were predicated. SmallVector PredicatedInstructions; @@ -2497,7 +2495,8 @@ void InnerLoopVectorizer::widenIntOrFpInduction( PHINode *IV, Value *Start, TruncInst *Trunc, VPValue *Def, VPValue *CastDef, - VPValue *StepDef, VPValue *PhiDef, VPTransformState &State) { + VPValue *StepDef, VPValue *PhiDef, VPTransformState &State, + Value *PrimInd) { assert((IV->getType()->isIntegerTy() || IV != OldInduction) && "Primary induction variable must have an integer type"); @@ -2531,12 +2530,12 @@ // induction variable and step. Otherwise, derive these values from the // induction descriptor. 
auto CreateScalarIV = [&](Value *&Step) -> Value * { - Value *ScalarIV = Induction; + Value *ScalarIV = PrimInd; if (IV != OldInduction) { - ScalarIV = IV->getType()->isIntegerTy() - ? Builder.CreateSExtOrTrunc(Induction, IV->getType()) - : Builder.CreateCast(Instruction::SIToFP, Induction, - IV->getType()); + ScalarIV = + IV->getType()->isIntegerTy() + ? Builder.CreateSExtOrTrunc(PrimInd, IV->getType()) + : Builder.CreateCast(Instruction::SIToFP, PrimInd, IV->getType()); ScalarIV = emitTransformedIndex(Builder, ScalarIV, PSE.getSE(), DL, ID); ScalarIV->setName("offset.idx"); } @@ -3186,9 +3185,7 @@ PredicatedInstructions.push_back(Cloned); } -PHINode *InnerLoopVectorizer::createInductionVariable(Loop *L, Value *Start, - Value *End, Value *Step, - Instruction *DL) { +void InnerLoopVectorizer::createLatchTerminator(Loop *L) { BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); // As we're just creating this loop, it's possible no latch exists @@ -3198,31 +3195,15 @@ IRBuilder<> B(&*Header->getFirstInsertionPt()); Instruction *OldInst = getDebugLocFromInstOrOperands(OldInduction); - setDebugLocFromInst(OldInst, &B); - auto *Induction = B.CreatePHI(Start->getType(), 2, "index"); B.SetInsertPoint(Latch->getTerminator()); setDebugLocFromInst(OldInst, &B); - // Create i+1 and fill the PHINode. - // - // If the tail is not folded, we know that End - Start >= Step (either - // statically or through the minimum iteration checks). We also know that both - // Start % Step == 0 and End % Step == 0. We exit the vector loop if %IV + - // %Step == %End. Hence we must exit the loop before %IV + %Step unsigned - // overflows and we can mark the induction increment as NUW. - Value *Next = B.CreateAdd(Induction, Step, "index.next", - /*NUW=*/!Cost->foldTailByMasking(), /*NSW=*/false); - Induction->addIncoming(Start, L->getLoopPreheader()); - Induction->addIncoming(Next, Latch); // Create the compare. 
- Value *ICmp = B.CreateICmpEQ(Next, End); - B.CreateCondBr(ICmp, L->getUniqueExitBlock(), Header); + B.CreateCondBr(B.getTrue(), L->getUniqueExitBlock(), Header); // Now we have two terminators. Remove the old one from the block. Latch->getTerminator()->eraseFromParent(); - - return Induction; } Value *InnerLoopVectorizer::getOrCreateTripCount(Loop *L) { @@ -3830,28 +3811,13 @@ // faster. emitMemRuntimeChecks(Lp, LoopScalarPreHeader); - // Some loops have a single integer induction variable, while other loops - // don't. One example is c++ iterators that often have multiple pointer - // induction variables. In the code below we also support a case where we - // don't have a single induction variable. - // - // We try to obtain an induction variable from the original loop as hard - // as possible. However if we don't find one that: - // - is an integer - // - counts from zero, stepping by one - // - is the size of the widest induction variable type - // then we create a new one. OldInduction = Legal->getPrimaryInduction(); - Type *IdxTy = Legal->getWidestInductionType(); - Value *StartIdx = ConstantInt::get(IdxTy, 0); + // The loop step is equal to the vectorization factor (num of SIMD elements) // times the unroll factor (num of SIMD instructions). Builder.SetInsertPoint(&*Lp->getHeader()->getFirstInsertionPt()); - Value *Step = createStepForVF(Builder, IdxTy, VF, UF); Value *CountRoundDown = getOrCreateVectorTripCount(Lp); - Induction = - createInductionVariable(Lp, StartIdx, CountRoundDown, Step, - getDebugLocFromInstOrOperands(OldInduction)); + createLatchTerminator(Lp); // Emit phis for the new starting index of the scalar loop. 
createInductionResumeValues(Lp, CountRoundDown); @@ -4827,6 +4793,10 @@ InductionDescriptor II = Legal->getInductionVars().lookup(P); const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout(); + auto *Plan = PhiR->getParent()->getPlan(); + auto *IVR = Plan->getCanonicalIV(); + PHINode *PrimInd = cast(State.get(IVR->getVPSingleValue(), 0)); + // FIXME: The newly created binary instructions should contain nsw/nuw flags, // which can be found from the original scalar operations. switch (II.getKind()) { @@ -4842,7 +4812,7 @@ if (Cost->isScalarAfterVectorization(P, State.VF)) { // This is the normalized GEP that starts counting at zero. Value *PtrInd = - Builder.CreateSExtOrTrunc(Induction, II.getStep()->getType()); + Builder.CreateSExtOrTrunc(PrimInd, II.getStep()->getType()); // Determine the number of scalars we need to generate for each unroll // iteration. If the instruction is uniform, we only need to generate the // first lane. Otherwise, we generate all VF values. @@ -4899,7 +4869,7 @@ Value *ScalarStartValue = II.getStartValue(); Type *ScStValueType = ScalarStartValue->getType(); PHINode *NewPointerPhi = - PHINode::Create(ScStValueType, 2, "pointer.phi", Induction); + PHINode::Create(ScStValueType, 2, "pointer.phi", PrimInd); NewPointerPhi->addIncoming(ScalarStartValue, LoopVectorPreHeader); // A pointer induction, performed by using a gep @@ -8379,7 +8349,7 @@ VPTransformState State{LI, DT, ILV.Builder, &ILV, &BestVPlan}; State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton(); State.TripCount = ILV.getOrCreateTripCount(nullptr); - State.CanonicalIV = ILV.Induction; + State.VectorTripCount = ILV.getOrCreateVectorTripCount(nullptr); ILV.collectPoisonGeneratingRecipes(State); ILV.printDebugTracesAtStart(); @@ -8395,6 +8365,13 @@ // 2. Copy and widen instructions from the old loop into the new loop. 
BestVPlan.getVectorLoopRegion()->setVFandUF(BestVF, BestUF); State.setVFandUF(BestVF, BestUF); + if (ILV.IVStartValue) { + VPBasicBlock *Header = BestVPlan.getEntry()->getEntryBasicBlock(); + auto *IV = cast(&*Header->begin()); + VPValue *VPV = new VPValue(ILV.IVStartValue); + BestVPlan.addExternalDef(VPV); + IV->setOperand(0, VPV); + } BestVPlan.execute(&State); // 3. Fix the vectorized code: take care of header phi's, live-outs, @@ -8560,16 +8537,9 @@ // Generate the induction variable. OldInduction = Legal->getPrimaryInduction(); - Type *IdxTy = Legal->getWidestInductionType(); - Value *StartIdx = ConstantInt::get(IdxTy, 0); - - IRBuilder<> B(&*Lp->getLoopPreheader()->getFirstInsertionPt()); - Value *Step = getRuntimeVF(B, IdxTy, VF * UF); Value *CountRoundDown = getOrCreateVectorTripCount(Lp); EPI.VectorTripCount = CountRoundDown; - Induction = - createInductionVariable(Lp, StartIdx, CountRoundDown, Step, - getDebugLocFromInstOrOperands(OldInduction)); + createLatchTerminator(Lp); // Skip induction resume value creation here because they will be created in // the second pass. If we created them here, they wouldn't be used anyway, @@ -8723,14 +8693,11 @@ EPResumeVal->addIncoming(ConstantInt::get(IdxTy, 0), EPI.MainLoopIterationCountCheck); + IVStartValue = EPResumeVal; + // Generate the induction variable. OldInduction = Legal->getPrimaryInduction(); - Value *CountRoundDown = getOrCreateVectorTripCount(Lp); - Constant *Step = ConstantInt::get(IdxTy, VF.getKnownMinValue() * UF); - Value *StartIdx = EPResumeVal; - Induction = - createInductionVariable(Lp, StartIdx, CountRoundDown, Step, - getDebugLocFromInstOrOperands(OldInduction)); + createLatchTerminator(Lp); // Generate induction resume values. These variables save the new starting // indexes for the scalar loop. 
They are used to test if there are any tail @@ -8739,6 +8706,7 @@ // check, then the resume value for the induction variable comes from // the trip count of the main vector loop, hence passing the AdditionalBypass // argument. + Value *CountRoundDown = getOrCreateVectorTripCount(Lp); createInductionResumeValues(Lp, CountRoundDown, {VecEpilogueIterationCountCheck, EPI.VectorTripCount} /* AdditionalBypass */); @@ -9560,7 +9528,9 @@ !Plan->getEntry()->getEntryBasicBlock()->empty() && "entry block must be set to a VPRegionBlock having a non-empty entry " "VPBasicBlock"); - cast(Plan->getEntry())->setExit(VPBB); + VPRegionBlock *TopRegion = Plan->getVectorLoopRegion(); + TopRegion->setExit(VPBB); + RecipeBuilder.fixHeaderPhis(); // --------------------------------------------------------------------------- @@ -9697,6 +9667,21 @@ } } + Type *IdxTy = Legal->getWidestInductionType(); + Value *StartIdx = ConstantInt::get(IdxTy, 0); + auto *StartV = Plan->getOrAddVPValue(StartIdx); + + auto *PrimaryInd = new VPCanonicalIVRecipe(StartV); + PrimaryInd->insertBefore( + &*TopRegion->getEntry()->getEntryBasicBlock()->begin()); + + auto *InductionIncrement = cast(new VPInstruction( + !CM.foldTailByMasking() ? VPInstruction::InductionIncrementNUW + : VPInstruction::InductionIncrement, + {PrimaryInd->getVPSingleValue()})); + PrimaryInd->addOperand(InductionIncrement->getVPSingleValue()); + VPBB->appendRecipe(InductionIncrement); + // From this point onwards, VPlan-to-VPlan transformations may change the plan // in ways that accessing values using original IR values is incorrect. 
Plan->disableValue2VPValue(); @@ -9753,6 +9738,25 @@ VPlanTransforms::VPInstructionsToVPRecipes(OrigLoop, Plan, Legal->getInductionVars(), DeadInstructions, *PSE.getSE()); + + Type *IdxTy = Legal->getWidestInductionType(); + Value *StartIdx = ConstantInt::get(IdxTy, 0); + auto *StartV = Plan->getOrAddVPValue(StartIdx); + + auto *PrimaryInd = new VPCanonicalIVRecipe(StartV); + VPRegionBlock *TopRegion = Plan->getVectorLoopRegion(); + PrimaryInd->insertBefore( + &*cast(TopRegion->getEntry()->getSingleSuccessor()) + ->begin()); + + auto *InductionIncrement = new VPInstruction( + VPInstruction::InductionIncrementNUW, {PrimaryInd->getVPSingleValue()}); + PrimaryInd->addOperand(InductionIncrement->getVPSingleValue()); + + VPBasicBlock *OuterLatch = + cast(TopRegion->getExit()->getSinglePredecessor()); + OuterLatch->insert(InductionIncrement, std::prev(OuterLatch->end())); + return Plan; } @@ -9909,9 +9913,11 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { assert(!State.Instance && "Int or FP induction being replicated."); + auto *PrimInd = State.get( + getParent()->getPlan()->getCanonicalIV()->getVPSingleValue(), 0); State.ILV->widenIntOrFpInduction( IV, getStartValue()->getLiveInIRValue(), getTruncInst(), getVPValue(0), - getCastValue(), getStepValue(), getPhiValue(), State); + getCastValue(), getStepValue(), getPhiValue(), State, PrimInd); } void VPWidenPHIRecipe::execute(VPTransformState &State) { @@ -10668,6 +10674,7 @@ Checks); VPlan &BestEpiPlan = LVP.getBestPlanFor(EPI.EpilogueVF); + LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV, DT); ++LoopsEpilogueVectorized; diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -353,6 +353,8 @@ /// Pointer to the VPlan code is generated for. 
VPlan *Plan; + Value *VectorTripCount; + /// Holds recipes that may generate a poison value that is used after /// vectorization, even when their operands are not poison. SmallPtrSet MayGeneratePoisonRecipes; @@ -794,6 +796,8 @@ SLPLoad, SLPStore, ActiveLaneMask, + InductionIncrement, + InductionIncrementNUW, }; private: @@ -1610,6 +1614,30 @@ #endif }; +class VPCanonicalIVRecipe : public VPRecipeBase { +public: + VPCanonicalIVRecipe(VPValue *StartV) + : VPRecipeBase(VPCanonicalIVSC, {StartV}) { + new VPValue(nullptr, this); + } + + ~VPCanonicalIVRecipe() override = default; + + /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPDef *D) { + return D->getVPDefID() == VPCanonicalIVSC; + } + + /// Generate the canonical scalar induction phi of the vector loop. + void execute(VPTransformState &State) override; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif +}; + /// A Recipe for widening the canonical induction variable of the vector loop. class VPWidenCanonicalIVRecipe : public VPRecipeBase, public VPValue { public: @@ -2280,6 +2308,14 @@ return cast(getEntry()); } + VPCanonicalIVRecipe *getCanonicalIV() { + VPBasicBlock *EntryVPBB = getEntry()->getEntryBasicBlock(); + if (EntryVPBB->empty()) + // VPlan native path. + EntryVPBB = cast(EntryVPBB->getSingleSuccessor()); + return cast(&*EntryVPBB->phis().begin()); + } + private: /// Add to the given dominator tree the header block and every new basic block /// that was created between it and the latch block, inclusive. 
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -60,6 +60,14 @@ } #endif +static Value *createStepForVF(IRBuilder<> &B, Constant *Step, ElementCount VF) { + assert(isa(Step) && "Expected an integer step"); + Constant *StepVal = ConstantInt::get(Step->getType(), + cast(Step)->getSExtValue() * + VF.getKnownMinValue()); + return VF.isScalable() ? B.CreateVScale(StepVal) : StepVal; +} + Value *VPLane::getAsRuntimeExpr(IRBuilder<> &Builder, const ElementCount &VF) const { switch (LaneKind) { @@ -660,6 +668,9 @@ } switch (getOpcode()) { + case Instruction::PHI: { + break; + } case VPInstruction::Not: { Value *A = State.get(getOperand(0), Part); Value *V = Builder.CreateNot(A); @@ -719,6 +730,20 @@ } break; } + + case VPInstruction::InductionIncrement: + case VPInstruction::InductionIncrementNUW: { + if (Part == 0) { + bool IsNUW = getOpcode() == VPInstruction::InductionIncrementNUW; + auto *Phi = State.get(getOperand(0), Part); + Value *Step = createStepForVF( + Builder, ConstantInt::get(Phi->getType(), State.UF), State.VF); + Value *Next = Builder.CreateAdd(Phi, Step, "index.next", IsNUW, false); + State.set(getVPSingleValue(), Next, Part); + } + + break; + } default: llvm_unreachable("Unsupported opcode for instruction"); } @@ -766,6 +791,12 @@ case VPInstruction::FirstOrderRecurrenceSplice: O << "first-order splice"; break; + case VPInstruction::InductionIncrement: + O << "induction increment"; + break; + case VPInstruction::InductionIncrementNUW: + O << "induction increment nuw"; + break; default: O << Instruction::getOpcodeName(getOpcode()); } @@ -837,6 +868,41 @@ for (VPBlockBase *Block : depth_first(Entry)) Block->execute(State); + // Setup branch terminator successors for VPBBs in VPBBsToFix based on + // VPBB's successors. 
+ for (auto VPBB : State->CFG.VPBBsToFix) { + assert(EnableVPlanNativePath && + "Unexpected VPBBsToFix in non VPlan-native path"); + BasicBlock *BB = State->CFG.VPBB2IRBB[VPBB]; + assert(BB && "Unexpected null basic block for VPBB"); + + unsigned Idx = 0; + auto *BBTerminator = BB->getTerminator(); + + for (VPBlockBase *SuccVPBlock : VPBB->getHierarchicalSuccessors()) { + VPBasicBlock *SuccVPBB = SuccVPBlock->getEntryBasicBlock(); + BBTerminator->setSuccessor(Idx, State->CFG.VPBB2IRBB[SuccVPBB]); + ++Idx; + } + } + + // 3. Merge the temporary latch created with the last basic-block filled. + BasicBlock *LastBB = State->CFG.PrevBB; + // Connect LastBB to VectorLatchBB to facilitate their merge. + assert((EnableVPlanNativePath || + isa(LastBB->getTerminator())) && + "Expected InnerLoop VPlan CFG to terminate with unreachable"); + assert((!EnableVPlanNativePath || isa(LastBB->getTerminator())) && + "Expected VPlan CFG to terminate with branch in NativePath"); + LastBB->getTerminator()->eraseFromParent(); + BranchInst::Create(VectorLatchBB, LastBB); + + // Merge LastBB with Latch. + bool Merged = MergeBlockIntoPredecessor(VectorLatchBB, nullptr, State->LI); + (void)Merged; + assert(Merged && "Could not merge last basic block with latch."); + VectorLatchBB = LastBB; + // Fix the latch value of reduction and first-order recurrences phis in the // vector loop. 
VPBasicBlock *Header = Entry->getEntryBasicBlock(); @@ -845,6 +911,21 @@ Header = cast(Header->getSingleSuccessor()); } for (VPRecipeBase &R : Header->phis()) { + if (auto *Ind = dyn_cast(&R)) { + auto *BackedgeValue = cast(Ind->getOperand(1)->getDef()); + auto *P = cast(State->get(Ind->getVPSingleValue(), 0)); + P->setName("index"); + BasicBlock *LatchBB = State->CFG.VPBB2IRBB[BackedgeValue->getParent()]; + P->addIncoming(State->get(BackedgeValue, 0), LatchBB); + auto *Next = cast(P->getIncomingValueForBlock(LatchBB)); + auto *TermBr = cast(LatchBB->getTerminator()); + State->Builder.SetInsertPoint(TermBr); + auto *ICmp = cast( + State->Builder.CreateICmpEQ(Next, State->VectorTripCount)); + TermBr->setCondition(ICmp); + continue; + } + if (auto *IV = dyn_cast(&R)) { if (!State->hasAnyVectorValue(IV->getStepValue())) continue; @@ -857,8 +938,8 @@ auto *LoopVectorLatch = State->LI->getLoopFor(State->CFG.PrevBB)->getLoopLatch(); auto *Br = cast(LoopVectorLatch->getTerminator()); - auto *ICmp = cast(Br->getCondition()); - LastInduction->moveBefore(ICmp); + auto *Cmp = cast(Br->getCondition()); + LastInduction->moveBefore(Cmp); LastInduction->setName("vec.ind.next"); VecInd->addIncoming(LastInduction, LoopVectorLatch); @@ -882,41 +963,6 @@ } } - // Setup branch terminator successors for VPBBs in VPBBsToFix based on - // VPBB's successors. - for (auto VPBB : State->CFG.VPBBsToFix) { - assert(EnableVPlanNativePath && - "Unexpected VPBBsToFix in non VPlan-native path"); - BasicBlock *BB = State->CFG.VPBB2IRBB[VPBB]; - assert(BB && "Unexpected null basic block for VPBB"); - - unsigned Idx = 0; - auto *BBTerminator = BB->getTerminator(); - - for (VPBlockBase *SuccVPBlock : VPBB->getHierarchicalSuccessors()) { - VPBasicBlock *SuccVPBB = SuccVPBlock->getEntryBasicBlock(); - BBTerminator->setSuccessor(Idx, State->CFG.VPBB2IRBB[SuccVPBB]); - ++Idx; - } - } - - // 3. Merge the temporary latch created with the last basic-block filled. 
- BasicBlock *LastBB = State->CFG.PrevBB; - // Connect LastBB to VectorLatchBB to facilitate their merge. - assert((EnableVPlanNativePath || - isa(LastBB->getTerminator())) && - "Expected InnerLoop VPlan CFG to terminate with unreachable"); - assert((!EnableVPlanNativePath || isa(LastBB->getTerminator())) && - "Expected VPlan CFG to terminate with branch in NativePath"); - LastBB->getTerminator()->eraseFromParent(); - BranchInst::Create(VectorLatchBB, LastBB); - - // Merge LastBB with Latch. - bool Merged = MergeBlockIntoPredecessor(VectorLatchBB, nullptr, State->LI); - (void)Merged; - assert(Merged && "Could not merge last basic block with latch."); - VectorLatchBB = LastBB; - // We do not attempt to preserve DT for outer loop vectorization currently. if (!EnableVPlanNativePath) updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB, @@ -1290,8 +1336,27 @@ } #endif +void VPCanonicalIVRecipe::execute(VPTransformState &State) { + Value *Start = getOperand(0)->getLiveInIRValue(); + PHINode *EntryPart = PHINode::Create( + Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt()); + EntryPart->addIncoming(Start, State.CFG.VectorPreHeader); + State.set(getVPSingleValue(), EntryPart, 0); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "EMIT "; + getVPSingleValue()->printAsOperand(O, SlotTracker); + O << " = CANONICAL-INDUCTION"; +} +#endif + void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) { - Value *CanonicalIV = State.CanonicalIV; + + Value *CanonicalIV = State.get( + getParent()->getPlan()->getCanonicalIV()->getVPSingleValue(), 0); Type *STy = CanonicalIV->getType(); IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); ElementCount VF = State.VF; diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h 
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -101,6 +101,7 @@ // Phi-like VPValues. Need to be kept together. VPVBlendSC, + VPVCanonicalIVSC, VPVFirstOrderRecurrencePHISC, VPVWidenPHISC, VPVWidenCanonicalIVSC, @@ -332,6 +333,7 @@ // Phi-like recipes. Need to be kept together. VPBlendSC, + VPCanonicalIVSC, VPFirstOrderRecurrencePHISC, VPWidenPHISC, VPWidenCanonicalIVSC, diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll @@ -10,6 +10,7 @@ ; CHECK: VPlan 'Initial VPlan for VF={vscale x 2},UF>=1' { ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: loop.body: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: WIDEN-PHI %ptr.iv.1 = phi %start.1, %ptr.iv.1.next ; CHECK-NEXT: WIDEN-PHI %ptr.iv.2 = phi %start.2, %ptr.iv.2.next @@ -18,6 +19,7 @@ ; CHECK-NEXT: WIDEN ir<%lv> = load ir<%ptr.iv.2> ; CHECK-NEXT: WIDEN ir<%add> = add ir<%lv>, ir<1> ; CHECK-NEXT: WIDEN store ir<%ptr.iv.2>, ir<%add> +; CHECK-NEXT: EMIT vp<{{.+}}> = induction increment nuw vp<[[CAN_IV]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll --- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll @@ -1675,8 +1675,8 @@ ; FVW2-NEXT: [[TMP33:%.*]] = extractelement <2 x float> [[WIDE_LOAD18]], i32 1 ; FVW2-NEXT: store float [[TMP33]], float* [[TMP29]], align 4, !alias.scope !10, !noalias !12 ; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; FVW2-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; FVW2-NEXT: [[PTR_IND]] = getelementptr float, float* [[POINTER_PHI]], i64 64 +; FVW2-NEXT: [[TMP34:%.*]] = icmp eq i64 
[[INDEX_NEXT]], [[N_VEC]] ; FVW2-NEXT: br i1 [[TMP34]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; FVW2: middle.block: ; FVW2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -13,6 +13,7 @@ ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: loop: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%0> = phi ir<0>, ir<%conv> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> @@ -61,6 +62,7 @@ ; CHECK-EMPTY: ; CHECK-NEXT: loop.1.split: ; CHECK-NEXT: WIDEN ir<%add> = add ir<%conv>, vp<[[PRED2]]> +; CHECK-NEXT: EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: No successors @@ -92,6 +94,7 @@ ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: loop: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> @@ -125,6 +128,7 @@ ; CHECK-NEXT: Successor(s): loop.1 ; CHECK-EMPTY: ; CHECK-NEXT: loop.1: +; CHECK-NEXT: EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: No successors @@ -156,6 +160,7 @@ ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: loop: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next> ; CHECK-NEXT: 
WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%and.red> = phi ir<1234>, ir<%and.red.next> @@ -187,6 +192,7 @@ ; CHECK-NEXT: WIDEN ir<%add> = add vp<[[PRED]]>, ir<%recur.next> ; CHECK-NEXT: WIDEN ir<%and.red.next> = and ir<%and.red>, ir<%add> ; CHECK-NEXT: EMIT vp<[[SEL:%.+]]> = select vp<[[MASK]]> ir<%and.red.next> ir<%and.red> +; CHECK-NEXT: EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: No successors @@ -221,6 +227,7 @@ ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: loop: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%0> = phi ir<0>, ir<%conv> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> @@ -275,6 +282,7 @@ ; CHECK-NEXT: WIDEN ir<%add.1> = add ir<%conv>, vp<[[PRED1]]> ; CHECK-NEXT: WIDEN ir<%conv.lv.2> = sext vp<[[PRED2]]> ; CHECK-NEXT: WIDEN ir<%add> = add ir<%add.1>, ir<%conv.lv.2> +; CHECK-NEXT: EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: No successors @@ -310,6 +318,7 @@ ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: loop: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next> ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> @@ -361,6 +370,7 @@ ; CHECK-NEXT: Successor(s): loop.2 ; CHECK-EMPTY: ; CHECK-NEXT: loop.2: +; CHECK-NEXT: EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: No successors diff --git a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll --- a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll @@ -39,9 
+39,11 @@ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: loop: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: WIDEN ir<%cond0> = icmp ir<%iv>, ir<13> ; CHECK-NEXT: WIDEN-SELECT ir<%s> = select ir<%cond0>, ir<10>, ir<20> +; CHECK-NEXT: EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]> ; CHECK-NEXT: No successor ; CHECK-NEXT: } define void @test() { diff --git a/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll b/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll --- a/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll +++ b/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll @@ -42,9 +42,9 @@ ; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP_1_LATCH5]], label [[LOOP_2_HEADER1]] ; CHECK: loop.1.latch5: ; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[TMP8]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP9]], i32 0 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]] diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -62,8 +62,8 @@ ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE7]] ; CHECK: pred.store.continue7: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: [[PTR_IND]] = 
getelementptr i8, i8* [[POINTER_PHI]], i64 -4 +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll @@ -12,12 +12,14 @@ ; CHECK-NEXT: label="\ vector loop" ; CHECK-NEXT: N1 [label = ; CHECK-NEXT: "for.body:\l" + +; CHECK-NEXT: " EMIT vp\<[[CAN_IV:%.+]]\> = CANONICAL-INDUCTION\l" + ; CHECK-NEXT: " WIDEN-INDUCTION %iv = phi %iv.next, 0\l" + ; CHECK-NEXT: " CLONE ir\<%arrayidx\> = getelementptr ir\<%y\>, ir\<%iv\>\l" + ; CHECK-NEXT: " WIDEN ir\<%lv\> = load ir\<%arrayidx\>\l" + ; CHECK-NEXT: " WIDEN-CALL ir\<%call\> = call @llvm.sqrt.f32(ir\<%lv\>)\l" + ; CHECK-NEXT: " CLONE ir\<%arrayidx2\> = getelementptr ir\<%x\>, ir\<%iv\>\l" + ; CHECK-NEXT: " WIDEN store ir\<%arrayidx2\>, ir\<%call\>\l" + +; CHECK-NEXT: " EMIT vp\<{{.+}}\> = induction increment nuw vp\<[[CAN_IV]]\>\l" + ; CHECK-NEXT: "No successors\l" ; CHECK-NEXT: ] ; diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -11,12 +11,14 @@ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: for.body: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi %iv.next, 0 ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%y>, ir<%iv> ; CHECK-NEXT: WIDEN ir<%lv> = load ir<%arrayidx> ; CHECK-NEXT: WIDEN-CALL ir<%call> = call @llvm.sqrt.f32(ir<%lv>) ; CHECK-NEXT: CLONE ir<%arrayidx2> = 
getelementptr ir<%x>, ir<%iv> ; CHECK-NEXT: WIDEN store ir<%arrayidx2>, ir<%call> +; CHECK-NEXT: EMIT vp<{{.+}}> = induction increment nuw vp<[[CAN_IV]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: No successors @@ -46,6 +48,7 @@ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: for.body: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi %iv.next, 0 ; CHECK-NEXT: WIDEN-GEP Inv[Var] ir<%arrayidx> = getelementptr ir<%y>, ir<%iv> ; CHECK-NEXT: WIDEN ir<%lv> = load ir<%arrayidx> @@ -54,6 +57,7 @@ ; CHECK-NEXT: WIDEN ir<%add> = fadd ir<%lv>, ir<%sel> ; CHECK-NEXT: CLONE ir<%arrayidx2> = getelementptr ir<%x>, ir<%iv> ; CHECK-NEXT: WIDEN store ir<%arrayidx2>, ir<%add> +; CHECK-NEXT: EMIT vp<{{.+}}> = induction increment nuw vp<[[CAN_IV]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: No successors @@ -85,11 +89,13 @@ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: for.body: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi %iv.next, 0 ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%red> = phi ir<0.000000e+00>, ir<%red.next> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%y>, ir<%iv> ; CHECK-NEXT: WIDEN ir<%lv> = load ir<%arrayidx> ; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd (ir<%lv>) +; CHECK-NEXT: EMIT vp<{{.+}}> = induction increment nuw vp<[[CAN_IV]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: No successors @@ -117,6 +123,7 @@ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: for.body: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %i = phi 0, %i.next ; CHECK-NEXT: WIDEN ir<%cmp> = icmp ir<%i>, ir<5> ; CHECK-NEXT: Successor(s): if.then @@ -148,6 +155,7 @@ ; CHECK-NEXT: BLEND %d = ir<0>/vp<[[NOT]]> vp<[[PRED]]>/ir<%cmp> ; CHECK-NEXT: CLONE 
ir<%idx> = getelementptr ir<%x>, ir<%i> ; CHECK-NEXT: WIDEN store ir<%idx>, ir<%d> +; CHECK-NEXT: EMIT vp<{{.+}}> = induction increment nuw vp<[[CAN_IV]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: No successors @@ -185,6 +193,7 @@ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: for.body: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: CLONE ir<%gep.AB.0> = getelementptr ir<@AB>, ir<0>, ir<%iv> ; CHECK-NEXT: INTERLEAVE-GROUP with factor 4 at %AB.0, ir<%gep.AB.0> @@ -206,6 +215,7 @@ ; CHECK-NEXT: store ir<1> to index 1 ; CHECK-NEXT: store ir<2> to index 2 ; CHECK-NEXT: store ir<%AB.3> to index 3 +; CHECK-NEXT: EMIT vp<{{.+}}> = induction increment nuw vp<[[CAN_IV]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: No successors @@ -247,6 +257,7 @@ ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: for.body: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%sum.07> = phi ir<0.000000e+00>, ir<%muladd> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr ir<%a>, ir<%iv> @@ -255,6 +266,7 @@ ; CHECK-NEXT: WIDEN ir<%l.b> = load ir<%arrayidx2> ; CHECK-NEXT: EMIT vp<[[FMUL:%.]]> = fmul nnan ninf nsz ir<%l.a> ir<%l.b> ; CHECK-NEXT: REDUCE ir<[[MULADD:%.+]]> = ir<%sum.07> + nnan ninf nsz reduce.fadd (vp<[[FMUL]]>) +; CHECK-NEXT: EMIT vp<{{.+}}> = induction increment nuw vp<[[CAN_IV]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll @@ -10,6 +10,7 @@ ; CHECK: VPlan 'Initial VPlan for 
VF={1},UF>=1' { ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: for.body: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %tmp0 = phi %tmp6, 0 ; CHECK-NEXT: WIDEN-INDUCTION %tmp1 = phi %tmp7, 0 ; CHECK-NEXT: CLONE ir<%tmp2> = getelementptr ir<%ptr>, ir<%tmp0> @@ -40,6 +41,7 @@ ; CHECK-NEXT: Successor(s): for.inc ; CHECK: for.inc: +; CHECK-NEXT: EMIT vp<{{.+}}> = induction increment nuw vp<[[CAN_IV]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; CHECK-NEXT: No successors diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -15,6 +15,7 @@ ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: loop: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: Successor(s): loop.0 @@ -45,6 +46,7 @@ ; CHECK: loop.1: ; CHECK-NEXT: CLONE ir<%large> = icmp ir<%iv>, ir<8> ; CHECK-NEXT: CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k> +; CHECK-NEXT: EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -76,6 +78,7 @@ ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: loop: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: Successor(s): pred.load @@ -119,6 +122,7 @@ ; CHECK: loop.1: ; CHECK-NEXT: CLONE ir<%large> = icmp ir<%iv>, ir<8> ; CHECK-NEXT: CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k> +; CHECK-NEXT: EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -150,6 +154,7 @@ ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; 
CHECK-NEXT: loop: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: Successor(s): pred.load @@ -193,6 +198,7 @@ ; CHECK: loop.1: ; CHECK-NEXT: CLONE ir<%large> = icmp ir<%iv>, ir<8> ; CHECK-NEXT: CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k> +; CHECK-NEXT: EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -226,9 +232,10 @@ ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: loop: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 21, %iv.next -; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = WIDEN-CANONICAL-INDUCTION -; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule vp<[[CAN_IV]]> vp<[[BTC]]> +; CHECK-NEXT: EMIT vp<[[WIDE_CAN_IV:%.+]]> = WIDEN-CANONICAL-INDUCTION +; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule vp<[[WIDE_CAN_IV]]> vp<[[BTC]]> ; CHECK-NEXT: CLONE ir<%gep.A.uniform> = getelementptr ir<%A>, ir<0> ; CHECK-NEXT: Successor(s): pred.load ; CHECK-EMPTY: @@ -277,6 +284,7 @@ ; CHECK-NEXT: Successor(s): loop.latch ; CHECK-EMPTY: ; CHECK-NEXT: loop.latch: +; CHECK-NEXT: EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -311,6 +319,7 @@ ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: loop: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: WIDEN ir<%c.1> = icmp ir<%iv>, ir<%j> ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%iv>, ir<10> @@ -367,6 +376,7 @@ ; CHECK-NEXT: next.0.0: ; CHECK-NEXT: CLONE ir<%large> = icmp ir<%iv>, ir<8> ; CHECK-NEXT: CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k> +; CHECK-NEXT: EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -407,6 +417,7 @@ ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: loop: +; CHECK-NEXT: EMIT 
vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%iv>, ir<10> ; CHECK-NEXT: WIDEN ir<%c.0> = icmp ir<%iv>, ir<%j> @@ -471,6 +482,7 @@ ; CHECK-NEXT: next.1: ; CHECK-NEXT: CLONE ir<%large> = icmp ir<%iv>, ir<8> ; CHECK-NEXT: CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k> +; CHECK-NEXT: EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -518,6 +530,7 @@ ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: loop: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%iv>, ir<10> ; CHECK-NEXT: WIDEN ir<%c.0> = icmp ir<%iv>, ir<%j> @@ -580,6 +593,7 @@ ; CHECK-NEXT: next.1: ; CHECK-NEXT: CLONE ir<%large> = icmp ir<%iv>, ir<8> ; CHECK-NEXT: CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k> +; CHECK-NEXT: EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -623,6 +637,7 @@ ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: loop: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%iv> @@ -690,6 +705,7 @@ ; CHECK-NEXT: latch: ; CHECK-NEXT: CLONE ir<%large> = icmp ir<%iv>, ir<8> ; CHECK-NEXT: CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k> +; CHECK-NEXT: EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -733,6 +749,7 @@ ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: loop: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> ; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%iv> @@ -766,6 +783,7 @@ ; 
CHECK-NEXT: loop.2: ; CHECK-NEXT: CLONE ir<%large> = icmp ir<%iv>, ir<8> ; CHECK-NEXT: CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k> +; CHECK-NEXT: EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -795,6 +813,7 @@ ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: loop: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for> = phi ir<0>, ir<%lv.a> ; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]> @@ -844,6 +863,7 @@ ; CHECK-NEXT: loop.2: ; CHECK-NEXT: CLONE ir<%large> = icmp ir<%iv>, ir<8> ; CHECK-NEXT: CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k> +; CHECK-NEXT: EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -872,6 +892,7 @@ ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: loop.header: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: Successor(s): loop.then ; CHECK-EMPTY: @@ -906,6 +927,7 @@ ; CHECK-NEXT: Successor(s): loop.latch ; CHECK-EMPTY: ; CHECK-NEXT: loop.latch: +; CHECK-NEXT: EMIT vp<{{.+}}> = induction increment nuw vp<[[CAN_IV]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -939,6 +961,7 @@ ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: loop.header: +; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next ; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%addr>, ir<%iv> ; CHECK-NEXT: Successor(s): loop.body @@ -972,6 +995,7 @@ ; CHECK-NEXT: Successor(s): loop.latch ; CHECK-EMPTY: ; CHECK-NEXT: loop.latch: +; CHECK-NEXT: EMIT vp<{{.+}}> = induction increment nuw vp<[[CAN_IV]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; diff --git 
a/llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll b/llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-vectorize-inner-loop-reduction.ll @@ -41,9 +41,9 @@ ; CHECK-NEXT: %[[C_PTR:.*]] = getelementptr inbounds double, double* %c.out, <4 x i64> %[[VEC_INDEX]] ; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> %[[REDUCTION]], <4 x double*> %[[C_PTR]], i32 8, <4 x i1> ) ; CHECK-NEXT: %[[VEC_INDEX_NEXT:.*]] = add nuw nsw <4 x i64> %[[VEC_INDEX]], +; CHECK-NEXT: %[[FOR1_INDEX_NEXT:.*]] = add nuw i64 %[[FOR1_INDEX]], 4 ; CHECK-NEXT: %[[VEC_PTR:.*]] = icmp eq <4 x i64> %[[VEC_INDEX_NEXT]], ; CHECK-NEXT: %{{.*}} = extractelement <4 x i1> %[[VEC_PTR]], i32 0 -; CHECK-NEXT: %[[FOR1_INDEX_NEXT:.*]] = add nuw i64 %[[FOR1_INDEX]], 4 ; CHECK-NEXT: %{{.*}} = add <4 x i64> %[[VEC_INDEX]], ; CHECK-NEXT: %[[EXIT_COND:.*]] = icmp eq i64 %[[FOR1_INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 %[[EXIT_COND]], label %{{.*}}, label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/vplan-widen-call-instruction.ll b/llvm/test/Transforms/LoopVectorize/vplan-widen-call-instruction.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-widen-call-instruction.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-widen-call-instruction.ll @@ -31,9 +31,9 @@ ; CHECK-NEXT: %[[C_PTR:.*]] = getelementptr inbounds double, double* %c.out, <4 x i64> %[[VEC_INDEX]] ; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> %[[REDUCTION]], <4 x double*> %[[C_PTR]], i32 8, <4 x i1> ) ; CHECK-NEXT: %[[VEC_INDEX_NEXT:.*]] = add nuw nsw <4 x i64> %[[VEC_INDEX]], +; CHECK-NEXT: %[[FOR1_INDEX_NEXT:.*]] = add nuw i64 %[[FOR1_INDEX]], 4 ; CHECK-NEXT: %[[VEC_PTR:.*]] = icmp eq <4 x i64> %[[VEC_INDEX_NEXT]], ; CHECK-NEXT: %{{.*}} = extractelement <4 x i1> %[[VEC_PTR]], i32 0 -; CHECK-NEXT: %[[FOR1_INDEX_NEXT:.*]] 
= add nuw i64 %[[FOR1_INDEX]], 4 ; CHECK-NEXT: %{{.*}} = add <4 x i64> %[[VEC_INDEX]], ; CHECK-NEXT: %[[EXIT_COND:.*]] = icmp eq i64 %[[FOR1_INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 %[[EXIT_COND]], label %{{.*}}, label %vector.body