diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8418,15 +8418,8 @@
   assert(CM.foldTailByMasking() && "must fold the tail");
   VPBasicBlock *HeaderVPBB = Plan->getEntry()->getEntryBasicBlock();
   auto NewInsertionPoint = HeaderVPBB->getFirstNonPhi();
-
-  VPValue *IV = nullptr;
-  if (Legal->getPrimaryInduction())
-    IV = Plan->getOrAddVPValue(Legal->getPrimaryInduction());
-  else {
-    auto *IVRecipe = new VPWidenCanonicalIVRecipe(Plan->getCanonicalIV());
-    HeaderVPBB->insert(IVRecipe, NewInsertionPoint);
-    IV = IVRecipe;
-  }
+  auto *IV = new VPWidenCanonicalIVRecipe(Plan->getCanonicalIV());
+  HeaderVPBB->insert(IV, HeaderVPBB->getFirstNonPhi());
 
   VPBuilder::InsertPointGuard Guard(Builder);
   Builder.setInsertPoint(HeaderVPBB, NewInsertionPoint);
@@ -9201,6 +9194,7 @@
     }
   }
 
+  VPlanTransforms::removeRedundantCanonicalIVs(*Plan);
   VPlanTransforms::removeRedundantInductionCasts(*Plan);
 
   // Now that sink-after is done, move induction recipes for optimized truncates
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1076,6 +1076,12 @@
   /// Returns true if the induction is canonical, i.e. starting at 0 and
   /// incremented by UF * VF (= the original IV is incremented by 1).
   bool isCanonical() const;
+
+  /// Returns the scalar type of the induction.
+  const Type *getScalarType() const {
+    const TruncInst *TruncI = getTruncInst();
+    return TruncI ? TruncI->getType() : IV->getType();
+  }
 };
 
 /// A pure virtual base class for all recipes modeling header phis, including
@@ -1675,6 +1681,11 @@
   void print(raw_ostream &O, const Twine &Indent,
              VPSlotTracker &SlotTracker) const override;
 #endif
+
+  /// Returns the scalar type of the induction.
+  const Type *getScalarType() const {
+    return getOperand(0)->getLiveInIRValue()->getType();
+  }
 };
 
 /// A Recipe for widening the canonical induction variable of the vector loop.
@@ -1691,6 +1702,16 @@
     return D->getVPDefID() == VPRecipeBase::VPWidenCanonicalIVSC;
   }
 
+  /// Extra classof implementations to allow directly casting from VPUser ->
+  /// VPWidenCanonicalIVRecipe.
+  static inline bool classof(const VPUser *U) {
+    auto *R = dyn_cast<VPRecipeBase>(U);
+    return R && R->getVPDefID() == VPRecipeBase::VPWidenCanonicalIVSC;
+  }
+  static inline bool classof(const VPRecipeBase *R) {
+    return R->getVPDefID() == VPRecipeBase::VPWidenCanonicalIVSC;
+  }
+
   /// Generate a canonical vector induction variable of the vector loop, with
   /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
   /// step = <VF*UF, VF*UF, ..., VF*UF>.
@@ -1701,6 +1722,12 @@
   void print(raw_ostream &O, const Twine &Indent,
              VPSlotTracker &SlotTracker) const override;
 #endif
+
+  /// Returns the scalar type of the induction.
+  const Type *getScalarType() const {
+    return cast<VPCanonicalIVPHIRecipe>(getOperand(0)->getDef())
+        ->getScalarType();
+  }
 };
 
 /// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -45,6 +45,10 @@
   /// in the vectorized loop. There is no need to vectorize the cast - the same
   /// value can be used for both the phi and casts in the vector loop.
   static void removeRedundantInductionCasts(VPlan &Plan);
+
+  /// Try to replace VPWidenCanonicalIVRecipes with a widened canonical IV
+  /// recipe, if it exists.
+  static void removeRedundantCanonicalIVs(VPlan &Plan);
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -324,3 +324,30 @@
     E.first->eraseFromParent();
   }
 }
+
+void VPlanTransforms::removeRedundantCanonicalIVs(VPlan &Plan) {
+  VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
+  VPWidenCanonicalIVRecipe *WidenNewIV = nullptr;
+  for (VPUser *U : CanonicalIV->users()) {
+    WidenNewIV = dyn_cast<VPWidenCanonicalIVRecipe>(U);
+    if (WidenNewIV)
+      break;
+  }
+
+  if (!WidenNewIV)
+    return;
+
+  VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+  for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
+    auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
+
+    // If the induction recipe is canonical and the types match, use it
+    // directly.
+    if (WidenOriginalIV && WidenOriginalIV->isCanonical() &&
+        WidenOriginalIV->getScalarType() == WidenNewIV->getScalarType()) {
+      WidenNewIV->replaceAllUsesWith(WidenOriginalIV);
+      WidenNewIV->eraseFromParent();
+      return;
+    }
+  }
+}