diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -289,7 +289,7 @@ /// Build a VPlan according to the information gathered by Legal. \return a /// VPlan for vectorization factors \p Range.Start and up to \p Range.End /// exclusive, possibly decreasing \p Range.End. - VPlanPtr buildVPlan(VFRange &Range); + VPlanPtr buildVPlan(const VFRange &Range); /// Build a VPlan using VPRecipes according to the information gather by /// Legal. This method is only used for the legacy inner loop vectorizer. @@ -298,10 +298,17 @@ SmallPtrSetImpl &DeadInstructions, const DenseMap &SinkAfter); + VPlanPtr + convertToVPRecipes(VPlan &OriginalPlan, VFRange &Range, + SmallPtrSetImpl &NeedDef, + const DenseMap &SinkAfter); + /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive, /// according to the information gathered by Legal when it checked if it is /// legal to vectorize the loop. This method creates VPlans using VPRecipes. void buildVPlansWithVPRecipes(unsigned MinVF, unsigned MaxVF); + void buildVPlansWithVPRecipes(unsigned MinVF, unsigned MaxVF, + VPlan &OriginalPlan); /// Adjust the recipes for any inloop reductions. The chain of instructions /// leading from the loop exit instr to the phi need to be converted to diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5267,8 +5267,6 @@ if (!useMaskedInterleavedAccesses(TTI)) { assert(WideningDecisions.empty() && Uniforms.empty() && Scalars.empty() && "No decisions should have been taken at this point"); - // Note: There is no need to invalidate any cost modeling decisions here, as - // non where taken so far. InterleaveInfo.invalidateGroupsRequiringScalarEpilogue(); } @@ -7008,8 +7006,10 @@ // profitable to scalarize. CM.selectUserVectorizationFactor(UserVF); CM.collectInLoopReductions(); + auto InitialPlan = + buildVPlan({UserVF.getKnownMinValue(), UserVF.getKnownMinValue()}); buildVPlansWithVPRecipes(UserVF.getKnownMinValue(), - UserVF.getKnownMinValue()); + UserVF.getKnownMinValue(), *InitialPlan); LLVM_DEBUG(printPlans(dbgs())); return {{UserVF, 0}}; } @@ -7029,7 +7029,8 @@ CM.collectInLoopReductions(); - buildVPlansWithVPRecipes(1, MaxVF); + auto InitialPlan = buildVPlan({1, MaxVF}); + buildVPlansWithVPRecipes(1, MaxVF, *InitialPlan); LLVM_DEBUG(printPlans(dbgs())); if (MaxVF == 1) return VectorizationFactor::Disabled(); @@ -7201,9 +7202,29 @@ /// vectorization decision can potentially shorten this sub-range during /// buildVPlan(). void LoopVectorizationPlanner::buildVPlans(unsigned MinVF, unsigned MaxVF) { + // Outer loop handling: They may require CFG and instruction level + // transformations before even evaluating whether vectorization is profitable. + // Since we cannot modify the incoming IR, we need to build VPlan upfront in + // the vectorization pipeline. + assert(!OrigLoop->isInnermost()); + assert(EnableVPlanNativePath && "VPlan-native path is not enabled."); + for (unsigned VF = MinVF; VF < MaxVF + 1;) { VFRange SubRange = {VF, MaxVF + 1}; - VPlans.push_back(buildVPlan(SubRange)); + auto Plan = buildVPlan(SubRange); + + if (EnableVPlanPredication) { + VPlanPredicator VPP(*Plan); + VPP.predicate(); + // Avoid running transformation to recipes until masked code generation in + // VPlan-native path is in place. + } else { + SmallPtrSet DeadInstructions; + VPlanTransforms::VPInstructionsToVPRecipes( + OrigLoop, Plan, Legal->getInductionVars(), DeadInstructions); + } + + VPlans.emplace_back(std::move(Plan)); VF = SubRange.End; } } @@ -7614,7 +7635,8 @@ } void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF, - unsigned MaxVF) { + unsigned MaxVF, + VPlan &OriginalPlan) { assert(OrigLoop->isInnermost() && "Inner loop expected."); // Collect conditions feeding internal conditional branches; they need to be @@ -7667,17 +7689,41 @@ for (Instruction *I : DeadInstructions) SinkAfter.erase(I); + auto *TopRegion = cast(OriginalPlan.getEntry()); + ReversePostOrderTraversal RPOT(TopRegion->getEntry()); + + auto *DummyVal = new VPValue(); + OriginalPlan.addExternalDef(DummyVal); + for (VPBlockBase *Base : RPOT) { + // Do not widen instructions in pre-header and exit blocks. + if (Base->getNumPredecessors() == 0 || Base->getNumSuccessors() == 0) + continue; + + VPBasicBlock *OriginalVPBB = Base->getEntryBasicBlock(); + OriginalVPBB->setCondBit(nullptr); + // Introduce each ingredient into VPlan. + for (auto I = OriginalVPBB->rbegin(), E = OriginalVPBB->rend(); I != E;) { + VPRecipeBase *Ingredient = &*I++; + VPInstruction *VPInst = cast(Ingredient); + Instruction *Instr = VPInst->getUnderlyingInstr(); + + if (DeadInstructions.contains(Instr) || isa(Instr)) { + Ingredient->getVPValue()->replaceAllUsesWith(DummyVal); + Ingredient->eraseFromParent(); + } + } + } + for (unsigned VF = MinVF; VF < MaxVF + 1;) { VFRange SubRange = {VF, MaxVF + 1}; - VPlans.push_back(buildVPlanWithVPRecipes(SubRange, NeedDef, - DeadInstructions, SinkAfter)); + VPlans.push_back( + convertToVPRecipes(OriginalPlan, SubRange, NeedDef, SinkAfter)); VF = SubRange.End; } } -VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( - VFRange &Range, SmallPtrSetImpl &NeedDef, - SmallPtrSetImpl &DeadInstructions, +VPlanPtr LoopVectorizationPlanner::convertToVPRecipes( + VPlan &OriginalPlan, VFRange &Range, SmallPtrSetImpl &NeedDef, const DenseMap &SinkAfter) { // Hold a mapping from predicated instructions to their recipes, in order to @@ -7775,24 +7821,33 @@ LoopBlocksDFS DFS(OrigLoop); DFS.perform(LI); - for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) { + auto *TopRegion = cast(OriginalPlan.getEntry()); + ReversePostOrderTraversal RPOT(TopRegion->getEntry()); + + for (VPBlockBase *Base : RPOT) { + // Do not widen instructions in pre-header and exit blocks. + if (Base->getNumPredecessors() == 0 || Base->getNumSuccessors() == 0) + continue; + + VPBasicBlock *OriginalVPBB = Base->getEntryBasicBlock(); + // Relevant instructions from basic block BB will be grouped into VPRecipe // ingredients and fill a new VPBasicBlock. unsigned VPBBsForBB = 0; - auto *FirstVPBBForBB = new VPBasicBlock(BB->getName()); + auto *FirstVPBBForBB = new VPBasicBlock(OriginalVPBB->getName()); VPBlockUtils::insertBlockAfter(FirstVPBBForBB, VPBB); VPBB = FirstVPBBForBB; Builder.setInsertPoint(VPBB); // Introduce each ingredient into VPlan. - // TODO: Model and preserve debug instrinsics in VPlan. - for (Instruction &I : BB->instructionsWithoutDebug()) { - Instruction *Instr = &I; + for (auto I = OriginalVPBB->begin(), E = OriginalVPBB->end(); I != E;) { + VPRecipeBase *Ingredient = &*I++; + VPInstruction *VPInst = cast(Ingredient); + Instruction *Instr = VPInst->getUnderlyingInstr(); // First filter out irrelevant instructions, to ensure no recipes are // built for them. - if (isa(Instr) || DeadInstructions.count(Instr) || - DeadInterleaveGroupMembers.contains(Instr)) + if (isa(Instr) || DeadInterleaveGroupMembers.contains(Instr)) continue; if (auto Recipe = @@ -7812,7 +7867,7 @@ } else if (Recipe->getNumDefinedValues() != 0) { assert(Recipe->getNumDefinedValues() == 1 && "all multi-defs should be handled earlier"); - RegisterVPValue(&I, Recipe->getVPValue()); + RegisterVPValue(Instr, Recipe->getVPValue()); } RecipeBuilder.setRecipe(Instr, Recipe); @@ -7826,8 +7881,7 @@ Instr, Range, VPBB, PredInst2Recipe, Plan); if (NextVPBB != VPBB) { VPBB = NextVPBB; - VPBB->setName(BB->hasName() ? BB->getName() + "." + Twine(VPBBsForBB++) - : ""); + VPBB->setName(OriginalVPBB->getName() + "." + Twine(VPBBsForBB++)); } } } @@ -7889,14 +7943,7 @@ return Plan; } -VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) { - // Outer loop handling: They may require CFG and instruction level - // transformations before even evaluating whether vectorization is profitable. - // Since we cannot modify the incoming IR, we need to build VPlan upfront in - // the vectorization pipeline. - assert(!OrigLoop->isInnermost()); - assert(EnableVPlanNativePath && "VPlan-native path is not enabled."); - +VPlanPtr LoopVectorizationPlanner::buildVPlan(const VFRange &Range) { // Create new empty VPlan auto Plan = std::make_unique(); @@ -7907,18 +7954,6 @@ for (unsigned VF = Range.Start; VF < Range.End; VF *= 2) Plan->addVF(ElementCount::getFixed(VF)); - if (EnableVPlanPredication) { - VPlanPredicator VPP(*Plan); - VPP.predicate(); - - // Avoid running transformation to recipes until masked code generation in - // VPlan-native path is in place. - return Plan; - } - - SmallPtrSet DeadInstructions; - VPlanTransforms::VPInstructionsToVPRecipes( - OrigLoop, Plan, Legal->getInductionVars(), DeadInstructions); return Plan; } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1345,6 +1345,8 @@ inline const VPRecipeBase &back() const { return Recipes.back(); } inline VPRecipeBase &back() { return Recipes.back(); } + iterator_range recipes() { return make_range(begin(), end()); } + /// Returns a reference to the list of recipes. RecipeListTy &getRecipeList() { return Recipes; }