diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -293,7 +293,7 @@ /// Build a VPlan according to the information gathered by Legal. \return a /// VPlan for vectorization factors \p Range.Start and up to \p Range.End /// exclusive, possibly decreasing \p Range.End. - VPlanPtr buildVPlan(VFRange &Range); + VPlanPtr buildVPlan(const VFRange &Range); /// Build a VPlan using VPRecipes according to the information gather by /// Legal. This method is only used for the legacy inner loop vectorizer. @@ -301,10 +301,16 @@ VFRange &Range, SmallPtrSetImpl &DeadInstructions, const DenseMap &SinkAfter); + VPlanPtr + convertToVPRecipes(VPlan &OriginalPlan, VFRange &Range, + const DenseMap &SinkAfter); + /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive, /// according to the information gathered by Legal when it checked if it is /// legal to vectorize the loop. This method creates VPlans using VPRecipes. void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF); + void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF, + VPlan &OriginalPlan); /// Adjust the recipes for any inloop reductions. The chain of instructions /// leading from the loop exit instr to the phi need to be converted to diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7740,7 +7740,8 @@ // profitable to scalarize. CM.selectUserVectorizationFactor(VF); CM.collectInLoopReductions(); - buildVPlansWithVPRecipes(VF, VF); + auto InitialPlan = buildVPlan({VF, VF}); + buildVPlansWithVPRecipes(VF, VF, *InitialPlan); LLVM_DEBUG(printPlans(dbgs())); return {{VF, 0}}; } @@ -7761,7 +7762,8 @@ CM.collectInLoopReductions(); - buildVPlansWithVPRecipes(ElementCount::getFixed(1), MaxVF); + auto InitialPlan = buildVPlan({ElementCount::getFixed(1), MaxVF}); + buildVPlansWithVPRecipes(ElementCount::getFixed(1), MaxVF, *InitialPlan); LLVM_DEBUG(printPlans(dbgs())); if (MaxVF.isScalar()) return VectorizationFactor::Disabled(); @@ -8222,10 +8224,30 @@ /// buildVPlan(). void LoopVectorizationPlanner::buildVPlans(ElementCount MinVF, ElementCount MaxVF) { + // Outer loop handling: They may require CFG and instruction level + // transformations before even evaluating whether vectorization is profitable. + // Since we cannot modify the incoming IR, we need to build VPlan upfront in + // the vectorization pipeline. + assert(!OrigLoop->isInnermost()); + assert(EnableVPlanNativePath && "VPlan-native path is not enabled."); + auto MaxVFPlusOne = MaxVF.getWithIncrement(1); for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFPlusOne);) { VFRange SubRange = {VF, MaxVFPlusOne}; - VPlans.push_back(buildVPlan(SubRange)); + auto Plan = buildVPlan(SubRange); + + if (EnableVPlanPredication) { + VPlanPredicator VPP(*Plan); + VPP.predicate(); + // Avoid running transformation to recipes until masked code generation in + // VPlan-native path is in place. + } else { + SmallPtrSet DeadInstructions; + VPlanTransforms::VPInstructionsToVPRecipes( + OrigLoop, Plan, Legal->getInductionVars(), DeadInstructions); + } + + VPlans.emplace_back(std::move(Plan)); VF = SubRange.End; } } @@ -8678,9 +8700,28 @@ } void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, - ElementCount MaxVF) { + ElementCount MaxVF, + VPlan &OriginalPlan) { assert(OrigLoop->isInnermost() && "Inner loop expected."); + // Remove the loop exit block and pre-header/entry block, to simplify the code + // dealing with VPlans for inner loops, as those blocks are not needed at the + // moment. + VPBasicBlock *Exit = + cast(OriginalPlan.getEntry()->getExitBasicBlock()); + SmallVector Preds(Exit->getPredecessors().begin(), + Exit->getPredecessors().end()); + for (VPBlockBase *Pred : Preds) + VPBlockUtils::disconnectBlocks(Pred, Exit); + delete Exit; + + VPBasicBlock *PreEntry = + cast(OriginalPlan.getEntry()->getEntryBasicBlock()); + assert(PreEntry->empty() && "Expecting empty pre-entry block."); + VPBlockBase *Entry = OriginalPlan.setEntry(PreEntry->getSingleSuccessor()); + VPBlockUtils::disconnectBlocks(PreEntry, Entry); + delete PreEntry; + // Collect instructions from the original loop that will become trivially dead // in the vectorized loop. We don't need to vectorize these instructions. For // example, original induction update instructions can become dead because we @@ -8702,17 +8743,38 @@ for (Instruction *I : DeadInstructions) SinkAfter.erase(I); + ReversePostOrderTraversal RPOT( + OriginalPlan.getEntry()->getEntryBasicBlock()); + + auto *DummyVal = new VPValue(); + OriginalPlan.addExternalDef(DummyVal); + for (VPBlockBase *Base : RPOT) { + VPBasicBlock *OriginalVPBB = Base->getEntryBasicBlock(); + // Introduce each ingredient into VPlan. + for (auto I = OriginalVPBB->rbegin(), E = OriginalVPBB->rend(); I != E;) { + VPRecipeBase *Ingredient = &*I++; + VPInstruction *VPInst = dyn_cast(Ingredient); + if (!VPInst) + continue; + Instruction *Instr = VPInst->getUnderlyingInstr(); + + if (DeadInstructions.contains(Instr) || isa(Instr)) { + Ingredient->getVPValue()->replaceAllUsesWith(DummyVal); + Ingredient->eraseFromParent(); + } + } + } + auto MaxVFPlusOne = MaxVF.getWithIncrement(1); for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFPlusOne);) { VFRange SubRange = {VF, MaxVFPlusOne}; - VPlans.push_back( - buildVPlanWithVPRecipes(SubRange, DeadInstructions, SinkAfter)); + VPlans.push_back(convertToVPRecipes(OriginalPlan, SubRange, SinkAfter)); VF = SubRange.End; } } -VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( - VFRange &Range, SmallPtrSetImpl &DeadInstructions, +VPlanPtr LoopVectorizationPlanner::convertToVPRecipes( + VPlan &OriginalPlan, VFRange &Range, const DenseMap &SinkAfter) { // Hold a mapping from predicated instructions to their recipes, in order to @@ -8775,31 +8837,43 @@ // Create a dummy pre-entry VPBasicBlock to start building the VPlan. auto Plan = std::make_unique(); - VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry"); - Plan->setEntry(VPBB); // Scan the body of the loop in a topological order to visit each basic block // after having visited its predecessor basic blocks. LoopBlocksDFS DFS(OrigLoop); DFS.perform(LI); - for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) { + ReversePostOrderTraversal RPOT( + OriginalPlan.getEntry()->getEntryBasicBlock()); + + VPBasicBlock *VPBB = nullptr; + for (VPBlockBase *Base : RPOT) { + VPBasicBlock *OriginalVPBB = Base->getEntryBasicBlock(); + // Relevant instructions from basic block BB will be grouped into VPRecipe // ingredients and fill a new VPBasicBlock. unsigned VPBBsForBB = 0; - auto *FirstVPBBForBB = new VPBasicBlock(BB->getName()); - VPBlockUtils::insertBlockAfter(FirstVPBBForBB, VPBB); + auto *FirstVPBBForBB = new VPBasicBlock(OriginalVPBB->getName()); + if (VPBB) + VPBlockUtils::insertBlockAfter(FirstVPBBForBB, VPBB); + else + Plan->setEntry(FirstVPBBForBB); VPBB = FirstVPBBForBB; Builder.setInsertPoint(VPBB); // Introduce each ingredient into VPlan. - // TODO: Model and preserve debug instrinsics in VPlan. - for (Instruction &I : BB->instructionsWithoutDebug()) { - Instruction *Instr = &I; + for (auto I = OriginalVPBB->begin(), E = OriginalVPBB->end(); I != E;) { + VPRecipeBase *Ingredient = &*I++; + VPInstruction *VPInst = dyn_cast(Ingredient); + Instruction *Instr = + VPInst + ? VPInst->getUnderlyingInstr() + : cast( + cast(Ingredient)->getUnderlyingInstr()); // First filter out irrelevant instructions, to ensure no recipes are // built for them. - if (isa(Instr) || DeadInstructions.count(Instr)) + if (isa(Instr)) continue; if (auto RecipeOrValue = @@ -8827,21 +8901,11 @@ Instr, Range, VPBB, PredInst2Recipe, Plan); if (NextVPBB != VPBB) { VPBB = NextVPBB; - VPBB->setName(BB->hasName() ? BB->getName() + "." + Twine(VPBBsForBB++) - : ""); + VPBB->setName(OriginalVPBB->getName() + "." + Twine(VPBBsForBB++)); } } } - // Discard empty dummy pre-entry VPBasicBlock. Note that other VPBasicBlocks - // may also be empty, such as the last one VPBB, reflecting original - // basic-blocks with no recipes. - VPBasicBlock *PreEntry = cast(Plan->getEntry()); - assert(PreEntry->empty() && "Expecting empty pre-entry block."); - VPBlockBase *Entry = Plan->setEntry(PreEntry->getSingleSuccessor()); - VPBlockUtils::disconnectBlocks(PreEntry, Entry); - delete PreEntry; - // --------------------------------------------------------------------------- // Transform initial VPlan: Apply previously taken decisions, in order, to // bring the VPlan to its final state. @@ -8928,14 +8992,7 @@ return Plan; } -VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) { - // Outer loop handling: They may require CFG and instruction level - // transformations before even evaluating whether vectorization is profitable. - // Since we cannot modify the incoming IR, we need to build VPlan upfront in - // the vectorization pipeline. - assert(!OrigLoop->isInnermost()); - assert(EnableVPlanNativePath && "VPlan-native path is not enabled."); - +VPlanPtr LoopVectorizationPlanner::buildVPlan(const VFRange &Range) { // Create new empty VPlan auto Plan = std::make_unique(); @@ -8947,18 +9004,6 @@ VF *= 2) Plan->addVF(VF); - if (EnableVPlanPredication) { - VPlanPredicator VPP(*Plan); - VPP.predicate(); - - // Avoid running transformation to recipes until masked code generation in - // VPlan-native path is in place. - return Plan; - } - - SmallPtrSet DeadInstructions; - VPlanTransforms::VPInstructionsToVPRecipes( - OrigLoop, Plan, Legal->getInductionVars(), DeadInstructions); return Plan; }