Index: lib/Transforms/Vectorize/LoopVectorizationPlanner.h =================================================================== --- lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -345,8 +345,18 @@ /// Build a VPlan according to the information gathered by Legal. \return a /// VPlan for vectorization factors \p Range.Start and up to \p Range.End /// exclusive, possibly decreasing \p Range.End. - VPlanPtr buildVPlan(VFRange &Range, - const SmallPtrSetImpl &NeedDef); + VPlanPtr buildVPlan(VFRange &Range); + + /// Build a VPlan using VPRecipes according to the information gathered by + /// Legal. This method is only used for the legacy inner loop vectorizer. + VPlanPtr + buildVPlanWithVPRecipes(VFRange &Range, SmallPtrSetImpl &NeedDef, + SmallPtrSetImpl &DeadInstructions); + + /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive, + /// according to the information gathered by Legal when it checked if it is + /// legal to vectorize the loop. This method creates VPlans using VPRecipes. + void buildVPlansWithVPRecipes(unsigned MinVF, unsigned MaxVF); }; } // namespace llvm Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -6317,7 +6317,7 @@ // Collect the instructions (and their associated costs) that will be more // profitable to scalarize. CM.selectUserVectorizationFactor(UserVF); - buildVPlans(UserVF, UserVF); + buildVPlansWithVPRecipes(UserVF, UserVF); LLVM_DEBUG(printPlans(dbgs())); return {UserVF, 0}; } @@ -6335,11 +6335,12 @@ CM.collectInstsToScalarize(VF); } - buildVPlans(1, MaxVF); - LLVM_DEBUG(printPlans(dbgs())); if (MaxVF == 1) return NoVectorization; + buildVPlansWithVPRecipes(1, MaxVF); + LLVM_DEBUG(printPlans(dbgs())); + // Select the optimal vectorization factor. 
return CM.selectVectorizationFactor(MaxVF); } @@ -6496,23 +6497,9 @@ /// vectorization decision can potentially shorten this sub-range during /// buildVPlan(). void LoopVectorizationPlanner::buildVPlans(unsigned MinVF, unsigned MaxVF) { - - // Collect conditions feeding internal conditional branches; they need to be - // represented in VPlan for it to model masking. - SmallPtrSet NeedDef; - - auto *Latch = OrigLoop->getLoopLatch(); - for (BasicBlock *BB : OrigLoop->blocks()) { - if (BB == Latch) - continue; - BranchInst *Branch = dyn_cast(BB->getTerminator()); - if (Branch && Branch->isConditional()) - NeedDef.insert(Branch->getCondition()); - } - for (unsigned VF = MinVF; VF < MaxVF + 1;) { VFRange SubRange = {VF, MaxVF + 1}; - VPlans.push_back(buildVPlan(SubRange, NeedDef)); + VPlans.push_back(buildVPlan(SubRange)); VF = SubRange.End; } } @@ -6867,32 +6854,23 @@ return Region; } -LoopVectorizationPlanner::VPlanPtr -LoopVectorizationPlanner::buildVPlan(VFRange &Range, - const SmallPtrSetImpl &NeedDef) { - // Outer loop handling: They may require CFG and instruction level - // transformations before even evaluating whether vectorization is profitable. - // Since we cannot modify the incoming IR, we need to build VPlan upfront in - // the vectorization pipeline. - if (!OrigLoop->empty()) { - assert(EnableVPlanNativePath && "VPlan-native path is not enabled."); - - // Create new empty VPlan - auto Plan = llvm::make_unique(); +void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF, + unsigned MaxVF) { + assert(OrigLoop->empty() && "Inner loop expected."); - // Build hierarchical CFG - VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI); - HCFGBuilder.buildHierarchicalCFG(*Plan.get()); + // Collect conditions feeding internal conditional branches; they need to be + // represented in VPlan for it to model masking. 
+ SmallPtrSet NeedDef; - return Plan; + auto *Latch = OrigLoop->getLoopLatch(); + for (BasicBlock *BB : OrigLoop->blocks()) { + if (BB == Latch) + continue; + BranchInst *Branch = dyn_cast(BB->getTerminator()); + if (Branch && Branch->isConditional()) + NeedDef.insert(Branch->getCondition()); } - assert(OrigLoop->empty() && "Inner loop expected."); - EdgeMaskCache.clear(); - BlockMaskCache.clear(); - DenseMap &SinkAfter = Legal->getSinkAfter(); - DenseMap SinkAfterInverse; - // Collect instructions from the original loop that will become trivially dead // in the vectorized loop. We don't need to vectorize these instructions. For // example, original induction update instructions can become dead because we @@ -6902,11 +6880,28 @@ SmallPtrSet DeadInstructions; collectTriviallyDeadInstructions(DeadInstructions); + for (unsigned VF = MinVF; VF < MaxVF + 1;) { + VFRange SubRange = {VF, MaxVF + 1}; + VPlans.push_back( + buildVPlanWithVPRecipes(SubRange, NeedDef, DeadInstructions)); + VF = SubRange.End; + } +} + +LoopVectorizationPlanner::VPlanPtr +LoopVectorizationPlanner::buildVPlanWithVPRecipes( + VFRange &Range, SmallPtrSetImpl &NeedDef, + SmallPtrSetImpl &DeadInstructions) { // Hold a mapping from predicated instructions to their recipes, in order to // fix their AlsoPack behavior if a user is determined to replicate and use a // scalar instead of vector value. DenseMap PredInst2Recipe; + EdgeMaskCache.clear(); + BlockMaskCache.clear(); + DenseMap &SinkAfter = Legal->getSinkAfter(); + DenseMap SinkAfterInverse; + // Create a dummy pre-entry VPBasicBlock to start building the VPlan. VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry"); auto Plan = llvm::make_unique(VPBB); @@ -7048,6 +7043,25 @@ return Plan; } +LoopVectorizationPlanner::VPlanPtr +LoopVectorizationPlanner::buildVPlan(VFRange &Range) { + // Outer loop handling: They may require CFG and instruction level + // transformations before even evaluating whether vectorization is profitable. 
+ // Since we cannot modify the incoming IR, we need to build VPlan upfront in + // the vectorization pipeline. + assert(!OrigLoop->empty()); + assert(EnableVPlanNativePath && "VPlan-native path is not enabled."); + + // Create new empty VPlan + auto Plan = llvm::make_unique(); + + // Build hierarchical CFG + VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI); + HCFGBuilder.buildHierarchicalCFG(*Plan.get()); + + return Plan; +} + Value* LoopVectorizationPlanner::VPCallbackILV:: getOrCreateVectorValues(Value *V, unsigned Part) { return ILV.getOrCreateVectorValue(V, Part);