Index: lib/Transforms/Vectorize/LoopVectorizationPlanner.h =================================================================== --- lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -345,8 +345,11 @@ /// Build a VPlan according to the information gathered by Legal. \return a /// VPlan for vectorization factors \p Range.Start and up to \p Range.End /// exclusive, possibly decreasing \p Range.End. - VPlanPtr buildVPlan(VFRange &Range, - const SmallPtrSetImpl &NeedDef); + VPlanPtr buildVPlan(VFRange &Range); + + /// Build a VPlan using VPRecipes according to the information gathered by + /// Legal. This method is only used for the legacy inner loop vectorizer. + VPlanPtr buildVPRecipes(VFRange &Range); }; } // namespace llvm Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -6311,37 +6311,40 @@ if (!MaybeMaxVF.hasValue()) // Cases considered too costly to vectorize. return NoVectorization; + VectorizationFactor BestVF; if (UserVF) { LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n"); assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two"); // Collect the instructions (and their associated costs) that will be more // profitable to scalarize. CM.selectUserVectorizationFactor(UserVF); - buildVPlans(UserVF, UserVF); - LLVM_DEBUG(printPlans(dbgs())); - return {UserVF, 0}; - } + BestVF = {UserVF, 0}; + } else { + unsigned MaxVF = MaybeMaxVF.getValue(); + assert(MaxVF != 0 && "MaxVF is zero."); - unsigned MaxVF = MaybeMaxVF.getValue(); - assert(MaxVF != 0 && "MaxVF is zero."); + for (unsigned VF = 1; VF <= MaxVF; VF *= 2) { + // Collect Uniform and Scalar instructions after vectorization with VF. 
+ CM.collectUniformsAndScalars(VF); - for (unsigned VF = 1; VF <= MaxVF; VF *= 2) { - // Collect Uniform and Scalar instructions after vectorization with VF. - CM.collectUniformsAndScalars(VF); + // Collect the instructions (and their associated costs) that will be more + // profitable to scalarize. + if (VF > 1) + CM.collectInstsToScalarize(VF); + } - // Collect the instructions (and their associated costs) that will be more - // profitable to scalarize. - if (VF > 1) - CM.collectInstsToScalarize(VF); + if (MaxVF == 1) + return NoVectorization; + + BestVF = CM.selectVectorizationFactor(MaxVF); } - buildVPlans(1, MaxVF); + VFRange R = {BestVF.Width, BestVF.Width + 1}; + VPlans.push_back(buildVPRecipes(R)); LLVM_DEBUG(printPlans(dbgs())); - if (MaxVF == 1) - return NoVectorization; // Select the optimal vectorization factor. - return CM.selectVectorizationFactor(MaxVF); + return BestVF; } void LoopVectorizationPlanner::setBestPlan(unsigned VF, unsigned UF) { @@ -6496,23 +6499,9 @@ /// vectorization decision can potentially shorten this sub-range during /// buildVPlan(). void LoopVectorizationPlanner::buildVPlans(unsigned MinVF, unsigned MaxVF) { - - // Collect conditions feeding internal conditional branches; they need to be - // represented in VPlan for it to model masking. 
- SmallPtrSet NeedDef; - - auto *Latch = OrigLoop->getLoopLatch(); - for (BasicBlock *BB : OrigLoop->blocks()) { - if (BB == Latch) - continue; - BranchInst *Branch = dyn_cast(BB->getTerminator()); - if (Branch && Branch->isConditional()) - NeedDef.insert(Branch->getCondition()); - } - for (unsigned VF = MinVF; VF < MaxVF + 1;) { VFRange SubRange = {VF, MaxVF + 1}; - VPlans.push_back(buildVPlan(SubRange, NeedDef)); + VPlans.push_back(buildVPlan(SubRange)); VF = SubRange.End; } } @@ -6868,26 +6857,22 @@ } LoopVectorizationPlanner::VPlanPtr -LoopVectorizationPlanner::buildVPlan(VFRange &Range, - const SmallPtrSetImpl &NeedDef) { - // Outer loop handling: They may require CFG and instruction level - // transformations before even evaluating whether vectorization is profitable. - // Since we cannot modify the incoming IR, we need to build VPlan upfront in - // the vectorization pipeline. - if (!OrigLoop->empty()) { - assert(EnableVPlanNativePath && "VPlan-native path is not enabled."); - - // Create new empty VPlan - auto Plan = llvm::make_unique(); +LoopVectorizationPlanner::buildVPRecipes(VFRange &Range) { + assert(OrigLoop->empty() && "Inner loop expected."); - // Build hierarchical CFG - VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI); - HCFGBuilder.buildHierarchicalCFG(*Plan.get()); + // Collect conditions feeding internal conditional branches; they need to be + // represented in VPlan for it to model masking. 
+ SmallPtrSet NeedDef; - return Plan; + auto *Latch = OrigLoop->getLoopLatch(); + for (BasicBlock *BB : OrigLoop->blocks()) { + if (BB == Latch) + continue; + BranchInst *Branch = dyn_cast(BB->getTerminator()); + if (Branch && Branch->isConditional()) + NeedDef.insert(Branch->getCondition()); } - assert(OrigLoop->empty() && "Inner loop expected."); EdgeMaskCache.clear(); BlockMaskCache.clear(); DenseMap &SinkAfter = Legal->getSinkAfter(); @@ -7048,6 +7033,25 @@ return Plan; } +LoopVectorizationPlanner::VPlanPtr +LoopVectorizationPlanner::buildVPlan(VFRange &Range) { + // Outer loop handling: They may require CFG and instruction level + // transformations before even evaluating whether vectorization is profitable. + // Since we cannot modify the incoming IR, we need to build VPlan upfront in + // the vectorization pipeline. + assert(!OrigLoop->empty()); + assert(EnableVPlanNativePath && "VPlan-native path is not enabled."); + + // Create new empty VPlan + auto Plan = llvm::make_unique(); + + // Build hierarchical CFG + VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI); + HCFGBuilder.buildHierarchicalCFG(*Plan.get()); + + return Plan; +} + Value* LoopVectorizationPlanner::VPCallbackILV:: getOrCreateVectorValues(Value *V, unsigned Part) { return ILV.getOrCreateVectorValue(V, Part);