diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -31,6 +31,8 @@
 
 namespace llvm {
 
+class PredicatedScalarEvolution;
+
 /// VPlan-based builder utility analogous to IRBuilder.
 class VPBuilder {
   VPBasicBlock *BB = nullptr;
@@ -203,6 +205,9 @@
   /// The interleaved access analysis.
   InterleavedAccessInfo &IAI;
 
+  /// The predicated scalar evolution analysis.
+  PredicatedScalarEvolution &PSE;
+
   SmallVector<VPlanPtr, 4> VPlans;
 
   /// This class is used to enable the VPlan to invoke a method of ILV. This is
@@ -228,9 +233,10 @@
                            const TargetTransformInfo *TTI,
                            LoopVectorizationLegality *Legal,
                            LoopVectorizationCostModel &CM,
-                           InterleavedAccessInfo &IAI)
-      : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
-        IAI(IAI) {}
+                           InterleavedAccessInfo &IAI,
+                           PredicatedScalarEvolution &PSE)
+      : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM), IAI(IAI),
+        PSE(PSE) {}
 
   /// Plan how to best vectorize, return the best VF and its cost, or None if
   /// vectorization and interleaving should be avoided up front.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -413,6 +413,11 @@
   void widenCallInstruction(CallInst &I, VPUser &ArgOperands,
                             VPTransformState &State);
 
+  /// Widen a single select instruction within the innermost loop. If
+  /// \p InvariantCond is true, the first lane of the vectorized condition is
+  /// used as a scalar condition; otherwise the vector condition is used.
+  void widenSelectInstruction(SelectInst &I, bool InvariantCond);
+
   /// Fix the vectorized code, taking care of header phi's, live-outs, and more.
   void fixVectorizedLoop();
 
@@ -4232,6 +4237,7 @@
   case Instruction::Br:
   case Instruction::PHI:
   case Instruction::GetElementPtr:
+  case Instruction::Select:
     llvm_unreachable("This instruction is handled by a different recipe.");
   case Instruction::UDiv:
   case Instruction::SDiv:
@@ -4272,35 +4278,6 @@
 
     break;
   }
-  case Instruction::Select: {
-    // Widen selects.
-    // If the selector is loop invariant we can create a select
-    // instruction with a scalar condition. Otherwise, use vector-select.
-    auto *SE = PSE.getSE();
-    bool InvariantCond =
-        SE->isLoopInvariant(PSE.getSCEV(I.getOperand(0)), OrigLoop);
-    setDebugLocFromInst(Builder, &I);
-
-    // The condition can be loop invariant but still defined inside the
-    // loop. This means that we can't just use the original 'cond' value.
-    // We have to take the 'vectorized' value and pick the first lane.
-    // Instcombine will make this a no-op.
-
-    auto *ScalarCond = getOrCreateScalarValue(I.getOperand(0), {0, 0});
-
-    for (unsigned Part = 0; Part < UF; ++Part) {
-      Value *Cond = getOrCreateVectorValue(I.getOperand(0), Part);
-      Value *Op0 = getOrCreateVectorValue(I.getOperand(1), Part);
-      Value *Op1 = getOrCreateVectorValue(I.getOperand(2), Part);
-      Value *Sel =
-          Builder.CreateSelect(InvariantCond ? ScalarCond : Cond, Op0, Op1);
-      VectorLoopValueMap.setVectorValue(&I, Part, Sel);
-      addMetadata(Sel, &I);
-    }
-
-    break;
-  }
-
   case Instruction::ICmp:
   case Instruction::FCmp: {
     // Widen compares. Generate vector compares.
@@ -4433,6 +4410,28 @@
   }
 }
 
+void InnerLoopVectorizer::widenSelectInstruction(SelectInst &I,
+                                                 bool InvariantCond) {
+  setDebugLocFromInst(Builder, &I);
+
+  // The condition can be loop invariant but still defined inside the
+  // loop. This means that we can't just use the original 'cond' value.
+  // We have to take the 'vectorized' value and pick the first lane.
+  // Instcombine will make this a no-op.
+
+  auto *ScalarCond = getOrCreateScalarValue(I.getOperand(0), {0, 0});
+
+  for (unsigned Part = 0; Part < UF; ++Part) {
+    Value *Cond = getOrCreateVectorValue(I.getOperand(0), Part);
+    Value *Op0 = getOrCreateVectorValue(I.getOperand(1), Part);
+    Value *Op1 = getOrCreateVectorValue(I.getOperand(2), Part);
+    Value *Sel =
+        Builder.CreateSelect(InvariantCond ? ScalarCond : Cond, Op0, Op1);
+    VectorLoopValueMap.setVectorValue(&I, Part, Sel);
+    addMetadata(Sel, &I);
+  }
+}
+
 void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) {
   // We should not collect Scalars more than once per VF. Right now, this
   // function is called from collectUniformsAndScalars(), which already does
@@ -6937,6 +6936,33 @@
   return new VPWidenCallRecipe(*CI, VPValues);
 }
 
+VPWidenSelectRecipe *VPRecipeBuilder::tryToWidenSelect(Instruction *I,
+                                                       VFRange &Range) {
+  auto *SI = dyn_cast<SelectInst>(I);
+  if (!SI)
+    return nullptr;
+
+  bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
+      [&](unsigned VF) { return CM.isScalarWithPredication(I, VF); }, Range);
+
+  if (IsPredicated)
+    return nullptr;
+
+  auto willWiden = [&](unsigned VF) -> bool {
+    return !CM.isScalarAfterVectorization(I, VF) &&
+           !CM.isProfitableToScalarize(I, VF);
+  };
+
+  if (!LoopVectorizationPlanner::getDecisionAndClampRange(willWiden, Range))
+    return nullptr;
+
+  auto *SE = PSE.getSE();
+  bool InvariantCond =
+      SE->isLoopInvariant(PSE.getSCEV(I->getOperand(0)), OrigLoop);
+  // Success: widen this instruction.
+  return new VPWidenSelectRecipe(*SI, InvariantCond);
+}
+
 VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I, VFRange &Range) {
   bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
       [&](unsigned VF) { return CM.isScalarWithPredication(I, VF); }, Range);
@@ -7088,6 +7114,7 @@
   // operations, inductions and Phi nodes.
   if ((Recipe = tryToWidenCall(Instr, Range, *Plan)) ||
       (Recipe = tryToWidenMemory(Instr, Range, Plan)) ||
+      (Recipe = tryToWidenSelect(Instr, Range)) ||
       (Recipe = tryToOptimizeInduction(Instr, Range)) ||
       (Recipe = tryToBlend(Instr, Plan)) ||
       (isa<PHINode>(Instr) &&
@@ -7194,7 +7221,7 @@
 
   SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;
 
-  VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder);
+  VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, PSE, Builder);
 
   // ---------------------------------------------------------------------------
   // Pre-construction: record ingredients whose recipes we'll need to further
@@ -7410,6 +7437,10 @@
   State.ILV->widenCallInstruction(Ingredient, User, State);
 }
 
+void VPWidenSelectRecipe::execute(VPTransformState &State) {
+  State.ILV->widenSelectInstruction(Ingredient, InvariantCond);
+}
+
 void VPWidenRecipe::execute(VPTransformState &State) {
   State.ILV->widenInstruction(Ingredient);
 }
@@ -7620,7 +7651,7 @@
   // Use the planner for outer loop vectorization.
   // TODO: CM is not used at this point inside the planner. Turn CM into an
   // optional argument if we don't need it in the future.
-  LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI);
+  LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI, PSE);
 
   // Get user vectorization factor.
   const unsigned UserVF = Hints.getWidth();
@@ -7779,7 +7810,7 @@
   CM.collectValuesToIgnore();
 
   // Use the planner for vectorization.
-  LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI);
+  LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI, PSE);
 
   // Get user vectorization factor.
   unsigned UserVF = Hints.getWidth();
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -35,6 +35,9 @@
   /// The profitablity analysis.
   LoopVectorizationCostModel &CM;
 
+  /// Used to check whether a select condition is loop invariant.
+  PredicatedScalarEvolution &PSE;
+
   VPBuilder &Builder;
 
   /// When we if-convert we need to create edge masks. We have to cache values
@@ -113,6 +116,11 @@
   VPWidenCallRecipe *tryToWidenCall(Instruction *I, VFRange &Range,
                                     VPlan &Plan);
 
+  /// Check if \p I is a select that can be widened for \p Range.Start. If so,
+  /// build a new VPWidenSelectRecipe and return it, else return nullptr.
+  /// Range.End may be decreased to keep the decision uniform over the range.
+  VPWidenSelectRecipe *tryToWidenSelect(Instruction *I, VFRange &Range);
+
   /// Check if \p I can be widened within the given VF \p Range. If \p I can be
   /// widened for \p Range.Start, build a new VPWidenRecipe and return it.
   /// Range.End may be decreased to ensure same decision from \p Range.Start to
@@ -127,8 +135,10 @@
 public:
   VPRecipeBuilder(Loop *OrigLoop, const TargetLibraryInfo *TLI,
                   LoopVectorizationLegality *Legal,
-                  LoopVectorizationCostModel &CM, VPBuilder &Builder)
-      : OrigLoop(OrigLoop), TLI(TLI), Legal(Legal), CM(CM), Builder(Builder) {}
+                  LoopVectorizationCostModel &CM,
+                  PredicatedScalarEvolution &PSE, VPBuilder &Builder)
+      : OrigLoop(OrigLoop), TLI(TLI), Legal(Legal), CM(CM), PSE(PSE),
+        Builder(Builder) {}
 
   /// Check if a recipe can be create for \p I withing the given VF \p Range.
   /// If a recipe can be created, it adds it to \p VPBB.
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -617,6 +617,7 @@
     VPWidenMemoryInstructionSC,
     VPWidenPHISC,
     VPWidenSC,
+    VPWidenSelectSC
   };
 
   VPRecipeBase(const unsigned char SC) : SubclassID(SC) {}
@@ -813,6 +814,35 @@
              VPSlotTracker &SlotTracker) const override;
 };
 
+/// A recipe for widening select instructions.
+class VPWidenSelectRecipe : public VPRecipeBase {
+private:
+  /// Hold the select to be widened.
+  SelectInst &Ingredient;
+
+  /// Is the condition of the select loop invariant?
+  bool InvariantCond;
+
+public:
+  VPWidenSelectRecipe(SelectInst &I, bool InvariantCond)
+      : VPRecipeBase(VPWidenSelectSC), Ingredient(I),
+        InvariantCond(InvariantCond) {}
+
+  ~VPWidenSelectRecipe() override = default;
+
+  /// Method to support type inquiry through isa, cast, and dyn_cast.
+  static inline bool classof(const VPRecipeBase *V) {
+    return V->getVPRecipeID() == VPRecipeBase::VPWidenSelectSC;
+  }
+
+  /// Produce a widened version of the select instruction.
+  void execute(VPTransformState &State) override;
+
+  /// Print the recipe.
+  void print(raw_ostream &O, const Twine &Indent,
+             VPSlotTracker &SlotTracker) const override;
+};
+
 /// A recipe for handling GEP instructions.
 class VPWidenGEPRecipe : public VPRecipeBase {
   GetElementPtrInst *GEP;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -718,6 +718,14 @@
     << Indent << "\"WIDEN-CALL " << VPlanIngredient(&Ingredient) << "\\l\"";
 }
 
+void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
+                                VPSlotTracker &SlotTracker) const {
+  O << " +\n"
+    << Indent << "\"WIDEN-SELECT " << VPlanIngredient(&Ingredient)
+    << " (condition is " << (InvariantCond ? "" : "not ")
+    << "loop invariant)\\l\"";
+}
+
 void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
                           VPSlotTracker &SlotTracker) const {
   O << " +\n" << Indent << "\"WIDEN\\l\"";