Refactor VPRecipeBuilder's widening entry point.

tryToCreateRecipe() becomes tryToCreateWidenRecipe(), which returns the created
recipe (or nullptr) instead of appending it to a VPBasicBlock; the caller in
buildVPlansWithVPRecipes() now registers and appends the recipe itself. The
tryTo* helpers take already-casted arguments (PHINode, TruncInst, CallInst), so
the instruction-kind dispatch happens once, in tryToCreateWidenRecipe().

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6815,8 +6815,8 @@
 VPWidenMemoryInstructionRecipe *
 VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range,
                                   VPlanPtr &Plan) {
-  if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
-    return nullptr;
+  assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
+         "Must be called with either a load or store");
 
   auto willWiden = [&](unsigned VF) -> bool {
     if (VF == 1)
@@ -6850,18 +6850,19 @@
 }
 
 VPWidenIntOrFpInductionRecipe *
-VPRecipeBuilder::tryToOptimizeInduction(Instruction *I, VFRange &Range) {
-  if (PHINode *Phi = dyn_cast<PHINode>(I)) {
-    // Check if this is an integer or fp induction. If so, build the recipe that
-    // produces its scalar and vector values.
-    InductionDescriptor II = Legal->getInductionVars().lookup(Phi);
-    if (II.getKind() == InductionDescriptor::IK_IntInduction ||
-        II.getKind() == InductionDescriptor::IK_FpInduction)
-      return new VPWidenIntOrFpInductionRecipe(Phi);
+VPRecipeBuilder::tryToOptimizeInductionPHI(PHINode *Phi) {
+  // Check if this is an integer or fp induction. If so, build the recipe that
+  // produces its scalar and vector values.
+  InductionDescriptor II = Legal->getInductionVars().lookup(Phi);
+  if (II.getKind() == InductionDescriptor::IK_IntInduction ||
+      II.getKind() == InductionDescriptor::IK_FpInduction)
+    return new VPWidenIntOrFpInductionRecipe(Phi);
 
-    return nullptr;
-  }
+  return nullptr;
+}
 
+VPWidenIntOrFpInductionRecipe *
+VPRecipeBuilder::tryToOptimizeInductionTruncate(TruncInst *I, VFRange &Range) {
   // Optimize the special case where the source is a constant integer
   // induction variable. Notice that we can only optimize the 'trunc' case
   // because (a) FP conversions lose precision, (b) sext/zext may wrap, and
@@ -6875,18 +6876,14 @@
         [=](unsigned VF) -> bool { return CM.isOptimizableIVTruncate(K, VF); };
   };
 
-  if (isa<TruncInst>(I) && LoopVectorizationPlanner::getDecisionAndClampRange(
-                               isOptimizableIVTruncate(I), Range))
+  if (LoopVectorizationPlanner::getDecisionAndClampRange(
+          isOptimizableIVTruncate(I), Range))
     return new VPWidenIntOrFpInductionRecipe(cast<PHINode>(I->getOperand(0)),
-                                             cast<TruncInst>(I));
+                                             I);
   return nullptr;
 }
 
-VPBlendRecipe *VPRecipeBuilder::tryToBlend(Instruction *I, VPlanPtr &Plan) {
-  PHINode *Phi = dyn_cast<PHINode>(I);
-  if (!Phi || Phi->getParent() == OrigLoop->getHeader())
-    return nullptr;
-
+VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi, VPlanPtr &Plan) {
   // We know that all PHIs in non-header blocks are converted into selects, so
   // we don't have to worry about the insertion order and we can just use the
   // builder. At this point we generate the predication tree. There may be
@@ -6907,14 +6904,14 @@
   return new VPBlendRecipe(Phi, Operands);
 }
 
-VPWidenCallRecipe *
-VPRecipeBuilder::tryToWidenCall(Instruction *I, VFRange &Range, VPlan &Plan) {
+VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI, VFRange &Range,
+                                                   VPlan &Plan) {
 
   bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
-      [&](unsigned VF) { return CM.isScalarWithPredication(I, VF); }, Range);
+      [this, CI](unsigned VF) { return CM.isScalarWithPredication(CI, VF); },
+      Range);
 
-  CallInst *CI = dyn_cast<CallInst>(I);
-  if (IsPredicated || !CI)
+  if (IsPredicated)
     return nullptr;
 
   Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
@@ -6960,17 +6957,6 @@
                                                              Range);
 }
 
-VPWidenSelectRecipe *VPRecipeBuilder::tryToWidenSelect(Instruction *I) {
-  auto *SI = dyn_cast<SelectInst>(I);
-  if (!SI)
-    return nullptr;
-  auto *SE = PSE.getSE();
-  bool InvariantCond =
-      SE->isLoopInvariant(PSE.getSCEV(SI->getOperand(0)), OrigLoop);
-  // Success: widen this instruction.
-  return new VPWidenSelectRecipe(*SI, InvariantCond);
-}
-
 VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I, VPlan &Plan) {
   auto IsVectorizableOpcode = [](unsigned Opcode) {
     switch (Opcode) {
@@ -7088,42 +7074,43 @@
   return Region;
 }
 
-bool VPRecipeBuilder::tryToCreateRecipe(Instruction *Instr, VFRange &Range,
-                                        VPlanPtr &Plan, VPBasicBlock *VPBB) {
-  VPRecipeBase *Recipe = nullptr;
-
+VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
+                                                      VFRange &Range,
+                                                      VPlanPtr &Plan) {
   // First, check for specific widening recipes that deal with calls, memory
   // operations, inductions and Phi nodes.
-  if ((Recipe = tryToWidenCall(Instr, Range, *Plan)) ||
-      (Recipe = tryToWidenMemory(Instr, Range, Plan)) ||
-      (Recipe = tryToOptimizeInduction(Instr, Range)) ||
-      (Recipe = tryToBlend(Instr, Plan)) ||
-      (isa<PHINode>(Instr) &&
-       (Recipe = new VPWidenPHIRecipe(cast<PHINode>(Instr))))) {
-    setRecipe(Instr, Recipe);
-    VPBB->appendRecipe(Recipe);
-    return true;
+  if (auto *CI = dyn_cast<CallInst>(Instr))
+    return tryToWidenCall(CI, Range, *Plan);
+
+  if (isa<LoadInst>(Instr) || isa<StoreInst>(Instr))
+    return tryToWidenMemory(Instr, Range, Plan);
+
+  VPRecipeBase *Recipe;
+  if (auto *Phi = dyn_cast<PHINode>(Instr)) {
+    if (Phi->getParent() != OrigLoop->getHeader())
+      return tryToBlend(Phi, Plan);
+    if ((Recipe = tryToOptimizeInductionPHI(Phi)))
+      return Recipe;
+    return new VPWidenPHIRecipe(Phi);
   }
 
-  // Calls and memory instructions are widened by the specialized recipes above,
-  // or scalarized.
-  if (isa<CallInst>(Instr) || isa<LoadInst>(Instr) || isa<StoreInst>(Instr))
-    return false;
+  if (isa<TruncInst>(Instr) &&
+      (Recipe = tryToOptimizeInductionTruncate(cast<TruncInst>(Instr), Range)))
+    return Recipe;
 
   if (!shouldWiden(Instr, Range))
-    return false;
+    return nullptr;
 
-  if ((Recipe = tryToWidenSelect(Instr)) ||
-      (isa<GetElementPtrInst>(Instr) &&
-       (Recipe =
-            new VPWidenGEPRecipe(cast<GetElementPtrInst>(Instr), OrigLoop))) ||
-      (Recipe = tryToWiden(Instr, *Plan))) {
-    setRecipe(Instr, Recipe);
-    VPBB->appendRecipe(Recipe);
-    return true;
+  if (auto *GEP = dyn_cast<GetElementPtrInst>(Instr))
+    return new VPWidenGEPRecipe(GEP, OrigLoop);
+
+  if (auto *SI = dyn_cast<SelectInst>(Instr)) {
+    bool InvariantCond =
+        PSE.getSE()->isLoopInvariant(PSE.getSCEV(SI->getOperand(0)), OrigLoop);
+    return new VPWidenSelectRecipe(*SI, InvariantCond);
   }
 
-  return false;
+  return tryToWiden(Instr, *Plan);
 }
 
 void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF,
@@ -7268,8 +7255,12 @@
           DeadInstructions.find(Instr) != DeadInstructions.end())
         continue;
 
-      if (RecipeBuilder.tryToCreateRecipe(Instr, Range, Plan, VPBB))
+      if (auto *Recipe =
+              RecipeBuilder.tryToCreateWidenRecipe(Instr, Range, Plan)) {
+        RecipeBuilder.setRecipe(Instr, Recipe);
+        VPBB->appendRecipe(Recipe);
         continue;
+      }
 
       // Otherwise, if all widening options failed, Instruction is to be
       // replicated. This may create a successor for VPBB.
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -54,6 +54,12 @@
   // marked by having a nullptr entry in this map.
   DenseMap<Instruction *, VPRecipeBase *> Ingredient2Recipe;
 
+  /// Check if \p I can be widened at the start of \p Range and possibly
+  /// decrease the range such that the returned value holds for the entire \p
+  /// Range. The function should not be called for memory instructions or calls.
+  bool shouldWiden(Instruction *I, VFRange &Range) const;
+
+public:
   /// Set the recipe created for given ingredient. This operation is a no-op for
   /// ingredients that were not marked using a nullptr entry in the map.
   void setRecipe(Instruction *I, VPRecipeBase *R) {
@@ -64,12 +70,6 @@
     Ingredient2Recipe[I] = R;
   }
 
-  /// Check if \p I can be widened at the start of \p Range and possibly
-  /// decrease the range such that the returned value holds for the entire \p
-  /// Range. The function should not be called for memory instructions or calls.
-  bool shouldWiden(Instruction *I, VFRange &Range) const;
-
-public:
   /// A helper function that computes the predicate of the block BB, assuming
   /// that the header block of the loop is set to True. It returns the *entry*
   /// mask for the block BB.
@@ -96,33 +96,30 @@
     return Ingredient2Recipe[I];
   }
 
-  /// Check if \I is a memory instruction to be widened for \p Range.Start and
-  /// potentially masked. Such instructions are handled by a recipe that takes
-  /// an additional VPInstruction for the mask.
+  /// Check if the load or store instruction \p I should be widened for \p
+  /// Range.Start and potentially masked. Such instructions are handled by a
+  /// recipe that takes an additional VPInstruction for the mask.
   VPWidenMemoryInstructionRecipe *
   tryToWidenMemory(Instruction *I, VFRange &Range, VPlanPtr &Plan);
 
-  /// Check if an induction recipe should be constructed for \I within the given
-  /// VF \p Range. If so build and return it. If not, return null. \p Range.End
-  /// may be decreased to ensure same decision from \p Range.Start to
-  /// \p Range.End.
-  VPWidenIntOrFpInductionRecipe *tryToOptimizeInduction(Instruction *I,
-                                                        VFRange &Range);
+  /// Check if an induction recipe should be constructed for \p Phi. If so build
+  /// and return it. If not, return null.
+  VPWidenIntOrFpInductionRecipe *tryToOptimizeInductionPHI(PHINode *Phi);
+
+  /// Optimize the special case where the operand of \p I is a constant integer
+  /// induction variable.
+  VPWidenIntOrFpInductionRecipe *tryToOptimizeInductionTruncate(TruncInst *I,
+                                                                VFRange &Range);
 
   /// Handle non-loop phi nodes. Currently all such phi nodes are turned into
   /// a sequence of select instructions as the vectorizer currently performs
   /// full if-conversion.
-  VPBlendRecipe *tryToBlend(Instruction *I, VPlanPtr &Plan);
-
-  /// Handle call instruction. If \p I is a call that can be widened for \p
-  /// Range.Start, return a new VPWidenCallRecipe. Range.End may be decreased to
-  /// ensure same decision from \p Range.Start to \p Range.End.
-  VPWidenCallRecipe *tryToWidenCall(Instruction *I, VFRange &Range,
-                                    VPlan &Plan);
-  /// Check if \p I is a SelectInst and return a VPWidenSelectRecipe if it is.
-  /// The function should only be called if the cost-model indicates that
-  /// widening should be performed.
-  VPWidenSelectRecipe *tryToWidenSelect(Instruction *I);
+  VPBlendRecipe *tryToBlend(PHINode *Phi, VPlanPtr &Plan);
+
+  /// Handle call instructions. If \p CI can be widened for \p Range.Start,
+  /// return a new VPWidenCallRecipe. Range.End may be decreased to ensure same
+  /// decision from \p Range.Start to \p Range.End.
+  VPWidenCallRecipe *tryToWidenCall(CallInst *CI, VFRange &Range, VPlan &Plan);
 
   /// Check if \p I has an opcode that can be widened and return a VPWidenRecipe
   /// if it can. The function should only be called if the cost-model indicates
@@ -143,9 +140,9 @@
         Builder(Builder) {}
 
   /// Check if a recipe can be create for \p I withing the given VF \p Range.
-  /// If a recipe can be created, it adds it to \p VPBB.
-  bool tryToCreateRecipe(Instruction *Instr, VFRange &Range, VPlanPtr &Plan,
-                         VPBasicBlock *VPBB);
+  /// If a recipe can be created, return it. Otherwise return nullptr.
+  VPRecipeBase *tryToCreateWidenRecipe(Instruction *Instr, VFRange &Range,
+                                       VPlanPtr &Plan);
 
   /// Build a VPReplicationRecipe for \p I and enclose it within a Region if it
   /// is predicated. \return \p VPBB augmented with this new recipe if \p I is