Index: llvm/trunk/lib/Transforms/Vectorize/LoopVectorizationPlanner.h =================================================================== --- llvm/trunk/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -212,16 +212,6 @@ /// A builder used to construct the current plan. VPBuilder Builder; - /// When we if-convert we need to create edge masks. We have to cache values - /// so that we don't end up with exponential recursion/IR. Note that - /// if-conversion currently takes place during VPlan-construction, so these - /// caches are only used at that stage. - using EdgeMaskCacheTy = - DenseMap, VPValue *>; - using BlockMaskCacheTy = DenseMap; - EdgeMaskCacheTy EdgeMaskCache; - BlockMaskCacheTy BlockMaskCache; - unsigned BestVF = 0; unsigned BestUF = 0; @@ -251,97 +241,25 @@ O << *Plan; } + /// Test a \p Predicate on a \p Range of VF's. Return the value of applying + /// \p Predicate on Range.Start, possibly decreasing Range.End such that the + /// returned value holds for the entire \p Range. + static bool + getDecisionAndClampRange(const std::function &Predicate, + VFRange &Range); + protected: /// Collect the instructions from the original loop that would be trivially /// dead in the vectorized loop if generated. void collectTriviallyDeadInstructions( SmallPtrSetImpl &DeadInstructions); - /// A range of powers-of-2 vectorization factors with fixed start and - /// adjustable end. The range includes start and excludes end, e.g.,: - /// [1, 9) = {1, 2, 4, 8} - struct VFRange { - // A power of 2. - const unsigned Start; - - // Need not be a power of 2. If End <= Start range is empty. - unsigned End; - }; - - /// Test a \p Predicate on a \p Range of VF's. Return the value of applying - /// \p Predicate on Range.Start, possibly decreasing Range.End such that the - /// returned value holds for the entire \p Range. - bool getDecisionAndClampRange(const std::function &Predicate, - VFRange &Range); - /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive, /// according to the information gathered by Legal when it checked if it is /// legal to vectorize the loop. void buildVPlans(unsigned MinVF, unsigned MaxVF); private: - /// A helper function that computes the predicate of the block BB, assuming - /// that the header block of the loop is set to True. It returns the *entry* - /// mask for the block BB. - VPValue *createBlockInMask(BasicBlock *BB, VPlanPtr &Plan); - - /// A helper function that computes the predicate of the edge between SRC - /// and DST. - VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst, VPlanPtr &Plan); - - /// Check if \I belongs to an Interleave Group within the given VF \p Range, - /// \return true in the first returned value if so and false otherwise. - /// Build a new VPInterleaveGroup Recipe if \I is the primary member of an IG - /// for \p Range.Start, and provide it as the second returned value. - /// Note that if \I is an adjunct member of an IG for \p Range.Start, the - /// \return value is , as it is handled by another recipe. - /// \p Range.End may be decreased to ensure same decision from \p Range.Start - /// to \p Range.End. - VPInterleaveRecipe *tryToInterleaveMemory(Instruction *I, VFRange &Range); - - // Check if \I is a memory instruction to be widened for \p Range.Start and - // potentially masked. Such instructions are handled by a recipe that takes an - // additional VPInstruction for the mask. - VPWidenMemoryInstructionRecipe *tryToWidenMemory(Instruction *I, - VFRange &Range, - VPlanPtr &Plan); - - /// Check if an induction recipe should be constructed for \I within the given - /// VF \p Range. If so build and return it. If not, return null. \p Range.End - /// may be decreased to ensure same decision from \p Range.Start to - /// \p Range.End. - VPWidenIntOrFpInductionRecipe *tryToOptimizeInduction(Instruction *I, - VFRange &Range); - - /// Handle non-loop phi nodes. Currently all such phi nodes are turned into - /// a sequence of select instructions as the vectorizer currently performs - /// full if-conversion. - VPBlendRecipe *tryToBlend(Instruction *I, VPlanPtr &Plan); - - /// Check if \p I can be widened within the given VF \p Range. If \p I can be - /// widened for \p Range.Start, check if the last recipe of \p VPBB can be - /// extended to include \p I or else build a new VPWidenRecipe for it and - /// append it to \p VPBB. Return true if \p I can be widened for Range.Start, - /// false otherwise. Range.End may be decreased to ensure same decision from - /// \p Range.Start to \p Range.End. - bool tryToWiden(Instruction *I, VPBasicBlock *VPBB, VFRange &Range); - - /// Build a VPReplicationRecipe for \p I and enclose it within a Region if it - /// is predicated. \return \p VPBB augmented with this new recipe if \p I is - /// not predicated, otherwise \return a new VPBasicBlock that succeeds the new - /// Region. Update the packing decision of predicated instructions if they - /// feed \p I. Range.End may be decreased to ensure same recipe behavior from - /// \p Range.Start to \p Range.End. - VPBasicBlock *handleReplication( - Instruction *I, VFRange &Range, VPBasicBlock *VPBB, - DenseMap &PredInst2Recipe, - VPlanPtr &Plan); - - /// Create a replicating region for instruction \p I that requires - /// predication. \p PredRecipe is a VPReplicateRecipe holding \p I. - VPRegionBlock *createReplicateRegion(Instruction *I, VPRecipeBase *PredRecipe, - VPlanPtr &Plan); - /// Build a VPlan according to the information gathered by Legal. \return a /// VPlan for vectorization factors \p Range.Start and up to \p Range.End /// exclusive, possibly decreasing \p Range.End. Index: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -56,6 +56,7 @@ #include "llvm/Transforms/Vectorize/LoopVectorize.h" #include "LoopVectorizationPlanner.h" +#include "VPRecipeBuilder.h" #include "VPlanHCFGBuilder.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" @@ -6502,9 +6503,8 @@ } } -VPValue *LoopVectorizationPlanner::createEdgeMask(BasicBlock *Src, - BasicBlock *Dst, - VPlanPtr &Plan) { +VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst, + VPlanPtr &Plan) { assert(is_contained(predecessors(Dst), Src) && "Invalid edge"); // Look for cached value. @@ -6534,8 +6534,7 @@ return EdgeMaskCache[Edge] = EdgeMask; } -VPValue *LoopVectorizationPlanner::createBlockInMask(BasicBlock *BB, - VPlanPtr &Plan) { +VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) { assert(OrigLoop->contains(BB) && "Block is not a part of a loop"); // Look for cached value. @@ -6568,9 +6567,8 @@ return BlockMaskCache[BB] = BlockMask; } -VPInterleaveRecipe * -LoopVectorizationPlanner::tryToInterleaveMemory(Instruction *I, - VFRange &Range) { +VPInterleaveRecipe *VPRecipeBuilder::tryToInterleaveMemory(Instruction *I, + VFRange &Range) { const InterleaveGroup *IG = CM.getInterleavedAccessGroup(I); if (!IG) return nullptr; @@ -6583,7 +6581,7 @@ LoopVectorizationCostModel::CM_Interleave); }; }; - if (!getDecisionAndClampRange(isIGMember(I), Range)) + if (!LoopVectorizationPlanner::getDecisionAndClampRange(isIGMember(I), Range)) return nullptr; // I is a member of an InterleaveGroup for VF's in the (possibly trimmed) @@ -6596,8 +6594,8 @@ } VPWidenMemoryInstructionRecipe * -LoopVectorizationPlanner::tryToWidenMemory(Instruction *I, VFRange &Range, - VPlanPtr &Plan) { +VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range, + VPlanPtr &Plan) { if (!isa(I) && !isa(I)) return nullptr; @@ -6616,7 +6614,7 @@ return Decision != LoopVectorizationCostModel::CM_Scalarize; }; - if (!getDecisionAndClampRange(willWiden, Range)) + if (!LoopVectorizationPlanner::getDecisionAndClampRange(willWiden, Range)) return nullptr; VPValue *Mask = nullptr; @@ -6627,8 +6625,7 @@ } VPWidenIntOrFpInductionRecipe * -LoopVectorizationPlanner::tryToOptimizeInduction(Instruction *I, - VFRange &Range) { +VPRecipeBuilder::tryToOptimizeInduction(Instruction *I, VFRange &Range) { if (PHINode *Phi = dyn_cast(I)) { // Check if this is an integer or fp induction. If so, build the recipe that // produces its scalar and vector values. @@ -6653,15 +6650,14 @@ [=](unsigned VF) -> bool { return CM.isOptimizableIVTruncate(K, VF); }; }; - if (isa(I) && - getDecisionAndClampRange(isOptimizableIVTruncate(I), Range)) + if (isa(I) && LoopVectorizationPlanner::getDecisionAndClampRange( + isOptimizableIVTruncate(I), Range)) return new VPWidenIntOrFpInductionRecipe(cast(I->getOperand(0)), cast(I)); return nullptr; } -VPBlendRecipe * -LoopVectorizationPlanner::tryToBlend(Instruction *I, VPlanPtr &Plan) { +VPBlendRecipe *VPRecipeBuilder::tryToBlend(Instruction *I, VPlanPtr &Plan) { PHINode *Phi = dyn_cast(I); if (!Phi || Phi->getParent() == OrigLoop->getHeader()) return nullptr; @@ -6685,8 +6681,8 @@ return new VPBlendRecipe(Phi, Masks); } -bool LoopVectorizationPlanner::tryToWiden(Instruction *I, VPBasicBlock *VPBB, - VFRange &Range) { +bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB, + VFRange &Range) { if (CM.isScalarWithPredication(I)) return false; @@ -6771,7 +6767,7 @@ return true; }; - if (!getDecisionAndClampRange(willWiden, Range)) + if (!LoopVectorizationPlanner::getDecisionAndClampRange(willWiden, Range)) return false; // Success: widen this instruction. We optimize the common case where @@ -6786,11 +6782,11 @@ return true; } -VPBasicBlock *LoopVectorizationPlanner::handleReplication( +VPBasicBlock *VPRecipeBuilder::handleReplication( Instruction *I, VFRange &Range, VPBasicBlock *VPBB, DenseMap &PredInst2Recipe, VPlanPtr &Plan) { - bool IsUniform = getDecisionAndClampRange( + bool IsUniform = LoopVectorizationPlanner::getDecisionAndClampRange( [&](unsigned VF) { return CM.isUniformAfterVectorization(I, VF); }, Range); @@ -6823,10 +6819,9 @@ return RegSucc; } -VPRegionBlock * -LoopVectorizationPlanner::createReplicateRegion(Instruction *Instr, - VPRecipeBase *PredRecipe, - VPlanPtr &Plan) { +VPRegionBlock *VPRecipeBuilder::createReplicateRegion(Instruction *Instr, + VPRecipeBase *PredRecipe, + VPlanPtr &Plan) { // Instructions marked for predication are replicated and placed under an // if-then construct to prevent side-effects. @@ -6852,6 +6847,45 @@ return Region; } +bool VPRecipeBuilder::tryToCreateRecipe(Instruction *Instr, VFRange &Range, + VPlanPtr &Plan, VPBasicBlock *VPBB) { + VPRecipeBase *Recipe = nullptr; + // Check if Instr should belong to an interleave memory recipe, or already + // does. In the latter case Instr is irrelevant. + if ((Recipe = tryToInterleaveMemory(Instr, Range))) { + VPBB->appendRecipe(Recipe); + return true; + } + + // Check if Instr is a memory operation that should be widened. + if ((Recipe = tryToWidenMemory(Instr, Range, Plan))) { + VPBB->appendRecipe(Recipe); + return true; + } + + // Check if Instr should form some PHI recipe. + if ((Recipe = tryToOptimizeInduction(Instr, Range))) { + VPBB->appendRecipe(Recipe); + return true; + } + if ((Recipe = tryToBlend(Instr, Plan))) { + VPBB->appendRecipe(Recipe); + return true; + } + if (PHINode *Phi = dyn_cast(Instr)) { + VPBB->appendRecipe(new VPWidenPHIRecipe(Phi)); + return true; + } + + // Check if Instr is to be widened by a general VPWidenRecipe, after + // having first checked for specific widening recipes that deal with + // Interleave Groups, Inductions and Phi nodes. + if (tryToWiden(Instr, VPBB, Range)) + return true; + + return false; +} + void LoopVectorizationPlanner::buildVPlansWithVPRecipes(unsigned MinVF, unsigned MaxVF) { assert(OrigLoop->empty() && "Inner loop expected."); @@ -6895,8 +6929,6 @@ // scalar instead of vector value. DenseMap PredInst2Recipe; - EdgeMaskCache.clear(); - BlockMaskCache.clear(); DenseMap &SinkAfter = Legal->getSinkAfter(); DenseMap SinkAfterInverse; @@ -6904,6 +6936,7 @@ VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry"); auto Plan = llvm::make_unique(VPBB); + VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, TTI, Legal, CM, Builder); // Represent values that will have defs inside VPlan. for (Value *V : NeedDef) Plan->addVPValue(V); @@ -6969,45 +7002,13 @@ // Introduce each ingredient into VPlan. for (Instruction *Instr : Ingredients) { - VPRecipeBase *Recipe = nullptr; - - // Check if Instr should belong to an interleave memory recipe, or already - // does. In the latter case Instr is irrelevant. - if ((Recipe = tryToInterleaveMemory(Instr, Range))) { - VPBB->appendRecipe(Recipe); - continue; - } - - // Check if Instr is a memory operation that should be widened. - if ((Recipe = tryToWidenMemory(Instr, Range, Plan))) { - VPBB->appendRecipe(Recipe); - continue; - } - - // Check if Instr should form some PHI recipe. - if ((Recipe = tryToOptimizeInduction(Instr, Range))) { - VPBB->appendRecipe(Recipe); - continue; - } - if ((Recipe = tryToBlend(Instr, Plan))) { - VPBB->appendRecipe(Recipe); - continue; - } - if (PHINode *Phi = dyn_cast(Instr)) { - VPBB->appendRecipe(new VPWidenPHIRecipe(Phi)); - continue; - } - - // Check if Instr is to be widened by a general VPWidenRecipe, after - // having first checked for specific widening recipes that deal with - // Interleave Groups, Inductions and Phi nodes. - if (tryToWiden(Instr, VPBB, Range)) + if (RecipeBuilder.tryToCreateRecipe(Instr, Range, Plan, VPBB)) continue; // Otherwise, if all widening options failed, Instruction is to be // replicated. This may create a successor for VPBB. - VPBasicBlock *NextVPBB = - handleReplication(Instr, Range, VPBB, PredInst2Recipe, Plan); + VPBasicBlock *NextVPBB = RecipeBuilder.handleReplication( + Instr, Range, VPBB, PredInst2Recipe, Plan); if (NextVPBB != VPBB) { VPBB = NextVPBB; VPBB->setName(BB->hasName() ? BB->getName() + "." + Twine(VPBBsForBB++) Index: llvm/trunk/lib/Transforms/Vectorize/VPRecipeBuilder.h =================================================================== --- llvm/trunk/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ llvm/trunk/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -0,0 +1,131 @@ +//===- VPRecipeBuilder.h - Helper class to build recipes --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_VECTORIZE_VPRECIPEBUILDER_H +#define LLVM_TRANSFORMS_VECTORIZE_VPRECIPEBUILDER_H + +#include "LoopVectorizationPlanner.h" +#include "VPlan.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/IR/IRBuilder.h" + +namespace llvm { + +class LoopVectorizationLegality; +class LoopVectorizationCostModel; +class TargetTransformInfo; +class TargetLibraryInfo; + +/// Helper class to create VPRecipies from IR instructions. +class VPRecipeBuilder { + /// The loop that we evaluate. + Loop *OrigLoop; + + /// Target Library Info. + const TargetLibraryInfo *TLI; + + /// Target Transform Info. + const TargetTransformInfo *TTI; + + /// The legality analysis. + LoopVectorizationLegality *Legal; + + /// The profitablity analysis. + LoopVectorizationCostModel &CM; + + VPBuilder &Builder; + + /// When we if-convert we need to create edge masks. We have to cache values + /// so that we don't end up with exponential recursion/IR. Note that + /// if-conversion currently takes place during VPlan-construction, so these + /// caches are only used at that stage. + using EdgeMaskCacheTy = + DenseMap, VPValue *>; + using BlockMaskCacheTy = DenseMap; + EdgeMaskCacheTy EdgeMaskCache; + BlockMaskCacheTy BlockMaskCache; + +public: + /// A helper function that computes the predicate of the block BB, assuming + /// that the header block of the loop is set to True. It returns the *entry* + /// mask for the block BB. + VPValue *createBlockInMask(BasicBlock *BB, VPlanPtr &Plan); + + /// A helper function that computes the predicate of the edge between SRC + /// and DST. + VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst, VPlanPtr &Plan); + + /// Check if \I belongs to an Interleave Group within the given VF \p Range, + /// \return true in the first returned value if so and false otherwise. + /// Build a new VPInterleaveGroup Recipe if \I is the primary member of an IG + /// for \p Range.Start, and provide it as the second returned value. + /// Note that if \I is an adjunct member of an IG for \p Range.Start, the + /// \return value is , as it is handled by another recipe. + /// \p Range.End may be decreased to ensure same decision from \p Range.Start + /// to \p Range.End. + VPInterleaveRecipe *tryToInterleaveMemory(Instruction *I, VFRange &Range); + + /// Check if \I is a memory instruction to be widened for \p Range.Start and + /// potentially masked. Such instructions are handled by a recipe that takes + /// an additional VPInstruction for the mask. + VPWidenMemoryInstructionRecipe * + tryToWidenMemory(Instruction *I, VFRange &Range, VPlanPtr &Plan); + + /// Check if an induction recipe should be constructed for \I within the given + /// VF \p Range. If so build and return it. If not, return null. \p Range.End + /// may be decreased to ensure same decision from \p Range.Start to + /// \p Range.End. + VPWidenIntOrFpInductionRecipe *tryToOptimizeInduction(Instruction *I, + VFRange &Range); + + /// Handle non-loop phi nodes. Currently all such phi nodes are turned into + /// a sequence of select instructions as the vectorizer currently performs + /// full if-conversion. + VPBlendRecipe *tryToBlend(Instruction *I, VPlanPtr &Plan); + + /// Check if \p I can be widened within the given VF \p Range. If \p I can be + /// widened for \p Range.Start, check if the last recipe of \p VPBB can be + /// extended to include \p I or else build a new VPWidenRecipe for it and + /// append it to \p VPBB. Return true if \p I can be widened for Range.Start, + /// false otherwise. Range.End may be decreased to ensure same decision from + /// \p Range.Start to \p Range.End. + bool tryToWiden(Instruction *I, VPBasicBlock *VPBB, VFRange &Range); + + /// Create a replicating region for instruction \p I that requires + /// predication. \p PredRecipe is a VPReplicateRecipe holding \p I. + VPRegionBlock *createReplicateRegion(Instruction *I, VPRecipeBase *PredRecipe, + VPlanPtr &Plan); + +public: + VPRecipeBuilder(Loop *OrigLoop, const TargetLibraryInfo *TLI, + const TargetTransformInfo *TTI, + LoopVectorizationLegality *Legal, + LoopVectorizationCostModel &CM, VPBuilder &Builder) + : OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM), + Builder(Builder) {} + + /// Check if a recipe can be create for \p I withing the given VF \p Range. + /// If a recipe can be created, it adds it to \p VPBB. + bool tryToCreateRecipe(Instruction *Instr, VFRange &Range, VPlanPtr &Plan, + VPBasicBlock *VPBB); + + /// Build a VPReplicationRecipe for \p I and enclose it within a Region if it + /// is predicated. \return \p VPBB augmented with this new recipe if \p I is + /// not predicated, otherwise \return a new VPBasicBlock that succeeds the new + /// Region. Update the packing decision of predicated instructions if they + /// feed \p I. Range.End may be decreased to ensure same recipe behavior from + /// \p Range.Start to \p Range.End. + VPBasicBlock *handleReplication( + Instruction *I, VFRange &Range, VPBasicBlock *VPBB, + DenseMap &PredInst2Recipe, + VPlanPtr &Plan); +}; +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_VECTORIZE_VPRECIPEBUILDER_H Index: llvm/trunk/lib/Transforms/Vectorize/VPlan.h =================================================================== --- llvm/trunk/lib/Transforms/Vectorize/VPlan.h +++ llvm/trunk/lib/Transforms/Vectorize/VPlan.h @@ -55,6 +55,20 @@ class Value; class VPBasicBlock; class VPRegionBlock; +class VPlan; + +/// A range of powers-of-2 vectorization factors with fixed start and +/// adjustable end. The range includes start and excludes end, e.g.,: +/// [1, 9) = {1, 2, 4, 8} +struct VFRange { + // A power of 2. + const unsigned Start; + + // Need not be a power of 2. If End <= Start range is empty. + unsigned End; +}; + +using VPlanPtr = std::unique_ptr; /// In what follows, the term "input IR" refers to code that is fed into the /// vectorizer whereas the term "output IR" refers to code that is generated by @@ -1290,6 +1304,7 @@ To->removePredecessor(From); } }; + } // end namespace llvm #endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H