|
| 1 | +//===- LoopVectorizationPlanner.h - Planner for LoopVectorization ---------===// |
| 2 | +// |
| 3 | +// The LLVM Compiler Infrastructure |
| 4 | +// |
| 5 | +// This file is distributed under the University of Illinois Open Source |
| 6 | +// License. See LICENSE.TXT for details. |
| 7 | +// |
| 8 | +//===----------------------------------------------------------------------===// |
| 9 | +/// |
| 10 | +/// \file |
| 11 | +/// This file provides a LoopVectorizationPlanner class. |
| 12 | +/// InnerLoopVectorizer vectorizes loops which contain only one basic |
| 13 | +/// LoopVectorizationPlanner - drives the vectorization process after having |
| 14 | +/// passed Legality checks. |
| 15 | +/// The planner builds and optimizes the Vectorization Plans which record the |
| 16 | +/// decisions how to vectorize the given loop. In particular, represent the |
| 17 | +/// control-flow of the vectorized version, the replication of instructions that |
| 18 | +/// are to be scalarized, and interleave access groups. |
| 19 | +/// |
| 20 | +/// Also provides a VPlan-based builder utility analogous to IRBuilder. |
| 21 | +/// It provides an instruction-level API for generating VPInstructions while |
| 22 | +/// abstracting away the Recipe manipulation details. |
| 23 | +//===----------------------------------------------------------------------===// |
| 24 | + |
| 25 | +#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H |
| 26 | +#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H |
| 27 | + |
| 28 | +#include "VPlan.h" |
| 29 | + |
| 30 | +namespace llvm { |
| 31 | + |
| 32 | +/// VPlan-based builder utility analogous to IRBuilder. |
| 33 | +class VPBuilder { |
| 34 | +private: |
| 35 | + VPBasicBlock *BB = nullptr; |
| 36 | + VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator(); |
| 37 | + |
| 38 | + VPInstruction *createInstruction(unsigned Opcode, |
| 39 | + std::initializer_list<VPValue *> Operands) { |
| 40 | + VPInstruction *Instr = new VPInstruction(Opcode, Operands); |
| 41 | + BB->insert(Instr, InsertPt); |
| 42 | + return Instr; |
| 43 | + } |
| 44 | + |
| 45 | +public: |
| 46 | + VPBuilder() {} |
| 47 | + |
| 48 | + /// \brief This specifies that created VPInstructions should be appended to |
| 49 | + /// the end of the specified block. |
| 50 | + void setInsertPoint(VPBasicBlock *TheBB) { |
| 51 | + assert(TheBB && "Attempting to set a null insert point"); |
| 52 | + BB = TheBB; |
| 53 | + InsertPt = BB->end(); |
| 54 | + } |
| 55 | + |
| 56 | + VPValue *createNot(VPValue *Operand) { |
| 57 | + return createInstruction(VPInstruction::Not, {Operand}); |
| 58 | + } |
| 59 | + |
| 60 | + VPValue *createAnd(VPValue *LHS, VPValue *RHS) { |
| 61 | + return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}); |
| 62 | + } |
| 63 | + |
| 64 | + VPValue *createOr(VPValue *LHS, VPValue *RHS) { |
| 65 | + return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS}); |
| 66 | + } |
| 67 | +}; |
| 68 | + |
| 69 | + |
| 70 | +/// TODO: The following VectorizationFactor was pulled out of |
| 71 | +/// LoopVectorizationCostModel class. LV also deals with |
| 72 | +/// VectorizerParams::VectorizationFactor and VectorizationCostTy. |
| 73 | +/// We need to streamline them. |
| 74 | + |
| 75 | +/// Information about vectorization costs |
| 76 | +struct VectorizationFactor { |
| 77 | + // Vector width with best cost |
| 78 | + unsigned Width; |
| 79 | + // Cost of the loop with that width |
| 80 | + unsigned Cost; |
| 81 | +}; |
| 82 | + |
| 83 | +/// Planner drives the vectorization process after having passed |
| 84 | +/// Legality checks. |
| 85 | +class LoopVectorizationPlanner { |
| 86 | + /// The loop that we evaluate. |
| 87 | + Loop *OrigLoop; |
| 88 | + |
| 89 | + /// Loop Info analysis. |
| 90 | + LoopInfo *LI; |
| 91 | + |
| 92 | + /// Target Library Info. |
| 93 | + const TargetLibraryInfo *TLI; |
| 94 | + |
| 95 | + /// Target Transform Info. |
| 96 | + const TargetTransformInfo *TTI; |
| 97 | + |
| 98 | + /// The legality analysis. |
| 99 | + LoopVectorizationLegality *Legal; |
| 100 | + |
| 101 | + /// The profitablity analysis. |
| 102 | + LoopVectorizationCostModel &CM; |
| 103 | + |
| 104 | + using VPlanPtr = std::unique_ptr<VPlan>; |
| 105 | + |
| 106 | + SmallVector<VPlanPtr, 4> VPlans; |
| 107 | + |
| 108 | + /// This class is used to enable the VPlan to invoke a method of ILV. This is |
| 109 | + /// needed until the method is refactored out of ILV and becomes reusable. |
| 110 | + struct VPCallbackILV : public VPCallback { |
| 111 | + InnerLoopVectorizer &ILV; |
| 112 | + |
| 113 | + VPCallbackILV(InnerLoopVectorizer &ILV) : ILV(ILV) {} |
| 114 | + |
| 115 | + Value *getOrCreateVectorValues(Value *V, unsigned Part) override; |
| 116 | + }; |
| 117 | + |
| 118 | + /// A builder used to construct the current plan. |
| 119 | + VPBuilder Builder; |
| 120 | + |
| 121 | + /// When we if-convert we need to create edge masks. We have to cache values |
| 122 | + /// so that we don't end up with exponential recursion/IR. Note that |
| 123 | + /// if-conversion currently takes place during VPlan-construction, so these |
| 124 | + /// caches are only used at that stage. |
| 125 | + using EdgeMaskCacheTy = |
| 126 | + DenseMap<std::pair<BasicBlock *, BasicBlock *>, VPValue *>; |
| 127 | + using BlockMaskCacheTy = DenseMap<BasicBlock *, VPValue *>; |
| 128 | + EdgeMaskCacheTy EdgeMaskCache; |
| 129 | + BlockMaskCacheTy BlockMaskCache; |
| 130 | + |
| 131 | + unsigned BestVF = 0; |
| 132 | + unsigned BestUF = 0; |
| 133 | + |
| 134 | +public: |
| 135 | + LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI, |
| 136 | + const TargetTransformInfo *TTI, |
| 137 | + LoopVectorizationLegality *Legal, |
| 138 | + LoopVectorizationCostModel &CM) |
| 139 | + : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM) {} |
| 140 | + |
| 141 | + /// Plan how to best vectorize, return the best VF and its cost. |
| 142 | + VectorizationFactor plan(bool OptForSize, unsigned UserVF); |
| 143 | + |
| 144 | + /// Finalize the best decision and dispose of all other VPlans. |
| 145 | + void setBestPlan(unsigned VF, unsigned UF); |
| 146 | + |
| 147 | + /// Generate the IR code for the body of the vectorized loop according to the |
| 148 | + /// best selected VPlan. |
| 149 | + void executePlan(InnerLoopVectorizer &LB, DominatorTree *DT); |
| 150 | + |
| 151 | + void printPlans(raw_ostream &O) { |
| 152 | + for (const auto &Plan : VPlans) |
| 153 | + O << *Plan; |
| 154 | + } |
| 155 | + |
| 156 | +protected: |
| 157 | + /// Collect the instructions from the original loop that would be trivially |
| 158 | + /// dead in the vectorized loop if generated. |
| 159 | + void collectTriviallyDeadInstructions( |
| 160 | + SmallPtrSetImpl<Instruction *> &DeadInstructions); |
| 161 | + |
| 162 | + /// A range of powers-of-2 vectorization factors with fixed start and |
| 163 | + /// adjustable end. The range includes start and excludes end, e.g.,: |
| 164 | + /// [1, 9) = {1, 2, 4, 8} |
| 165 | + struct VFRange { |
| 166 | + // A power of 2. |
| 167 | + const unsigned Start; |
| 168 | + |
| 169 | + // Need not be a power of 2. If End <= Start range is empty. |
| 170 | + unsigned End; |
| 171 | + }; |
| 172 | + |
| 173 | + /// Test a \p Predicate on a \p Range of VF's. Return the value of applying |
| 174 | + /// \p Predicate on Range.Start, possibly decreasing Range.End such that the |
| 175 | + /// returned value holds for the entire \p Range. |
| 176 | + bool getDecisionAndClampRange(const std::function<bool(unsigned)> &Predicate, |
| 177 | + VFRange &Range); |
| 178 | + |
| 179 | + /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive, |
| 180 | + /// according to the information gathered by Legal when it checked if it is |
| 181 | + /// legal to vectorize the loop. |
| 182 | + void buildVPlans(unsigned MinVF, unsigned MaxVF); |
| 183 | + |
| 184 | +private: |
| 185 | + /// A helper function that computes the predicate of the block BB, assuming |
| 186 | + /// that the header block of the loop is set to True. It returns the *entry* |
| 187 | + /// mask for the block BB. |
| 188 | + VPValue *createBlockInMask(BasicBlock *BB, VPlanPtr &Plan); |
| 189 | + |
| 190 | + /// A helper function that computes the predicate of the edge between SRC |
| 191 | + /// and DST. |
| 192 | + VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst, VPlanPtr &Plan); |
| 193 | + |
| 194 | + /// Check if \I belongs to an Interleave Group within the given VF \p Range, |
| 195 | + /// \return true in the first returned value if so and false otherwise. |
| 196 | + /// Build a new VPInterleaveGroup Recipe if \I is the primary member of an IG |
| 197 | + /// for \p Range.Start, and provide it as the second returned value. |
| 198 | + /// Note that if \I is an adjunct member of an IG for \p Range.Start, the |
| 199 | + /// \return value is <true, nullptr>, as it is handled by another recipe. |
| 200 | + /// \p Range.End may be decreased to ensure same decision from \p Range.Start |
| 201 | + /// to \p Range.End. |
| 202 | + VPInterleaveRecipe *tryToInterleaveMemory(Instruction *I, VFRange &Range); |
| 203 | + |
| 204 | + // Check if \I is a memory instruction to be widened for \p Range.Start and |
| 205 | + // potentially masked. Such instructions are handled by a recipe that takes an |
| 206 | + // additional VPInstruction for the mask. |
| 207 | + VPWidenMemoryInstructionRecipe *tryToWidenMemory(Instruction *I, |
| 208 | + VFRange &Range, |
| 209 | + VPlanPtr &Plan); |
| 210 | + |
| 211 | + /// Check if an induction recipe should be constructed for \I within the given |
| 212 | + /// VF \p Range. If so build and return it. If not, return null. \p Range.End |
| 213 | + /// may be decreased to ensure same decision from \p Range.Start to |
| 214 | + /// \p Range.End. |
| 215 | + VPWidenIntOrFpInductionRecipe *tryToOptimizeInduction(Instruction *I, |
| 216 | + VFRange &Range); |
| 217 | + |
| 218 | + /// Handle non-loop phi nodes. Currently all such phi nodes are turned into |
| 219 | + /// a sequence of select instructions as the vectorizer currently performs |
| 220 | + /// full if-conversion. |
| 221 | + VPBlendRecipe *tryToBlend(Instruction *I, VPlanPtr &Plan); |
| 222 | + |
| 223 | + /// Check if \p I can be widened within the given VF \p Range. If \p I can be |
| 224 | + /// widened for \p Range.Start, check if the last recipe of \p VPBB can be |
| 225 | + /// extended to include \p I or else build a new VPWidenRecipe for it and |
| 226 | + /// append it to \p VPBB. Return true if \p I can be widened for Range.Start, |
| 227 | + /// false otherwise. Range.End may be decreased to ensure same decision from |
| 228 | + /// \p Range.Start to \p Range.End. |
| 229 | + bool tryToWiden(Instruction *I, VPBasicBlock *VPBB, VFRange &Range); |
| 230 | + |
| 231 | + /// Build a VPReplicationRecipe for \p I and enclose it within a Region if it |
| 232 | + /// is predicated. \return \p VPBB augmented with this new recipe if \p I is |
| 233 | + /// not predicated, otherwise \return a new VPBasicBlock that succeeds the new |
| 234 | + /// Region. Update the packing decision of predicated instructions if they |
| 235 | + /// feed \p I. Range.End may be decreased to ensure same recipe behavior from |
| 236 | + /// \p Range.Start to \p Range.End. |
| 237 | + VPBasicBlock *handleReplication( |
| 238 | + Instruction *I, VFRange &Range, VPBasicBlock *VPBB, |
| 239 | + DenseMap<Instruction *, VPReplicateRecipe *> &PredInst2Recipe, |
| 240 | + VPlanPtr &Plan); |
| 241 | + |
| 242 | + /// Create a replicating region for instruction \p I that requires |
| 243 | + /// predication. \p PredRecipe is a VPReplicateRecipe holding \p I. |
| 244 | + VPRegionBlock *createReplicateRegion(Instruction *I, VPRecipeBase *PredRecipe, |
| 245 | + VPlanPtr &Plan); |
| 246 | + |
| 247 | + /// Build a VPlan according to the information gathered by Legal. \return a |
| 248 | + /// VPlan for vectorization factors \p Range.Start and up to \p Range.End |
| 249 | + /// exclusive, possibly decreasing \p Range.End. |
| 250 | + VPlanPtr buildVPlan(VFRange &Range, |
| 251 | + const SmallPtrSetImpl<Value *> &NeedDef); |
| 252 | +}; |
| 253 | + |
| 254 | +} // namespace llvm |
| 255 | + |
| 256 | +#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H |
0 commit comments