Index: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -979,8 +979,9 @@ class InterleavedAccessInfo { public: InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L, - DominatorTree *DT, LoopInfo *LI) - : PSE(PSE), TheLoop(L), DT(DT), LI(LI) {} + DominatorTree *DT, LoopInfo *LI, + const LoopAccessInfo *LAI) + : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {} ~InterleavedAccessInfo() { SmallSet<InterleaveGroup *, 4> DelSet; @@ -993,7 +994,7 @@ /// \brief Analyze the interleaved accesses and collect them in interleave - /// groups. Substitute symbolic strides using \p Strides. - void analyzeInterleaving(const ValueToValueMap &Strides); + /// groups. Symbolic strides are substituted using the LoopAccessInfo. + void analyzeInterleaving(); /// \brief Check if \p Instr belongs to any interleave group. bool isInterleaved(Instruction *Instr) const { @@ -1013,9 +1014,6 @@ /// out-of-bounds requires a scalar epilogue iteration for correctness. bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; } - /// \brief Initialize the LoopAccessInfo used for dependence checking. - void setLAI(const LoopAccessInfo *Info) { LAI = Info; } - private: /// A wrapper around ScalarEvolution, used to add runtime SCEV checks. /// Simplifies SCEV expressions in the context of existing SCEV assumptions. @@ -1026,7 +1024,7 @@ Loop *TheLoop; DominatorTree *DT; LoopInfo *LI; - const LoopAccessInfo *LAI = nullptr; + const LoopAccessInfo *LAI; /// True if the loop may contain non-reversed interleaved groups with /// out-of-bounds accesses. 
We ensure we don't speculatively access memory @@ -1523,8 +1521,7 @@ OptimizationRemarkEmitter *ORE, LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB, AssumptionCache *AC) : TheLoop(L), PSE(PSE), TLI(TLI), TTI(TTI), DT(DT), GetLAA(GetLAA), - ORE(ORE), InterleaveInfo(PSE, L, DT, LI), Requirements(R), Hints(H), - DB(DB), AC(AC) {} + ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC) {} /// ReductionList contains the reduction descriptors for all /// of the reductions that were found in the loop. @@ -1606,22 +1603,6 @@ const LoopAccessInfo *getLAI() const { return LAI; } - /// \brief Check if \p Instr belongs to any interleaved access group. - bool isAccessInterleaved(Instruction *Instr) { - return InterleaveInfo.isInterleaved(Instr); - } - - /// \brief Get the interleaved access group that \p Instr belongs to. - const InterleaveGroup *getInterleavedAccessGroup(Instruction *Instr) { - return InterleaveInfo.getInterleaveGroup(Instr); - } - - /// \brief Returns true if an interleaved group requires a scalar iteration - /// to handle accesses with gaps. - bool requiresScalarEpilogue() const { - return InterleaveInfo.requiresScalarEpilogue(); - } - unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); } uint64_t getMaxSafeRegisterWidth() const { @@ -1718,10 +1699,6 @@ /// Interface to emit optimization remarks. OptimizationRemarkEmitter *ORE; - /// The interleave access information contains groups of interleaved accesses - /// with the same stride and close to each other. - InterleavedAccessInfo InterleaveInfo; - // --- vectorization state --- // /// Holds the primary induction variable. 
This is the counter of the @@ -1793,9 +1770,10 @@ const TargetLibraryInfo *TLI, DemandedBits *DB, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, const Function *F, - const LoopVectorizeHints *Hints) + const LoopVectorizeHints *Hints, + InterleavedAccessInfo &IAI) : TheLoop(L), PSE(PSE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB), - AC(AC), ORE(ORE), TheFunction(F), Hints(Hints) {} + AC(AC), ORE(ORE), TheFunction(F), Hints(Hints), InterleaveInfo(IAI) {} /// \return An upper bound for the vectorization factor, or None if /// vectorization should be avoided up front. @@ -2035,6 +2013,22 @@ /// access that can be widened. bool memoryInstructionCanBeWidened(Instruction *I, unsigned VF = 1); + /// \brief Check if \p Instr belongs to any interleaved access group. + bool isAccessInterleaved(Instruction *Instr) { + return InterleaveInfo.isInterleaved(Instr); + } + + /// \brief Get the interleaved access group that \p Instr belongs to. + const InterleaveGroup *getInterleavedAccessGroup(Instruction *Instr) { + return InterleaveInfo.getInterleaveGroup(Instr); + } + + /// \brief Returns true if an interleaved group requires a scalar iteration + /// to handle accesses with gaps. + bool requiresScalarEpilogue() const { + return InterleaveInfo.requiresScalarEpilogue(); + } + private: unsigned NumPredStores = 0; @@ -2200,6 +2194,10 @@ /// Loop Vectorize Hint. const LoopVectorizeHints *Hints; + /// The interleave access information contains groups of interleaved accesses + /// with the same stride and close to each other. + InterleavedAccessInfo &InterleaveInfo; + /// Values to ignore in the cost model. 
SmallPtrSet<const Value *, 16> ValuesToIgnore; @@ -2861,7 +2859,7 @@ // <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements // store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) { - const InterleaveGroup *Group = Legal->getInterleavedAccessGroup(Instr); + const InterleaveGroup *Group = Cost->getInterleavedAccessGroup(Instr); assert(Group && "Fail to get an interleaved access group."); // Skip if current instruction is not the insert position. @@ -3270,7 +3268,7 @@ // does not evenly divide the trip count, no adjustment is necessary since // there will already be scalar iterations. Note that the minimum iterations // check ensures that N >= Step. - if (VF > 1 && Legal->requiresScalarEpilogue()) { + if (VF > 1 && Cost->requiresScalarEpilogue()) { auto *IsZero = Builder.CreateICmpEQ(R, ConstantInt::get(R->getType(), 0)); R = Builder.CreateSelect(IsZero, Step, R); } @@ -3321,8 +3319,8 @@ // vector trip count is zero. This check also covers the case where adding one // to the backedge-taken count overflowed leading to an incorrect trip count // of zero. In this case we will also jump to the scalar loop. - auto P = Legal->requiresScalarEpilogue() ? ICmpInst::ICMP_ULE - : ICmpInst::ICMP_ULT; + auto P = Cost->requiresScalarEpilogue() ? ICmpInst::ICMP_ULE + : ICmpInst::ICMP_ULT; Value *CheckMinIters = Builder.CreateICmp( P, Count, ConstantInt::get(Count->getType(), VF * UF), "min.iters.check"); @@ -4939,16 +4937,6 @@ : "") << "!\n"); - bool UseInterleaved = TTI->enableInterleavedAccessVectorization(); - - // If an override option has been passed in for interleaved accesses, use it. - if (EnableInterleavedMemAccesses.getNumOccurrences() > 0) - UseInterleaved = EnableInterleavedMemAccesses; - - // Analyze interleaved memory accesses. 
- if (UseInterleaved) - InterleaveInfo.analyzeInterleaving(*getSymbolicStrides()); - unsigned SCEVThreshold = VectorizeSCEVCheckThreshold; if (Hints->getForce() == LoopVectorizeHints::FK_Enabled) SCEVThreshold = PragmaVectorizeSCEVCheckThreshold; @@ -5639,7 +5627,6 @@ bool LoopVectorizationLegality::canVectorizeMemory() { LAI = &(*GetLAA)(*TheLoop); - InterleaveInfo.setLAI(LAI); const OptimizationRemarkAnalysis *LAR = LAI->getReport(); if (LAR) { ORE->emit([&]() { @@ -5814,9 +5801,9 @@ // this group because it and (2) are dependent. However, (1) can be grouped // with other accesses that may precede it in program order. Note that a // bottom-up order does not imply that WAW dependences should not be checked. -void InterleavedAccessInfo::analyzeInterleaving( - const ValueToValueMap &Strides) { +void InterleavedAccessInfo::analyzeInterleaving() { DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n"); + const ValueToValueMap &Strides = LAI->getSymbolicStrides(); // Holds all accesses with a constant stride. MapVector<Instruction *, StrideDescriptor> AccessStrideInfo; @@ -6257,7 +6244,7 @@ // optimization to non-pointer types. // if (T->isPointerTy() && !isConsecutiveLoadOrStore(&I) && - !Legal->isAccessInterleaved(&I) && !isLegalGatherOrScatter(&I)) + !isAccessInterleaved(&I) && !isLegalGatherOrScatter(&I)) continue; MinWidth = std::min(MinWidth, @@ -6918,7 +6905,7 @@ Type *VectorTy = ToVectorTy(ValTy, VF); unsigned AS = getMemInstAddressSpace(I); - auto Group = Legal->getInterleavedAccessGroup(I); + auto Group = getInterleavedAccessGroup(I); assert(Group && "Fail to get an interleaved access group."); unsigned InterleaveFactor = Group->getFactor(); @@ -7018,8 +7005,8 @@ // Choose between Interleaving, Gather/Scatter or Scalarization. 
unsigned InterleaveCost = std::numeric_limits<unsigned>::max(); unsigned NumAccesses = 1; - if (Legal->isAccessInterleaved(&I)) { - auto Group = Legal->getInterleavedAccessGroup(&I); + if (isAccessInterleaved(&I)) { + auto Group = getInterleavedAccessGroup(&I); assert(Group && "Fail to get an interleaved access group."); // Make one decision for the whole group. @@ -7056,7 +7043,7 @@ // If the instructions belongs to an interleave group, the whole group // receives the same decision. The whole group receives the cost, but // the cost will actually be assigned to one instruction. - if (auto Group = Legal->getInterleavedAccessGroup(&I)) + if (auto Group = getInterleavedAccessGroup(&I)) setWideningDecision(Group, VF, Decision, Cost); else setWideningDecision(&I, VF, Decision, Cost); @@ -7106,7 +7093,7 @@ // Scalarize a widened load of address. setWideningDecision(I, VF, CM_Scalarize, (VF * getMemoryInstructionCost(I, 1))); - else if (auto Group = Legal->getInterleavedAccessGroup(I)) { + else if (auto Group = getInterleavedAccessGroup(I)) { // Scalarize an interleave group of address loads. for (unsigned I = 0; I < Group->getFactor(); ++I) { if (Instruction *Member = Group->getMember(I)) @@ -7697,7 +7684,7 @@ VPInterleaveRecipe * LoopVectorizationPlanner::tryToInterleaveMemory(Instruction *I, VFRange &Range) { - const InterleaveGroup *IG = Legal->getInterleavedAccessGroup(I); + const InterleaveGroup *IG = CM.getInterleavedAccessGroup(I); if (!IG) return nullptr; @@ -8035,7 +8022,7 @@ // I is a member of an InterleaveGroup for Range.Start. If it's an adjunct // member of the IG, do not construct any Recipe for it. 
- const InterleaveGroup *IG = Legal->getInterleavedAccessGroup(Instr); + const InterleaveGroup *IG = CM.getInterleavedAccessGroup(Instr); if (IG && Instr != IG->getInsertPos() && Range.Start >= 2 && // Query is illegal for VF == 1 CM.getWideningDecision(Instr, Range.Start) == @@ -8436,9 +8423,21 @@ return false; } + bool UseInterleaved = TTI->enableInterleavedAccessVectorization(); + InterleavedAccessInfo IAI(PSE, L, DT, LI, LVL.getLAI()); + + // If an override option has been passed in for interleaved accesses, use it. + if (EnableInterleavedMemAccesses.getNumOccurrences() > 0) + UseInterleaved = EnableInterleavedMemAccesses; + + // Analyze interleaved memory accesses. + if (UseInterleaved) { + IAI.analyzeInterleaving(); + } + // Use the cost model. LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE, F, - &Hints); + &Hints, IAI); CM.collectValuesToIgnore(); // Use the planner for vectorization.