Index: include/llvm/Analysis/VectorUtils.h =================================================================== --- include/llvm/Analysis/VectorUtils.h +++ include/llvm/Analysis/VectorUtils.h @@ -203,9 +203,12 @@ /// /// Note: the interleaved load group could have gaps (missing members), but /// the interleaved store group doesn't allow gaps. -class InterleaveGroup { +template class InterleaveGroup { public: - InterleaveGroup(Instruction *Instr, int Stride, unsigned Align) + InterleaveGroup(unsigned Factor, bool Reverse, unsigned Align) + : Factor(Factor), Reverse(Reverse), Align(Align), InsertPos(nullptr) {} + + InterleaveGroup(InstTy *Instr, int Stride, unsigned Align) : Align(Align), InsertPos(Instr) { assert(Align && "The alignment should be non-zero"); @@ -226,7 +229,7 @@ /// negative if it is the new leader. /// /// \returns false if the instruction doesn't belong to the group. - bool insertMember(Instruction *Instr, int Index, unsigned NewAlign) { + bool insertMember(InstTy *Instr, int Index, unsigned NewAlign) { assert(NewAlign && "The new member's alignment should be non-zero"); int Key = Index + SmallestKey; @@ -258,7 +261,7 @@ /// Get the member with the given index \p Index /// /// \returns nullptr if contains no such member. - Instruction *getMember(unsigned Index) const { + InstTy *getMember(unsigned Index) const { int Key = SmallestKey + Index; if (!Members.count(Key)) return nullptr; @@ -268,16 +271,17 @@ /// Get the index for the given member. Unlike the key in the member /// map, the index starts from 0. - unsigned getIndex(Instruction *Instr) const { - for (auto I : Members) + unsigned getIndex(InstTy *Instr) const { + for (auto I : Members) { if (I.second == Instr) return I.first - SmallestKey; + } llvm_unreachable("InterleaveGroup contains no such member"); } - Instruction *getInsertPos() const { return InsertPos; } - void setInsertPos(Instruction *Inst) { InsertPos = Inst; } + InstTy *getInsertPos() const { return InsertPos; } + void setInsertPos(InstTy *Inst) { InsertPos = Inst; } /// Add metadata (e.g. alias info) from the instructions in this group to \p /// NewInst. @@ -285,18 +289,13 @@ /// FIXME: this function currently does not add noalias metadata a'la /// addNewMedata. To do that we need to compute the intersection of the /// noalias info from all members. - void addMetadata(Instruction *NewInst) const { - SmallVector VL; - std::transform(Members.begin(), Members.end(), std::back_inserter(VL), - [](std::pair p) { return p.second; }); - propagateMetadata(NewInst, VL); - } + void addMetadata(InstTy *NewInst) const; private: unsigned Factor; // Interleave Factor. bool Reverse; unsigned Align; - DenseMap Members; + DenseMap Members; int SmallestKey = 0; int LargestKey = 0; @@ -311,7 +310,7 @@ // store i32 %even // %odd = add i32 // Def of %odd // store i32 %odd // Insert Position - Instruction *InsertPos; + InstTy *InsertPos; }; /// Drive the analysis of interleaved memory accesses in the loop. @@ -330,7 +329,7 @@ : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {} ~InterleavedAccessInfo() { - SmallPtrSet DelSet; + SmallPtrSet *, 4> DelSet; // Avoid releasing a pointer twice. for (auto &I : InterleaveGroupMap) DelSet.insert(I.second); @@ -350,12 +349,17 @@ /// Get the interleave group that \p Instr belongs to. /// /// \returns nullptr if doesn't have such group. - InterleaveGroup *getInterleaveGroup(Instruction *Instr) const { + InterleaveGroup *getInterleaveGroup(Instruction *Instr) const { if (InterleaveGroupMap.count(Instr)) return InterleaveGroupMap.find(Instr)->second; return nullptr; } + iterator_range *>> + getInterleaveGroups() { + return make_range(InterleaveGroups.begin(), InterleaveGroups.end()); + } + /// Returns true if an interleaved group that may access memory /// out-of-bounds requires a scalar epilogue iteration for correctness. bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; } @@ -378,7 +382,9 @@ bool RequiresScalarEpilogue = false; /// Holds the relationships between the members and the interleave group. - DenseMap InterleaveGroupMap; + DenseMap *> InterleaveGroupMap; + + SmallPtrSet *, 4> InterleaveGroups; /// Holds dependences among the memory accesses in the loop. It maps a source /// access to a set of dependent sink accesses. @@ -411,20 +417,23 @@ /// stride \p Stride and alignment \p Align. /// /// \returns the newly created interleave group. - InterleaveGroup *createInterleaveGroup(Instruction *Instr, int Stride, - unsigned Align) { + InterleaveGroup * + createInterleaveGroup(Instruction *Instr, int Stride, unsigned Align) { assert(!InterleaveGroupMap.count(Instr) && "Already in an interleaved access group"); - InterleaveGroupMap[Instr] = new InterleaveGroup(Instr, Stride, Align); + InterleaveGroupMap[Instr] = + new InterleaveGroup(Instr, Stride, Align); + InterleaveGroups.insert(InterleaveGroupMap[Instr]); return InterleaveGroupMap[Instr]; } /// Release the group and remove all the relationships. - void releaseGroup(InterleaveGroup *Group) { + void releaseGroup(InterleaveGroup *Group) { for (unsigned i = 0; i < Group->getFactor(); i++) if (Instruction *Member = Group->getMember(i)) InterleaveGroupMap.erase(Member); + InterleaveGroups.erase(Group); delete Group; } Index: lib/Analysis/VectorUtils.cpp =================================================================== --- lib/Analysis/VectorUtils.cpp +++ lib/Analysis/VectorUtils.cpp @@ -685,9 +685,9 @@ collectDependences(); // Holds all interleaved store groups temporarily. - SmallSetVector StoreGroups; + SmallSetVector *, 4> StoreGroups; // Holds all interleaved load groups temporarily. - SmallSetVector LoadGroups; + SmallSetVector *, 4> LoadGroups; // Search in bottom-up program order for pairs of accesses (A and B) that can // form interleaved load or store groups. In the algorithm below, access A @@ -709,7 +709,7 @@ // Initialize a group for B if it has an allowable stride. Even if we don't // create a group for B, we continue with the bottom-up algorithm to ensure // we don't break any of B's dependences. - InterleaveGroup *Group = nullptr; + InterleaveGroup *Group = nullptr; if (isStrided(DesB.Stride)) { Group = getInterleaveGroup(B); if (!Group) { @@ -753,7 +753,7 @@ // illegal code motion. A will then be free to form another group with // instructions that precede it. if (isInterleaved(A)) { - InterleaveGroup *StoreGroup = getInterleaveGroup(A); + InterleaveGroup *StoreGroup = getInterleaveGroup(A); StoreGroups.remove(StoreGroup); releaseGroup(StoreGroup); } @@ -831,7 +831,7 @@ } // Iteration over B accesses. // Remove interleaved store groups with gaps. - for (InterleaveGroup *Group : StoreGroups) + for (auto *Group : StoreGroups) if (Group->getNumMembers() != Group->getFactor()) { LLVM_DEBUG( dbgs() << "LV: Invalidate candidate interleaved store group due " @@ -852,7 +852,7 @@ // This means that we can forcefully peel the loop in order to only have to // check the first pointer for no-wrap. When we'll change to use Assume=true // we'll only need at most one runtime check per interleaved group. - for (InterleaveGroup *Group : LoadGroups) { + for (auto *Group : LoadGroups) { // Case 1: A full group. Can Skip the checks; For full groups, if the wide // load would wrap around the address space we would do a memory access at // nullptr even without the transformation. @@ -902,3 +902,16 @@ } } } + +template <> +void InterleaveGroup::addMetadata(Instruction *NewInst) const { + SmallVector VL; + std::transform(Members.begin(), Members.end(), std::back_inserter(VL), + [](std::pair p) { return p.second; }); + propagateMetadata(NewInst, VL); +} + +template +void InterleaveGroup::addMetadata(InstT *NewInst) const { + llvm_unreachable("addMetadata can only be used for Instruction"); +} Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -929,7 +929,7 @@ /// Save vectorization decision \p W and \p Cost taken by the cost model for /// interleaving group \p Grp and vector width \p VF. - void setWideningDecision(const InterleaveGroup *Grp, unsigned VF, + void setWideningDecision(const InterleaveGroup *Grp, unsigned VF, InstWidening W, unsigned Cost) { assert(VF >= 2 && "Expected VF >=2"); /// Broadcast this decicion to all instructions inside the group. @@ -1057,7 +1057,8 @@ } /// Get the interleaved access group that \p Instr belongs to. - const InterleaveGroup *getInterleavedAccessGroup(Instruction *Instr) { + const InterleaveGroup * + getInterleavedAccessGroup(Instruction *Instr) { return InterleaveInfo.getInterleaveGroup(Instr); } @@ -1874,7 +1875,8 @@ // <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements // store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) { - const InterleaveGroup *Group = Cost->getInterleavedAccessGroup(Instr); + const InterleaveGroup *Group = + Cost->getInterleavedAccessGroup(Instr); assert(Group && "Fail to get an interleaved access group."); // Skip if current instruction is not the insert position. @@ -5892,7 +5894,7 @@ VPInterleaveRecipe *VPRecipeBuilder::tryToInterleaveMemory(Instruction *I, VFRange &Range) { - const InterleaveGroup *IG = CM.getInterleavedAccessGroup(I); + const InterleaveGroup *IG = CM.getInterleavedAccessGroup(I); if (!IG) return nullptr; @@ -6292,7 +6294,8 @@ // I is a member of an InterleaveGroup for Range.Start. If it's an adjunct // member of the IG, do not construct any Recipe for it. - const InterleaveGroup *IG = CM.getInterleavedAccessGroup(Instr); + const InterleaveGroup *IG = + CM.getInterleavedAccessGroup(Instr); if (IG && Instr != IG->getInsertPos() && Range.Start >= 2 && // Query is illegal for VF == 1 CM.getWideningDecision(Instr, Range.Start) == Index: lib/Transforms/Vectorize/VPlan.h =================================================================== --- lib/Transforms/Vectorize/VPlan.h +++ lib/Transforms/Vectorize/VPlan.h @@ -36,6 +36,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/IRBuilder.h" #include #include @@ -50,7 +51,7 @@ class BasicBlock; class DominatorTree; class InnerLoopVectorizer; -class InterleaveGroup; +template class InterleaveGroup; class LoopInfo; class raw_ostream; class Value; @@ -746,10 +747,10 @@ /// or stores into one wide load/store and shuffles. class VPInterleaveRecipe : public VPRecipeBase { private: - const InterleaveGroup *IG; + const InterleaveGroup *IG; public: - VPInterleaveRecipe(const InterleaveGroup *IG) + VPInterleaveRecipe(const InterleaveGroup *IG) : VPRecipeBase(VPInterleaveSC), IG(IG) {} ~VPInterleaveRecipe() override = default; @@ -764,7 +765,7 @@ /// Print the recipe. void print(raw_ostream &O, const Twine &Indent) const override; - const InterleaveGroup *getInterleaveGroup() { return IG; } + const InterleaveGroup *getInterleaveGroup() { return IG; } }; /// VPReplicateRecipe replicates a given instruction producing multiple scalar @@ -1334,6 +1335,34 @@ } }; +class VPInterleavedAccessInfo { +private: + DenseMap *> + InterleaveGroupMap; + +public: + VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI); + + ~VPInterleavedAccessInfo() { + SmallPtrSet *, 4> DelSet; + // Avoid releasing a pointer twice. + for (auto &I : InterleaveGroupMap) + DelSet.insert(I.second); + for (auto *Ptr : DelSet) + delete Ptr; + } + + /// Get the interleave group that \p Instr belongs to. + /// + /// \returns nullptr if doesn't have such group. + InterleaveGroup * + getInterleaveGroup(VPInstruction *Instr) const { + if (InterleaveGroupMap.count(Instr)) + return InterleaveGroupMap.find(Instr)->second; + return nullptr; + } +}; + } // end namespace llvm #endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H Index: lib/Transforms/Vectorize/VPlan.cpp =================================================================== --- lib/Transforms/Vectorize/VPlan.cpp +++ lib/Transforms/Vectorize/VPlan.cpp @@ -576,3 +576,36 @@ } O << "\\l\""; } + +VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan, + InterleavedAccessInfo &IAI) { + DenseMap *, InterleaveGroup *> + Old2New; + + VPRegionBlock *TopRegion = dyn_cast(Plan.getEntry()); + ReversePostOrderTraversal RPOT(TopRegion->getEntry()); + for (VPBlockBase *Base : RPOT) { + VPBasicBlock *VPBB = Base->getEntryBasicBlock(); + for (auto I = VPBB->begin(), E = VPBB->end(); I != E; I++) { + assert(isa(&*I) && "Can only handle VPInstructions"); + VPInstruction *VPInst = cast(&*I); + Instruction *Inst = cast(VPInst->getUnderlyingValue()); + auto *IG = IAI.getInterleaveGroup(Inst); + if (!IG) + continue; + + auto NewIGIter = Old2New.find(IG); + if (NewIGIter == Old2New.end()) + Old2New[IG] = new InterleaveGroup( + IG->getFactor(), IG->isReverse(), IG->getAlignment()); + + if (Inst == IG->getInsertPos()) + Old2New[IG]->setInsertPos(VPInst); + + InterleaveGroupMap[VPInst] = Old2New[IG]; + InterleaveGroupMap[VPInst]->insertMember( + VPInst, IG->getIndex(Inst), + IG->isReverse() ? (-1) * int(IG->getFactor()) : IG->getFactor()); + } + } +} Index: lib/Transforms/Vectorize/VPlanValue.h =================================================================== --- lib/Transforms/Vectorize/VPlanValue.h +++ lib/Transforms/Vectorize/VPlanValue.h @@ -38,6 +38,8 @@ // and live-outs which the VPlan will need to fix accordingly. class VPValue { friend class VPBuilder; + friend class VPInterleavedAccessInfo; + private: const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).