Index: include/llvm/Analysis/VectorUtils.h =================================================================== --- include/llvm/Analysis/VectorUtils.h +++ include/llvm/Analysis/VectorUtils.h @@ -24,7 +24,7 @@ template class ArrayRef; class DemandedBits; class GetElementPtrInst; -class InterleaveGroup; +template class InterleaveGroup; class Loop; class ScalarEvolution; class TargetTransformInfo; @@ -138,7 +138,7 @@ /// create[*]Mask() utilities which create a shuffle mask (mask that /// consists of indices). Constant *createBitMaskForGaps(IRBuilder<> &Builder, unsigned VF, - const InterleaveGroup &Group); + const InterleaveGroup &Group); /// Create a mask with replicated elements. /// @@ -233,9 +233,12 @@ /// /// Note: the interleaved load group could have gaps (missing members), but /// the interleaved store group doesn't allow gaps. -class InterleaveGroup { +template class InterleaveGroup { public: - InterleaveGroup(Instruction *Instr, int Stride, unsigned Align) + InterleaveGroup(unsigned Factor, bool Reverse, unsigned Align) + : Factor(Factor), Reverse(Reverse), Align(Align), InsertPos(nullptr) {} + + InterleaveGroup(InstTy *Instr, int Stride, unsigned Align) : Align(Align), InsertPos(Instr) { assert(Align && "The alignment should be non-zero"); @@ -256,7 +259,7 @@ /// negative if it is the new leader. /// /// \returns false if the instruction doesn't belong to the group. - bool insertMember(Instruction *Instr, int Index, unsigned NewAlign) { + bool insertMember(InstTy *Instr, int Index, unsigned NewAlign) { assert(NewAlign && "The new member's alignment should be non-zero"); int Key = Index + SmallestKey; @@ -288,7 +291,7 @@ /// Get the member with the given index \p Index /// /// \returns nullptr if contains no such member. - Instruction *getMember(unsigned Index) const { + InstTy *getMember(unsigned Index) const { int Key = SmallestKey + Index; auto Member = Members.find(Key); if (Member == Members.end()) @@ -299,16 +302,17 @@ /// Get the index for the given member. Unlike the key in the member /// map, the index starts from 0. - unsigned getIndex(Instruction *Instr) const { - for (auto I : Members) + unsigned getIndex(const InstTy *Instr) const { + for (auto I : Members) { if (I.second == Instr) return I.first - SmallestKey; + } llvm_unreachable("InterleaveGroup contains no such member"); } - Instruction *getInsertPos() const { return InsertPos; } - void setInsertPos(Instruction *Inst) { InsertPos = Inst; } + InstTy *getInsertPos() const { return InsertPos; } + void setInsertPos(InstTy *Inst) { InsertPos = Inst; } /// Add metadata (e.g. alias info) from the instructions in this group to \p /// NewInst. @@ -316,12 +320,7 @@ /// FIXME: this function currently does not add noalias metadata a'la /// addNewMedata. To do that we need to compute the intersection of the /// noalias info from all members. - void addMetadata(Instruction *NewInst) const { - SmallVector VL; - std::transform(Members.begin(), Members.end(), std::back_inserter(VL), - [](std::pair p) { return p.second; }); - propagateMetadata(NewInst, VL); - } + void addMetadata(InstTy *NewInst) const; /// Returns true if this Group requires a scalar iteration to handle gaps. bool requiresScalarEpilogue() const { @@ -344,7 +343,7 @@ unsigned Factor; // Interleave Factor. bool Reverse; unsigned Align; - DenseMap Members; + DenseMap Members; int SmallestKey = 0; int LargestKey = 0; @@ -359,7 +358,7 @@ // store i32 %even // %odd = add i32 // Def of %odd // store i32 %odd // Insert Position - Instruction *InsertPos; + InstTy *InsertPos; }; /// Drive the analysis of interleaved memory accesses in the loop. @@ -390,7 +389,7 @@ /// formation for predicated accesses, we may be able to relax this limitation /// in the future once we handle more complicated blocks. void reset() { - SmallPtrSet DelSet; + SmallPtrSet *, 4> DelSet; // Avoid releasing a pointer twice. for (auto &I : InterleaveGroupMap) DelSet.insert(I.second); @@ -409,11 +408,16 @@ /// Get the interleave group that \p Instr belongs to. /// /// \returns nullptr if doesn't have such group. - InterleaveGroup *getInterleaveGroup(Instruction *Instr) const { - auto Group = InterleaveGroupMap.find(Instr); - if (Group == InterleaveGroupMap.end()) - return nullptr; - return Group->second; + InterleaveGroup * + getInterleaveGroup(const Instruction *Instr) const { + if (InterleaveGroupMap.count(Instr)) + return InterleaveGroupMap.find(Instr)->second; + return nullptr; + } + + iterator_range *>> + getInterleaveGroups() { + return make_range(InterleaveGroups.begin(), InterleaveGroups.end()); } /// Returns true if an interleaved group that may access memory @@ -443,7 +447,9 @@ bool RequiresScalarEpilogue = false; /// Holds the relationships between the members and the interleave group. - DenseMap InterleaveGroupMap; + DenseMap *> InterleaveGroupMap; + + SmallPtrSet *, 4> InterleaveGroups; /// Holds dependences among the memory accesses in the loop. It maps a source /// access to a set of dependent sink accesses. @@ -476,19 +482,23 @@ /// stride \p Stride and alignment \p Align. /// /// \returns the newly created interleave group. - InterleaveGroup *createInterleaveGroup(Instruction *Instr, int Stride, - unsigned Align) { - assert(!isInterleaved(Instr) && "Already in an interleaved access group"); - InterleaveGroupMap[Instr] = new InterleaveGroup(Instr, Stride, Align); + InterleaveGroup * + createInterleaveGroup(Instruction *Instr, int Stride, unsigned Align) { + assert(!InterleaveGroupMap.count(Instr) && + "Already in an interleaved access group"); + InterleaveGroupMap[Instr] = + new InterleaveGroup(Instr, Stride, Align); + InterleaveGroups.insert(InterleaveGroupMap[Instr]); return InterleaveGroupMap[Instr]; } /// Release the group and remove all the relationships. - void releaseGroup(InterleaveGroup *Group) { + void releaseGroup(InterleaveGroup *Group) { for (unsigned i = 0; i < Group->getFactor(); i++) if (Instruction *Member = Group->getMember(i)) InterleaveGroupMap.erase(Member); + InterleaveGroups.erase(Group); delete Group; } Index: lib/Analysis/VectorUtils.cpp =================================================================== --- lib/Analysis/VectorUtils.cpp +++ lib/Analysis/VectorUtils.cpp @@ -505,8 +505,9 @@ return Inst; } -Constant *llvm::createBitMaskForGaps(IRBuilder<> &Builder, unsigned VF, - const InterleaveGroup &Group) { +Constant * +llvm::createBitMaskForGaps(IRBuilder<> &Builder, unsigned VF, + const InterleaveGroup &Group) { // All 1's means mask is not needed. if (Group.getNumMembers() == Group.getFactor()) return nullptr; @@ -720,9 +721,9 @@ collectDependences(); // Holds all interleaved store groups temporarily. - SmallSetVector StoreGroups; + SmallSetVector *, 4> StoreGroups; // Holds all interleaved load groups temporarily. - SmallSetVector LoadGroups; + SmallSetVector *, 4> LoadGroups; // Search in bottom-up program order for pairs of accesses (A and B) that can // form interleaved load or store groups. In the algorithm below, access A @@ -744,7 +745,7 @@ // Initialize a group for B if it has an allowable stride. Even if we don't // create a group for B, we continue with the bottom-up algorithm to ensure // we don't break any of B's dependences. - InterleaveGroup *Group = nullptr; + InterleaveGroup *Group = nullptr; if (isStrided(DesB.Stride) && (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) { Group = getInterleaveGroup(B); @@ -789,7 +790,7 @@ // illegal code motion. A will then be free to form another group with // instructions that precede it. if (isInterleaved(A)) { - InterleaveGroup *StoreGroup = getInterleaveGroup(A); + InterleaveGroup *StoreGroup = getInterleaveGroup(A); StoreGroups.remove(StoreGroup); releaseGroup(StoreGroup); } @@ -868,7 +869,7 @@ } // Iteration over B accesses. // Remove interleaved store groups with gaps. - for (InterleaveGroup *Group : StoreGroups) + for (auto *Group : StoreGroups) if (Group->getNumMembers() != Group->getFactor()) { LLVM_DEBUG( dbgs() << "LV: Invalidate candidate interleaved store group due " @@ -889,7 +890,7 @@ // This means that we can forcefully peel the loop in order to only have to // check the first pointer for no-wrap. When we'll change to use Assume=true // we'll only need at most one runtime check per interleaved group. - for (InterleaveGroup *Group : LoadGroups) { + for (auto *Group : LoadGroups) { // Case 1: A full group. Can Skip the checks; For full groups, if the wide // load would wrap around the address space we would do a memory access at // nullptr even without the transformation. @@ -947,9 +948,9 @@ return; // Avoid releasing a Group twice. - SmallPtrSet DelSet; + SmallPtrSet *, 4> DelSet; for (auto &I : InterleaveGroupMap) { - InterleaveGroup *Group = I.second; + InterleaveGroup *Group = I.second; if (Group->requiresScalarEpilogue()) DelSet.insert(Group); } @@ -964,3 +965,16 @@ RequiresScalarEpilogue = false; } + +template <> +void InterleaveGroup::addMetadata(Instruction *NewInst) const { + SmallVector VL; + std::transform(Members.begin(), Members.end(), std::back_inserter(VL), + [](std::pair p) { return p.second; }); + propagateMetadata(NewInst, VL); +} + +template +void InterleaveGroup::addMetadata(InstT *NewInst) const { + llvm_unreachable("addMetadata can only be used for Instruction"); +} Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -976,7 +976,7 @@ /// Save vectorization decision \p W and \p Cost taken by the cost model for /// interleaving group \p Grp and vector width \p VF. - void setWideningDecision(const InterleaveGroup *Grp, unsigned VF, + void setWideningDecision(const InterleaveGroup *Grp, unsigned VF, InstWidening W, unsigned Cost) { assert(VF >= 2 && "Expected VF >=2"); /// Broadcast this decicion to all instructions inside the group. @@ -1131,7 +1131,8 @@ } /// Get the interleaved access group that \p Instr belongs to. - const InterleaveGroup *getInterleavedAccessGroup(Instruction *Instr) { + const InterleaveGroup * + getInterleavedAccessGroup(Instruction *Instr) { return InterleaveInfo.getInterleaveGroup(Instr); } @@ -1994,7 +1995,8 @@ // store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr, VectorParts *BlockInMask) { - const InterleaveGroup *Group = Cost->getInterleavedAccessGroup(Instr); + const InterleaveGroup *Group = + Cost->getInterleavedAccessGroup(Instr); assert(Group && "Fail to get an interleaved access group."); // Skip if current instruction is not the insert position. @@ -6377,7 +6379,7 @@ VPInterleaveRecipe *VPRecipeBuilder::tryToInterleaveMemory(Instruction *I, VFRange &Range, VPlanPtr &Plan) { - const InterleaveGroup *IG = CM.getInterleavedAccessGroup(I); + const InterleaveGroup *IG = CM.getInterleavedAccessGroup(I); if (!IG) return nullptr; @@ -6793,7 +6795,8 @@ // I is a member of an InterleaveGroup for Range.Start. If it's an adjunct // member of the IG, do not construct any Recipe for it. - const InterleaveGroup *IG = CM.getInterleavedAccessGroup(Instr); + const InterleaveGroup *IG = + CM.getInterleavedAccessGroup(Instr); if (IG && Instr != IG->getInsertPos() && Range.Start >= 2 && // Query is illegal for VF == 1 CM.getWideningDecision(Instr, Range.Start) == Index: lib/Transforms/Vectorize/VPlan.h =================================================================== --- lib/Transforms/Vectorize/VPlan.h +++ lib/Transforms/Vectorize/VPlan.h @@ -38,6 +38,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/IRBuilder.h" #include #include @@ -52,7 +53,8 @@ class BasicBlock; class DominatorTree; class InnerLoopVectorizer; -class InterleaveGroup; +template class InterleaveGroup; +class LoopInfo; class raw_ostream; class Value; class VPBasicBlock; @@ -771,11 +773,11 @@ /// or stores into one wide load/store and shuffles. class VPInterleaveRecipe : public VPRecipeBase { private: - const InterleaveGroup *IG; + const InterleaveGroup *IG; std::unique_ptr User; public: - VPInterleaveRecipe(const InterleaveGroup *IG, VPValue *Mask) + VPInterleaveRecipe(const InterleaveGroup *IG, VPValue *Mask) : VPRecipeBase(VPInterleaveSC), IG(IG) { if (Mask) // Create a VPInstruction to register as a user of the mask. User.reset(new VPUser({Mask})); @@ -793,7 +795,7 @@ /// Print the recipe. void print(raw_ostream &O, const Twine &Indent) const override; - const InterleaveGroup *getInterleaveGroup() { return IG; } + const InterleaveGroup *getInterleaveGroup() { return IG; } }; /// VPReplicateRecipe replicates a given instruction producing multiple scalar @@ -1464,6 +1466,34 @@ } }; +class VPInterleavedAccessInfo { +private: + DenseMap *> + InterleaveGroupMap; + +public: + VPInterleavedAccessInfo(VPlan &Plan, InterleavedAccessInfo &IAI); + + ~VPInterleavedAccessInfo() { + SmallPtrSet *, 4> DelSet; + // Avoid releasing a pointer twice. + for (auto &I : InterleaveGroupMap) + DelSet.insert(I.second); + for (auto *Ptr : DelSet) + delete Ptr; + } + + /// Get the interleave group that \p Instr belongs to. + /// + /// \returns nullptr if doesn't have such group. + InterleaveGroup * + getInterleaveGroup(VPInstruction *Instr) const { + if (InterleaveGroupMap.count(Instr)) + return InterleaveGroupMap.find(Instr)->second; + return nullptr; + } +}; + } // end namespace llvm #endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H Index: lib/Transforms/Vectorize/VPlan.cpp =================================================================== --- lib/Transforms/Vectorize/VPlan.cpp +++ lib/Transforms/Vectorize/VPlan.cpp @@ -680,3 +680,36 @@ } template void DomTreeBuilder::Calculate(VPDominatorTree &DT); + +VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan, + InterleavedAccessInfo &IAI) { + DenseMap *, InterleaveGroup *> + Old2New; + + auto *TopRegion = cast(Plan.getEntry()); + ReversePostOrderTraversal RPOT(TopRegion->getEntry()); + for (VPBlockBase *Base : RPOT) { + VPBasicBlock *VPBB = Base->getEntryBasicBlock(); + for (auto I = VPBB->begin(), E = VPBB->end(); I != E; I++) { + assert(isa(&*I) && "Can only handle VPInstructions"); + auto *VPInst = cast(&*I); + auto *Inst = cast(VPInst->getUnderlyingValue()); + auto *IG = IAI.getInterleaveGroup(Inst); + if (!IG) + continue; + + auto NewIGIter = Old2New.find(IG); + if (NewIGIter == Old2New.end()) + Old2New[IG] = new InterleaveGroup( + IG->getFactor(), IG->isReverse(), IG->getAlignment()); + + if (Inst == IG->getInsertPos()) + Old2New[IG]->setInsertPos(VPInst); + + InterleaveGroupMap[VPInst] = Old2New[IG]; + InterleaveGroupMap[VPInst]->insertMember( + VPInst, IG->getIndex(Inst), + IG->isReverse() ? (-1) * int(IG->getFactor()) : IG->getFactor()); + } + } +} Index: lib/Transforms/Vectorize/VPlanValue.h =================================================================== --- lib/Transforms/Vectorize/VPlanValue.h +++ lib/Transforms/Vectorize/VPlanValue.h @@ -40,6 +40,7 @@ friend class VPBuilder; friend class VPlanHCFGTransforms; friend class VPBasicBlock; + friend class VPInterleavedAccessInfo; private: const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).