diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -546,6 +546,7 @@ /// BlockInMask is non-null. Use \p State to translate given VPValues to IR /// values in the vectorized loop. void vectorizeInterleaveGroup(const InterleaveGroup *Group, + ArrayRef VPDefs, VPTransformState &State, VPValue *Addr, VPValue *BlockInMask = nullptr); @@ -2318,8 +2319,8 @@ // <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements // store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B void InnerLoopVectorizer::vectorizeInterleaveGroup( - const InterleaveGroup *Group, VPTransformState &State, - VPValue *Addr, VPValue *BlockInMask) { + const InterleaveGroup *Group, ArrayRef VPDefs, + VPTransformState &State, VPValue *Addr, VPValue *BlockInMask) { Instruction *Instr = Group->getInsertPos(); const DataLayout &DL = Instr->getModule()->getDataLayout(); @@ -2421,6 +2422,7 @@ // For each member in the group, shuffle out the appropriate data from the // wide loads. + unsigned J = 0; for (unsigned I = 0; I < InterleaveFactor; ++I) { Instruction *Member = Group->getMember(I); @@ -2445,8 +2447,9 @@ if (Group->isReverse()) StridedVec = reverseVector(StridedVec); - VectorLoopValueMap.setVectorValue(Member, Part, StridedVec); + State.set(VPDefs[J], Member, StridedVec, Part); } + ++J; } return; } @@ -7283,9 +7286,8 @@ return BlockMaskCache[BB] = BlockMask; } -VPWidenMemoryInstructionRecipe * -VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range, - VPlanPtr &Plan) { +VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range, + VPlanPtr &Plan) { assert((isa(I) || isa(I)) && "Must be called with either a load or store"); @@ -7313,6 +7315,23 @@ Mask = createBlockInMask(I->getParent(), Plan); VPValue *Addr = Plan->getOrAddVPValue(getLoadStorePointerOperand(I)); + auto II = InsertPtToGroup.find(I); + if (II != InsertPtToGroup.end()) { + auto *IG = II->second; + auto *InterleaveG = new VPInterleaveRecipe(IG, Addr, Mask); + + // If this is a load interleave group, add sub-values for each member. + if (isa(I)) { + unsigned j = 0; + for (unsigned i = 0; i < IG->getFactor(); i++) + if (Instruction *Member = IG->getMember(i)) { + Plan->addVPValue(Member, InterleaveG->getResult(j)); + j++; + } + } + return InterleaveG; + } + if (LoadInst *Load = dyn_cast(I)) return new VPWidenMemoryInstructionRecipe(*Load, Addr, Mask); @@ -7661,6 +7680,7 @@ DenseMap PredInst2Recipe; SmallPtrSet *, 1> InterleaveGroups; + SmallPtrSet DeadInterleaveGroupMembers; VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, PSE, Builder); @@ -7706,11 +7726,30 @@ if (!getDecisionAndClampRange(applyIG, Range)) continue; InterleaveGroups.insert(IG); + RecipeBuilder.recordInterleaveGroup(IG); for (unsigned i = 0; i < IG->getFactor(); i++) - if (Instruction *Member = IG->getMember(i)) + if (Instruction *Member = IG->getMember(i)) { RecipeBuilder.recordRecipeOf(Member); + if (Member != IG->getInsertPos()) + DeadInterleaveGroupMembers.insert(Member); + } }; + auto skipDeadInterleaveMembers = + [&DeadInterleaveGroupMembers](Instruction *I) { + BasicBlock *BB = I->getParent(); + for (auto &I : make_range(I->getIterator(), BB->end())) + if (!DeadInterleaveGroupMembers.contains(&I)) + return &I; + llvm_unreachable("Need to find a valid insert point"); + }; + // Mark instructions we'll need to sink later and their targets as + // ingredients whose recipe we'll need to record. + for (auto &Entry : SinkAfter) { + RecipeBuilder.recordRecipeOf(skipDeadInterleaveMembers(Entry.first)); + RecipeBuilder.recordRecipeOf(skipDeadInterleaveMembers(Entry.second)); + } + // --------------------------------------------------------------------------- // Build initial VPlan: Scan the body of the loop in a topological order to // visit each basic block after having visited its predecessor basic blocks. @@ -7746,7 +7785,8 @@ // First filter out irrelevant instructions, to ensure no recipes are // built for them. - if (isa(Instr) || DeadInstructions.count(Instr)) + if (isa(Instr) || DeadInstructions.count(Instr) || + DeadInterleaveGroupMembers.contains(Instr)) continue; if (auto Recipe = @@ -7794,33 +7834,13 @@ // Apply Sink-After legal constraints. for (auto &Entry : SinkAfter) { - VPRecipeBase *Sink = RecipeBuilder.getRecipe(Entry.first); - VPRecipeBase *Target = RecipeBuilder.getRecipe(Entry.second); + VPRecipeBase *Sink = + RecipeBuilder.getRecipe(skipDeadInterleaveMembers(Entry.first)); + VPRecipeBase *Target = + RecipeBuilder.getRecipe(skipDeadInterleaveMembers(Entry.second)); Sink->moveAfter(Target); } - // Interleave memory: for each Interleave Group we marked earlier as relevant - // for this VPlan, replace the Recipes widening its memory instructions with a - // single VPInterleaveRecipe at its insertion point. - for (auto IG : InterleaveGroups) { - auto *Recipe = cast( - RecipeBuilder.getRecipe(IG->getInsertPos())); - (new VPInterleaveRecipe(IG, Recipe->getAddr(), Recipe->getMask())) - ->insertBefore(Recipe); - - for (unsigned i = 0; i < IG->getFactor(); ++i) - if (Instruction *Member = IG->getMember(i)) { - VPValue *NewVPV = nullptr; - if (!Member->getType()->isVoidTy()) { - NewVPV = new VPValue(Member); - Plan->getVPValue(Member)->replaceAllUsesWith(NewVPV); - } - RecipeBuilder.getRecipe(Member)->eraseFromParent(); - if (NewVPV) - Plan->addVPValue(Member, NewVPV); - } - } - // Adjust the recipes for any inloop reductions. if (Range.Start > 1) adjustRecipesForInLoopReductions(Plan, RecipeBuilder); @@ -8038,7 +8058,8 @@ void VPInterleaveRecipe::execute(VPTransformState &State) { assert(!State.Instance && "Interleave group being replicated."); - State.ILV->vectorizeInterleaveGroup(IG, State, getAddr(), getMask()); + State.ILV->vectorizeInterleaveGroup(IG, getDefs(), State, getAddr(), + getMask()); } void VPReductionRecipe::execute(VPTransformState &State) { diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -53,6 +53,8 @@ // marked by having a nullptr entry in this map. DenseMap Ingredient2Recipe; + DenseMap *> InsertPtToGroup; + /// Check if \p I can be widened at the start of \p Range and possibly /// decrease the range such that the returned value holds for the entire \p /// Range. The function should not be called for memory instructions or calls. @@ -61,8 +63,8 @@ /// Check if the load or store instruction \p I should widened for \p /// Range.Start and potentially masked. Such instructions are handled by a /// recipe that takes an additional VPInstruction for the mask. - VPWidenMemoryInstructionRecipe * - tryToWidenMemory(Instruction *I, VFRange &Range, VPlanPtr &Plan); + VPRecipeBase *tryToWidenMemory(Instruction *I, VFRange &Range, + VPlanPtr &Plan); /// Check if an induction recipe should be constructed for \I. If so build and /// return it. If not, return null. @@ -129,6 +131,10 @@ Ingredient2Recipe[I] = nullptr; } + void recordInterleaveGroup(const InterleaveGroup *IG) { + InsertPtToGroup[IG->getInsertPos()] = IG; + } + /// Return the recipe created for given ingredient. VPRecipeBase *getRecipe(Instruction *I) { assert(Ingredient2Recipe.count(I) && diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1017,22 +1017,33 @@ /// VPInterleaveRecipe is a recipe for transforming an interleave group of load /// or stores into one wide load/store and shuffles. -class VPInterleaveRecipe : public VPRecipeBase, public VPUser { +class VPInterleaveRecipe : public VPRecipeBase, public VPValue, public VPUser { const InterleaveGroup *IG; public: + SmallVector Defs; + VPInterleaveRecipe(const InterleaveGroup *IG, VPValue *Addr, VPValue *Mask) - : VPRecipeBase(VPInterleaveSC), VPUser({Addr}), IG(IG) { + : VPRecipeBase(VPRecipeBase::VPInterleaveSC), + VPValue(VPValue::VPVInterleaveSC), VPUser({Addr}), IG(IG) { if (Mask) addOperand(Mask); + for (unsigned i = 0; i < IG->getNumMembers(); i++) + Defs.push_back(new VPValue(this)); + } + ~VPInterleaveRecipe() override { + for (auto *Def : Defs) + delete Def; } - ~VPInterleaveRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const VPRecipeBase *V) { return V->getVPRecipeID() == VPRecipeBase::VPInterleaveSC; } + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPValue::VPVInterleaveSC; + } /// Return the address accessed by this recipe. VPValue *getAddr() const { @@ -1054,6 +1065,9 @@ VPSlotTracker &SlotTracker) const override; const InterleaveGroup *getInterleaveGroup() { return IG; } + + VPValue *getResult(unsigned Idx) { return Defs[Idx]; } + ArrayRef getDefs() { return Defs; } }; /// A recipe to represent inloop reduction operations, performing a reduction on diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -342,8 +342,15 @@ void VPBasicBlock::dropAllReferences(VPValue *NewValue) { for (VPRecipeBase &R : Recipes) { - if (auto *VPV = R.toVPValue()) - VPV->replaceAllUsesWith(NewValue); + if (auto *InterleaveR = dyn_cast(&R)) { + for (auto *Def : InterleaveR->Defs) + Def->replaceAllUsesWith(NewValue); + } else { + if (auto *VPV = R.toVPValue()) { + assert(VPV->isConcrete() && "virtual/sub-values need special handling"); + VPV->replaceAllUsesWith(NewValue); + } + } if (auto *User = R.toVPUser()) for (unsigned I = 0, E = User->getNumOperands(); I != E; I++) @@ -970,6 +977,8 @@ template void DomTreeBuilder::Calculate(VPDominatorTree &DT); void VPValue::replaceAllUsesWith(VPValue *New) { + assert((isConcrete() || isSubValue()) && + "can only replace concrete or sub-values"); for (unsigned J = 0; J < getNumUsers();) { VPUser *User = Users[J]; unsigned NumUsers = getNumUsers(); @@ -1047,9 +1056,11 @@ void VPSlotTracker::assignSlot(const VPValue *V) { assert(Slots.find(V) == Slots.end() && "VPValue already has a slot!"); - const Value *UV = V->getUnderlyingValue(); - if (UV) - return; + if (!V->isSubValue()) { + const Value *UV = V->getUnderlyingValue(); + if (UV) + return; + } const auto *VPI = dyn_cast(V); if (VPI && !VPI->hasResult()) return; diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -909,8 +909,8 @@ ; CHECK: %[[VSHUF1:.+]] = shufflevector <8 x i16> %wide.vec, <8 x i16> undef, <4 x i32> ; CHECK: %[[VSHUF:.+]] = shufflevector <4 x i16> %vector.recur, <4 x i16> %[[VSHUF1]], <4 x i32> ; CHECK: sext <4 x i16> %[[VSHUF0]] to <4 x i32> -; CHECK: sext <4 x i16> %[[VSHUF]] to <4 x i32> ; CHECK: sext <4 x i16> %[[VSHUF1]] to <4 x i32> +; CHECK: sext <4 x i16> %[[VSHUF]] to <4 x i32> ; CHECK: mul nsw <4 x i32> ; CHECK: mul nsw <4 x i32>