diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -549,6 +549,7 @@ /// BlockInMask is non-null. Use \p State to translate given VPValues to IR /// values in the vectorized loop. void vectorizeInterleaveGroup(const InterleaveGroup *Group, + ArrayRef VPDefs, VPTransformState &State, VPValue *Addr, VPValue *BlockInMask = nullptr); @@ -2321,8 +2322,8 @@ // <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements // store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B void InnerLoopVectorizer::vectorizeInterleaveGroup( - const InterleaveGroup *Group, VPTransformState &State, - VPValue *Addr, VPValue *BlockInMask) { + const InterleaveGroup *Group, ArrayRef VPDefs, + VPTransformState &State, VPValue *Addr, VPValue *BlockInMask) { Instruction *Instr = Group->getInsertPos(); const DataLayout &DL = Instr->getModule()->getDataLayout(); @@ -2424,6 +2425,7 @@ // For each member in the group, shuffle out the appropriate data from the // wide loads. + unsigned J = 0; for (unsigned I = 0; I < InterleaveFactor; ++I) { Instruction *Member = Group->getMember(I); @@ -2448,8 +2450,9 @@ if (Group->isReverse()) StridedVec = reverseVector(StridedVec); - VectorLoopValueMap.setVectorValue(Member, Part, StridedVec); + State.set(VPDefs[J], Member, StridedVec, Part); } + ++J; } return; } @@ -7307,9 +7310,8 @@ return BlockMaskCache[BB] = BlockMask; } -VPWidenMemoryInstructionRecipe * -VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range, - VPlanPtr &Plan) { +VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range, + VPlanPtr &Plan) { assert((isa(I) || isa(I)) && "Must be called with either a load or store"); @@ -7337,6 +7339,12 @@ Mask = createBlockInMask(I->getParent(), Plan); VPValue *Addr = Plan->getOrAddVPValue(getLoadStorePointerOperand(I)); + auto II = InsertPtToGroup.find(I); + if (II != InsertPtToGroup.end()) { + auto *IG = II->second; + return new VPInterleaveRecipe(IG, Addr, Mask); + } + if (LoadInst *Load = dyn_cast(I)) return new VPWidenMemoryInstructionRecipe(*Load, Addr, Mask); @@ -7687,6 +7695,7 @@ DenseMap PredInst2Recipe; SmallPtrSet *, 1> InterleaveGroups; + SmallPtrSet DeadInterleaveGroupMembers; VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, PSE, Builder); @@ -7732,11 +7741,30 @@ if (!getDecisionAndClampRange(applyIG, Range)) continue; InterleaveGroups.insert(IG); + RecipeBuilder.recordInterleaveGroup(IG); for (unsigned i = 0; i < IG->getFactor(); i++) - if (Instruction *Member = IG->getMember(i)) + if (Instruction *Member = IG->getMember(i)) { RecipeBuilder.recordRecipeOf(Member); + if (Member != IG->getInsertPos()) + DeadInterleaveGroupMembers.insert(Member); + } }; + auto skipDeadInterleaveMembers = + [&DeadInterleaveGroupMembers](Instruction *I) { + BasicBlock *BB = I->getParent(); + for (auto &I : make_range(I->getIterator(), BB->end())) + if (!DeadInterleaveGroupMembers.contains(&I)) + return &I; + llvm_unreachable("Need to find a valid insert point"); + }; + // Mark instructions we'll need to sink later and their targets as + // ingredients whose recipe we'll need to record. + for (auto &Entry : SinkAfter) { + RecipeBuilder.recordRecipeOf(skipDeadInterleaveMembers(Entry.first)); + RecipeBuilder.recordRecipeOf(skipDeadInterleaveMembers(Entry.second)); + } + // --------------------------------------------------------------------------- // Build initial VPlan: Scan the body of the loop in a topological order to // visit each basic block after having visited its predecessor basic blocks. @@ -7772,7 +7800,8 @@ // First filter out irrelevant instructions, to ensure no recipes are // built for them. - if (isa(Instr) || DeadInstructions.count(Instr)) + if (isa(Instr) || DeadInstructions.count(Instr) || + DeadInterleaveGroupMembers.contains(Instr)) continue; if (auto Recipe = @@ -7820,31 +7849,13 @@ // Apply Sink-After legal constraints. for (auto &Entry : SinkAfter) { - VPRecipeBase *Sink = RecipeBuilder.getRecipe(Entry.first); - VPRecipeBase *Target = RecipeBuilder.getRecipe(Entry.second); + VPRecipeBase *Sink = + RecipeBuilder.getRecipe(skipDeadInterleaveMembers(Entry.first)); + VPRecipeBase *Target = + RecipeBuilder.getRecipe(skipDeadInterleaveMembers(Entry.second)); Sink->moveAfter(Target); } - // Interleave memory: for each Interleave Group we marked earlier as relevant - // for this VPlan, replace the Recipes widening its memory instructions with a - // single VPInterleaveRecipe at its insertion point. - for (auto IG : InterleaveGroups) { - auto *Recipe = cast( - RecipeBuilder.getRecipe(IG->getInsertPos())); - (new VPInterleaveRecipe(IG, Recipe->getAddr(), Recipe->getMask())) - ->insertBefore(Recipe); - - for (unsigned i = 0; i < IG->getFactor(); ++i) - if (Instruction *Member = IG->getMember(i)) { - if (!Member->getType()->isVoidTy()) { - VPValue *OriginalV = Plan->getVPValue(Member); - Plan->removeVPValueFor(Member); - OriginalV->replaceAllUsesWith(Plan->getOrAddVPValue(Member)); - } - RecipeBuilder.getRecipe(Member)->eraseFromParent(); - } - } - // Adjust the recipes for any inloop reductions. if (Range.Start.isVector()) adjustRecipesForInLoopReductions(Plan, RecipeBuilder); @@ -8066,7 +8077,8 @@ void VPInterleaveRecipe::execute(VPTransformState &State) { assert(!State.Instance && "Interleave group being replicated."); - State.ILV->vectorizeInterleaveGroup(IG, State, getAddr(), getMask()); + State.ILV->vectorizeInterleaveGroup(IG, defined_values(), State, getAddr(), + getMask()); } void VPReductionRecipe::execute(VPTransformState &State) { diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -53,6 +53,8 @@ // marked by having a nullptr entry in this map. DenseMap Ingredient2Recipe; + DenseMap *> InsertPtToGroup; + /// Check if \p I can be widened at the start of \p Range and possibly /// decrease the range such that the returned value holds for the entire \p /// Range. The function should not be called for memory instructions or calls. @@ -61,8 +63,8 @@ /// Check if the load or store instruction \p I should widened for \p /// Range.Start and potentially masked. Such instructions are handled by a /// recipe that takes an additional VPInstruction for the mask. - VPWidenMemoryInstructionRecipe * - tryToWidenMemory(Instruction *I, VFRange &Range, VPlanPtr &Plan); + VPRecipeBase *tryToWidenMemory(Instruction *I, VFRange &Range, + VPlanPtr &Plan); /// Check if an induction recipe should be constructed for \I. If so build and /// return it. If not, return null. @@ -129,6 +131,10 @@ Ingredient2Recipe[I] = nullptr; } + void recordInterleaveGroup(const InterleaveGroup *IG) { + InsertPtToGroup[IG->getInsertPos()] = IG; + } + /// Return the recipe created for given ingredient. VPRecipeBase *getRecipe(Instruction *I) { assert(Ingredient2Recipe.count(I) && diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1040,13 +1040,20 @@ /// VPInterleaveRecipe is a recipe for transforming an interleave group of load /// or stores into one wide load/store and shuffles. -class VPInterleaveRecipe : public VPRecipeBase, public VPUser { +class VPInterleaveRecipe : public VPRecipeBase, public VPDef, public VPUser { const InterleaveGroup *IG; public: VPInterleaveRecipe(const InterleaveGroup *IG, VPValue *Addr, VPValue *Mask) - : VPRecipeBase(VPInterleaveSC), VPUser({Addr}), IG(IG) { + : VPRecipeBase(VPInterleaveSC), VPUser(Addr), IG(IG) { + for (unsigned i = 0; i < IG->getFactor(); ++i) + if (Instruction *I = IG->getMember(i)) { + if (I->getType()->isVoidTy()) + continue; + new VPValue(I, this); + } + if (Mask) addOperand(Mask); } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -358,8 +358,12 @@ void VPBasicBlock::dropAllReferences(VPValue *NewValue) { for (VPRecipeBase &R : Recipes) { - if (auto *VPV = R.toVPValue()) - VPV->replaceAllUsesWith(NewValue); + if (VPValue *Def = R.toVPValue()) + Def->replaceAllUsesWith(NewValue); + else if (auto *IR = dyn_cast(&R)) { + for (auto *Def : IR->defined_values()) + Def->replaceAllUsesWith(NewValue); + } if (auto *User = R.toVPUser()) for (unsigned I = 0, E = User->getNumOperands(); I != E; I++) diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -909,8 +909,8 @@ ; CHECK: %[[VSHUF1:.+]] = shufflevector <8 x i16> %wide.vec, <8 x i16> undef, <4 x i32> ; CHECK: %[[VSHUF:.+]] = shufflevector <4 x i16> %vector.recur, <4 x i16> %[[VSHUF1]], <4 x i32> ; CHECK: sext <4 x i16> %[[VSHUF0]] to <4 x i32> -; CHECK: sext <4 x i16> %[[VSHUF]] to <4 x i32> ; CHECK: sext <4 x i16> %[[VSHUF1]] to <4 x i32> +; CHECK: sext <4 x i16> %[[VSHUF]] to <4 x i32> ; CHECK: mul nsw <4 x i32> ; CHECK: mul nsw <4 x i32> diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "../lib/Transforms/Vectorize/VPlan.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "gtest/gtest.h" @@ -470,7 +471,8 @@ VPValue Addr; VPValue Mask; - VPInterleaveRecipe Recipe(nullptr, &Addr, &Mask); + InterleaveGroup IG(4, false, Align(4)); + VPInterleaveRecipe Recipe(&IG, &Addr, &Mask); EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; EXPECT_TRUE(isa(BaseR));