diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -548,6 +548,7 @@ /// BlockInMask is non-null. Use \p State to translate given VPValues to IR /// values in the vectorized loop. void vectorizeInterleaveGroup(const InterleaveGroup *Group, + ArrayRef VPDefs, VPTransformState &State, VPValue *Addr, VPValue *BlockInMask = nullptr); @@ -2320,8 +2321,8 @@ // <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> ; Interleave R,G,B elements // store <12 x i32> %interleaved.vec ; Write 4 tuples of R,G,B void InnerLoopVectorizer::vectorizeInterleaveGroup( - const InterleaveGroup *Group, VPTransformState &State, - VPValue *Addr, VPValue *BlockInMask) { + const InterleaveGroup *Group, ArrayRef VPDefs, + VPTransformState &State, VPValue *Addr, VPValue *BlockInMask) { Instruction *Instr = Group->getInsertPos(); const DataLayout &DL = Instr->getModule()->getDataLayout(); @@ -2423,6 +2424,7 @@ // For each member in the group, shuffle out the appropriate data from the // wide loads. + unsigned J = 0; for (unsigned I = 0; I < InterleaveFactor; ++I) { Instruction *Member = Group->getMember(I); @@ -2447,8 +2449,9 @@ if (Group->isReverse()) StridedVec = reverseVector(StridedVec); - VectorLoopValueMap.setVectorValue(Member, Part, StridedVec); + State.set(VPDefs[J], Member, StridedVec, Part); } + ++J; } return; } @@ -7300,9 +7303,8 @@ return BlockMaskCache[BB] = BlockMask; } -VPWidenMemoryInstructionRecipe * -VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range, - VPlanPtr &Plan) { +VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I, VFRange &Range, + VPlanPtr &Plan) { assert((isa(I) || isa(I)) && "Must be called with either a load or store"); @@ -7330,6 +7332,12 @@ Mask = createBlockInMask(I->getParent(), Plan); VPValue *Addr = Plan->getOrAddVPValue(getLoadStorePointerOperand(I)); + auto II = InsertPtToGroup.find(I); + if (II != InsertPtToGroup.end()) { + auto *IG = II->second; + return new VPInterleaveRecipe(IG, Addr, Mask); + } + if (LoadInst *Load = dyn_cast(I)) return new VPWidenMemoryInstructionRecipe(*Load, Addr, Mask); @@ -7678,6 +7686,7 @@ DenseMap PredInst2Recipe; SmallPtrSet *, 1> InterleaveGroups; + SmallPtrSet DeadInterleaveGroupMembers; VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, PSE, Builder); @@ -7723,11 +7732,30 @@ if (!getDecisionAndClampRange(applyIG, Range)) continue; InterleaveGroups.insert(IG); + RecipeBuilder.recordInterleaveGroup(IG); for (unsigned i = 0; i < IG->getFactor(); i++) - if (Instruction *Member = IG->getMember(i)) + if (Instruction *Member = IG->getMember(i)) { RecipeBuilder.recordRecipeOf(Member); + if (Member != IG->getInsertPos()) + DeadInterleaveGroupMembers.insert(Member); + } }; + auto skipDeadInterleaveMembers = + [&DeadInterleaveGroupMembers](Instruction *I) { + BasicBlock *BB = I->getParent(); + for (auto &I : make_range(I->getIterator(), BB->end())) + if (!DeadInterleaveGroupMembers.contains(&I)) + return &I; + llvm_unreachable("Need to find a valid insert point"); + }; + // Mark instructions we'll need to sink later and their targets as + // ingredients whose recipe we'll need to record. + for (auto &Entry : SinkAfter) { + RecipeBuilder.recordRecipeOf(skipDeadInterleaveMembers(Entry.first)); + RecipeBuilder.recordRecipeOf(skipDeadInterleaveMembers(Entry.second)); + } + // --------------------------------------------------------------------------- // Build initial VPlan: Scan the body of the loop in a topological order to // visit each basic block after having visited its predecessor basic blocks. @@ -7763,7 +7791,8 @@ // First filter out irrelevant instructions, to ensure no recipes are // built for them. - if (isa(Instr) || DeadInstructions.count(Instr)) + if (isa(Instr) || DeadInstructions.count(Instr) || + DeadInterleaveGroupMembers.contains(Instr)) continue; if (auto Recipe = @@ -7811,31 +7840,13 @@ // Apply Sink-After legal constraints. for (auto &Entry : SinkAfter) { - VPRecipeBase *Sink = RecipeBuilder.getRecipe(Entry.first); - VPRecipeBase *Target = RecipeBuilder.getRecipe(Entry.second); + VPRecipeBase *Sink = + RecipeBuilder.getRecipe(skipDeadInterleaveMembers(Entry.first)); + VPRecipeBase *Target = + RecipeBuilder.getRecipe(skipDeadInterleaveMembers(Entry.second)); Sink->moveAfter(Target); } - // Interleave memory: for each Interleave Group we marked earlier as relevant - // for this VPlan, replace the Recipes widening its memory instructions with a - // single VPInterleaveRecipe at its insertion point. - for (auto IG : InterleaveGroups) { - auto *Recipe = cast( - RecipeBuilder.getRecipe(IG->getInsertPos())); - (new VPInterleaveRecipe(IG, Recipe->getAddr(), Recipe->getMask())) - ->insertBefore(Recipe); - - for (unsigned i = 0; i < IG->getFactor(); ++i) - if (Instruction *Member = IG->getMember(i)) { - if (!Member->getType()->isVoidTy()) { - VPValue *OriginalV = Plan->getVPValue(Member); - Plan->removeVPValueFor(Member); - OriginalV->replaceAllUsesWith(Plan->getOrAddVPValue(Member)); - } - RecipeBuilder.getRecipe(Member)->eraseFromParent(); - } - } - // Adjust the recipes for any inloop reductions. if (Range.Start > 1) adjustRecipesForInLoopReductions(Plan, RecipeBuilder); @@ -8056,7 +8067,8 @@ void VPInterleaveRecipe::execute(VPTransformState &State) { assert(!State.Instance && "Interleave group being replicated."); - State.ILV->vectorizeInterleaveGroup(IG, State, getAddr(), getMask()); + State.ILV->vectorizeInterleaveGroup(IG, defined_values(), State, getAddr(), + getMask()); } void VPReductionRecipe::execute(VPTransformState &State) { diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -53,6 +53,8 @@ // marked by having a nullptr entry in this map. DenseMap Ingredient2Recipe; + DenseMap *> InsertPtToGroup; + /// Check if \p I can be widened at the start of \p Range and possibly /// decrease the range such that the returned value holds for the entire \p /// Range. The function should not be called for memory instructions or calls. @@ -61,8 +63,8 @@ /// Check if the load or store instruction \p I should widened for \p /// Range.Start and potentially masked. Such instructions are handled by a /// recipe that takes an additional VPInstruction for the mask. - VPWidenMemoryInstructionRecipe * - tryToWidenMemory(Instruction *I, VFRange &Range, VPlanPtr &Plan); + VPRecipeBase *tryToWidenMemory(Instruction *I, VFRange &Range, + VPlanPtr &Plan); /// Check if an induction recipe should be constructed for \I. If so build and /// return it. If not, return null. @@ -129,6 +131,10 @@ Ingredient2Recipe[I] = nullptr; } + void recordInterleaveGroup(const InterleaveGroup *IG) { + InsertPtToGroup[IG->getInsertPos()] = IG; + } + /// Return the recipe created for given ingredient. VPRecipeBase *getRecipe(Instruction *I) { assert(Ingredient2Recipe.count(I) && diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1023,13 +1023,20 @@ /// VPInterleaveRecipe is a recipe for transforming an interleave group of load /// or stores into one wide load/store and shuffles. -class VPInterleaveRecipe : public VPRecipeBase, public VPUser { +class VPInterleaveRecipe : public VPRecipeBase, public VPDef { const InterleaveGroup *IG; public: VPInterleaveRecipe(const InterleaveGroup *IG, VPValue *Addr, VPValue *Mask) - : VPRecipeBase(VPInterleaveSC), VPUser({Addr}), IG(IG) { + : VPRecipeBase(VPInterleaveSC), VPDef(Addr), IG(IG) { + for (unsigned i = 0; i < IG->getFactor(); ++i) + if (Instruction *I = IG->getMember(i)) { + if (I->getType()->isVoidTy()) + continue; + new VPValue(I, this); + } + if (Mask) addOperand(Mask); } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -358,8 +358,12 @@ void VPBasicBlock::dropAllReferences(VPValue *NewValue) { for (VPRecipeBase &R : Recipes) { - if (auto *VPV = R.toVPValue()) - VPV->replaceAllUsesWith(NewValue); + if (VPValue *Def = R.toVPValue()) + Def->replaceAllUsesWith(NewValue); + else if (auto *IR = dyn_cast(&R)) { + for (auto *Def : IR->defined_values()) + Def->replaceAllUsesWith(NewValue); + } if (auto *User = R.toVPUser()) for (unsigned I = 0, E = User->getNumOperands(); I != E; I++) diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -232,6 +232,9 @@ TinyPtrVector DefinedValues; public: + VPDef(std::initializer_list Operands) + : VPDef(ArrayRef(Operands)) {} + VPDef(ArrayRef Operands) : VPUser(Operands) {} template diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -909,8 +909,8 @@ ; CHECK: %[[VSHUF1:.+]] = shufflevector <8 x i16> %wide.vec, <8 x i16> undef, <4 x i32> ; CHECK: %[[VSHUF:.+]] = shufflevector <4 x i16> %vector.recur, <4 x i16> %[[VSHUF1]], <4 x i32> ; CHECK: sext <4 x i16> %[[VSHUF0]] to <4 x i32> -; CHECK: sext <4 x i16> %[[VSHUF]] to <4 x i32> ; CHECK: sext <4 x i16> %[[VSHUF1]] to <4 x i32> +; CHECK: sext <4 x i16> %[[VSHUF]] to <4 x i32> ; CHECK: mul nsw <4 x i32> ; CHECK: mul nsw <4 x i32>