[VPlan] Replace VPReplicateRecipe::AlsoPack with a shouldPack() query.

Drop the cached AlsoPack flag together with setAlsoPack()/isPacked() and the
loop in handleReplication() that cleared it. Whether the scalar values are
also packed into a vector is now derived from the recipe's users when it is
needed (in execute() and print()).

shouldPack() is called from execute(), so its definition must not sit inside
the `#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)` region that guards the
print() definitions; the guard is closed before it and reopened after it.

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8514,21 +8514,6 @@
   auto *Recipe = new VPReplicateRecipe(I, Plan.mapToVPValues(I->operands()),
                                        IsUniform, IsPredicated);
 
-  // Find if I uses a predicated instruction. If so, it will use its scalar
-  // value. Avoid hoisting the insert-element which packs the scalar value into
-  // a vector value, as that happens iff all users use the vector value.
-  for (VPValue *Op : Recipe->operands()) {
-    auto *PredR =
-        dyn_cast_or_null<VPPredInstPHIRecipe>(Op->getDefiningRecipe());
-    if (!PredR)
-      continue;
-    auto *RepR = cast<VPReplicateRecipe>(
-        PredR->getOperand(0)->getDefiningRecipe());
-    assert(RepR->isPredicated() &&
-           "expected Replicate recipe to be predicated");
-    RepR->setAlsoPack(false);
-  }
-
   // Finalize the recipe for Instr, first if it is not predicated.
   if (!IsPredicated) {
     LLVM_DEBUG(dbgs() << "LV: Scalarizing:" << *I << "\n");
@@ -9516,7 +9501,7 @@
     State.ILV->scalarizeInstruction(UI, this, *State.Instance, IsPredicated,
                                     State);
     // Insert scalar instance packing it into a vector.
-    if (AlsoPack && State.VF.isVector()) {
+    if (shouldPack() && State.VF.isVector()) {
       // If we're constructing lane 0, initialize to start from poison.
       if (State.Instance->Lane.isFirstLane()) {
         assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -166,9 +166,8 @@
   /// Build a VPReplicationRecipe for \p I and enclose it within a Region if it
   /// is predicated. \return \p VPBB augmented with this new recipe if \p I is
   /// not predicated, otherwise \return a new VPBasicBlock that succeeds the new
-  /// Region. Update the packing decision of predicated instructions if they
-  /// feed \p I. Range.End may be decreased to ensure same recipe behavior from
-  /// \p Range.Start to \p Range.End.
+  /// Region. Range.End may be decreased to ensure same recipe behavior from \p
+  /// Range.Start to \p Range.End.
   VPBasicBlock *handleReplication(
       Instruction *I, VFRange &Range, VPBasicBlock *VPBB,
       VPlan &Plan);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1497,22 +1497,12 @@
   /// Indicator if the replicas are also predicated.
   bool IsPredicated;
 
-  /// Indicator if the scalar values should also be packed into a vector.
-  bool AlsoPack;
-
 public:
   template <typename IterT>
   VPReplicateRecipe(Instruction *I, iterator_range<IterT> Operands,
                     bool IsUniform, bool IsPredicated = false)
       : VPRecipeBase(VPDef::VPReplicateSC, Operands), VPValue(this, I),
-        IsUniform(IsUniform), IsPredicated(IsPredicated) {
-    // Retain the previous behavior of predicateInstructions(), where an
-    // insert-element of a predicated instruction got hoisted into the
-    // predicated basic block iff it was its only user. This is achieved by
-    // having predicated instructions also pack their values into a vector by
-    // default unless they have a replicated user which uses their scalar value.
-    AlsoPack = IsPredicated && !I->use_empty();
-  }
+        IsUniform(IsUniform), IsPredicated(IsPredicated) {}
 
   ~VPReplicateRecipe() override = default;
 
@@ -1523,8 +1513,6 @@
   /// the \p State.
   void execute(VPTransformState &State) override;
 
-  void setAlsoPack(bool Pack) { AlsoPack = Pack; }
-
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   /// Print the recipe.
   void print(raw_ostream &O, const Twine &Indent,
@@ -1533,8 +1521,6 @@
 
   bool isUniform() const { return IsUniform; }
 
-  bool isPacked() const { return AlsoPack; }
-
   bool isPredicated() const { return IsPredicated; }
 
   /// Returns true if the recipe only uses the first lane of operand \p Op.
@@ -1550,6 +1536,10 @@
            "Op must be an operand of the recipe");
     return true;
   }
+
+  /// Returns true if the recipe is used by a widened instruction. In this case,
+  /// the scalar values are also packed in a vector.
+  bool shouldPack() const;
 };
 
 /// A recipe for generating conditional branches on the bits of a mask.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -921,6 +921,21 @@
          "outside of loop)";
 }
+#endif
 
+bool VPReplicateRecipe::shouldPack() const {
+  // The scalar values must also be packed into a vector iff this recipe feeds
+  // a VPPredInstPHIRecipe that has at least one user which is not itself a
+  // VPReplicateRecipe (e.g. a widened recipe).
+  return any_of(users(), [](const VPUser *U) {
+    if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
+      return any_of(PredR->users(), [](const VPUser *PhiUser) {
+        return !isa<VPReplicateRecipe>(PhiUser);
+      });
+    return false;
+  });
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
                               VPSlotTracker &SlotTracker) const {
   O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
@@ -941,7 +956,7 @@
     printOperands(O, SlotTracker);
   }
 
-  if (AlsoPack)
+  if (shouldPack())
     O << " (S->V)";
 }
 #endif