diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8472,7 +8472,7 @@ } } - auto *Recipe = new VPReplicateRecipe(I, Plan->mapToVPValues(I->operands()), + auto *Recipe = new VPReplicateRecipe(I, I, Plan->mapToVPValues(I->operands()), IsUniform, IsPredicated); // Find if I uses a predicated instruction. If so, it will use its scalar @@ -9654,18 +9654,17 @@ } void VPReplicateRecipe::execute(VPTransformState &State) { - Instruction *UI = getUnderlyingInstr(); if (State.Instance) { // Generate a single instance. assert(!State.VF.isScalable() && "Can't scalarize a scalable vector"); - State.ILV->scalarizeInstruction(UI, this, *State.Instance, - IsPredicated, State); + State.ILV->scalarizeInstruction(Instr, this, *State.Instance, IsPredicated, + State); // Insert scalar instance packing it into a vector. if (AlsoPack && State.VF.isVector()) { // If we're constructing lane 0, initialize to start from poison. if (State.Instance->Lane.isFirstLane()) { assert(!State.VF.isScalable() && "VF is assumed to be non scalable."); - Value *Poison = PoisonValue::get( - VectorType::get(UI->getType(), State.VF)); + Value *Poison = + PoisonValue::get(VectorType::get(Instr->getType(), State.VF)); State.set(this, Poison, State.Instance->Part); } State.ILV->packScalarIntoVectorValue(this, *State.Instance, State); @@ -9676,10 +9675,12 @@ if (IsUniform) { // If the recipe is uniform across all parts (instead of just per VF), only // generate a single instance. - if ((!mayHaveSideEffects() || isa(UI)) && - all_of(operands(), [](VPValue *Op) { return Op->isDefinedOutsideVectorRegions(); })) { - State.ILV->scalarizeInstruction(UI, this, VPIteration(0, 0), IsPredicated, - State); + if ((!mayHaveSideEffects() || isa(Instr)) && + all_of(operands(), [](VPValue *Op) { + return Op->isDefinedOutsideVectorRegions(); + })) { + State.ILV->scalarizeInstruction(Instr, this, VPIteration(0, 0), + IsPredicated, State); if (user_begin() != user_end()) { for (unsigned Part = 1; Part < State.UF; ++Part) State.set(this, State.get(this, VPIteration(0, 0)), @@ -9691,17 +9692,17 @@ // Uniform within VL means we need to generate lane 0 only for each // unrolled copy. for (unsigned Part = 0; Part < State.UF; ++Part) - State.ILV->scalarizeInstruction(UI, this, VPIteration(Part, 0), + State.ILV->scalarizeInstruction(Instr, this, VPIteration(Part, 0), IsPredicated, State); return; } // A store of a loop varying value to a loop invariant address only // needs a single copy of the store. - if (isa(UI) && !getOperand(1)->getDef()) { + if (isa(Instr) && !getOperand(1)->getDef()) { auto Lane = VPLane::getLastLaneForVF(State.VF); - State.ILV->scalarizeInstruction(UI, this, VPIteration(State.UF - 1, Lane), IsPredicated, - State); + State.ILV->scalarizeInstruction( + Instr, this, VPIteration(State.UF - 1, Lane), IsPredicated, State); return; } @@ -9710,7 +9711,7 @@ const unsigned EndLane = State.VF.getKnownMinValue(); for (unsigned Part = 0; Part < State.UF; ++Part) for (unsigned Lane = 0; Lane < EndLane; ++Lane) - State.ILV->scalarizeInstruction(UI, this, VPIteration(Part, Lane), + State.ILV->scalarizeInstruction(Instr, this, VPIteration(Part, Lane), IsPredicated, State); } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1537,6 +1537,11 @@ /// single copy of widened type for all lanes. If the instruction is known to be /// uniform only one copy, per lane zero, will be generated. class VPReplicateRecipe : public VPRecipeBase, public VPValue { + /// The instruction being replicated. It is only used for opcode, type info, + /// attributes and metadata. Operands and uses of the result are modeled in + /// VPlan. + const Instruction *Instr; + /// Indicator if only a single replica per lane is needed. bool IsUniform; @@ -1548,9 +1553,11 @@ public: template - VPReplicateRecipe(Instruction *I, iterator_range Operands, - bool IsUniform, bool IsPredicated = false) - : VPRecipeBase(VPReplicateSC, Operands), VPValue(VPVReplicateSC, I, this), + VPReplicateRecipe(const Instruction *I, Value *UnderlyingValue, + iterator_range Operands, bool IsUniform, + bool IsPredicated = false) + : VPRecipeBase(VPReplicateSC, Operands), + VPValue(VPVReplicateSC, UnderlyingValue, this), Instr(I), IsUniform(IsUniform), IsPredicated(IsPredicated) { // Retain the previous behavior of predicateInstructions(), where an // insert-element of a predicated instruction got hoisted into the @@ -1603,6 +1610,8 @@ "Op must be an operand of the recipe"); return true; } + + const Instruction &getInstruction() const { return *Instr; } }; /// A recipe for generating conditional branches on the bits of a mask. diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -44,7 +44,10 @@ case VPWidenMemoryInstructionSC: { return cast(this)->isStore(); } - case VPReplicateSC: + case VPReplicateSC: { + auto *R = cast(this); + return R->getInstruction().mayWriteToMemory(); + } case VPWidenCallSC: return cast(getVPSingleValue()->getUnderlyingValue()) ->mayWriteToMemory(); @@ -75,7 +78,10 @@ case VPWidenMemoryInstructionSC: { return !cast(this)->isStore(); } - case VPReplicateSC: + case VPReplicateSC: { + auto *R = cast(this); + return R->getInstruction().mayReadFromMemory(); + } case VPWidenCallSC: return cast(getVPSingleValue()->getUnderlyingValue()) ->mayReadFromMemory(); @@ -125,7 +131,7 @@ } case VPReplicateSC: { auto *R = cast(this); - return R->getUnderlyingInstr()->mayHaveSideEffects(); + return R->getInstruction().mayHaveSideEffects(); } default: return true; @@ -908,11 +914,11 @@ VPSlotTracker &SlotTracker) const { O << Indent << (IsUniform ? "CLONE " : "REPLICATE "); - if (!getUnderlyingInstr()->getType()->isVoidTy()) { + if (!Instr->getType()->isVoidTy()) { printAsOperand(O, SlotTracker); O << " = "; } - if (auto *CB = dyn_cast(getUnderlyingInstr())) { + if (auto *CB = dyn_cast(Instr)) { O << "call @" << CB->getCalledFunction()->getName() << "("; interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)), O, [&O, &SlotTracker](VPValue *Op) { @@ -920,7 +926,7 @@ }); O << ")"; } else { - O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " "; + O << Instruction::getOpcodeName(Instr->getOpcode()) << " "; printOperands(O, SlotTracker); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -167,10 +167,10 @@ continue; if (NeedsDuplicating) { - Instruction *I = cast( - cast(SinkCandidate)->getUnderlyingValue()); - auto *Clone = - new VPReplicateRecipe(I, SinkCandidate->operands(), true, false); + const Instruction *I = + &cast(SinkCandidate)->getInstruction(); + auto *Clone = new VPReplicateRecipe(I, nullptr, SinkCandidate->operands(), + true, false); // TODO: add ".cloned" suffix to name of Clone's VPValue. Clone->insertBefore(SinkCandidate); diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -998,11 +998,11 @@ ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]> -; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%addr>, vp<[[STEPS]]> +; CHECK-NEXT: CLONE vp<[[CLONED_GEP:%.+]]> = getelementptr ir<%addr>, vp<[[STEPS]]> ; CHECK-NEXT: Successor(s): loop.body ; CHECK-EMPTY: ; CHECK-NEXT: loop.body: -; CHECK-NEXT: WIDEN ir<%0> = load ir<%gep> +; CHECK-NEXT: WIDEN ir<%0> = load vp<[[CLONED_GEP]]> ; CHECK-NEXT: WIDEN ir<%pred> = fcmp ir<%0>, ir<0.000000e+00> ; CHECK-NEXT: Successor(s): then ; CHECK-EMPTY: diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -911,8 +911,8 @@ Args.push_back(&Op1); Args.push_back(&Op2); - VPReplicateRecipe Recipe(nullptr, make_range(Args.begin(), Args.end()), true, - false); + VPReplicateRecipe Recipe(nullptr, nullptr, + make_range(Args.begin(), Args.end()), true, false); EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; EXPECT_TRUE(isa(BaseR));