diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2672,7 +2672,9 @@ // Place the cloned scalar in the new loop. Builder.Insert(Cloned); - // Add the cloned scalar to the scalar map entry. + // TODO: Set result for VPValue of VPReplicateRecipe. This requires + // representing scalar values in VPTransformState. Add the cloned scalar to + // the scalar map entry. VectorLoopValueMap.setScalarValue(Instr, Instance, Cloned); // If we just cloned a new assumption, add it the assumption cache. @@ -7529,6 +7531,7 @@ auto *Recipe = new VPReplicateRecipe(I, Plan->mapToVPValues(I->operands()), IsUniform, IsPredicated); setRecipe(I, Recipe); + Plan->addOrReplaceVPValue(I, Recipe); // Find if I uses a predicated instruction. If so, it will use its scalar // value. Avoid hoisting the insert-element which packs the scalar value into @@ -8108,18 +8111,20 @@ void VPReplicateRecipe::execute(VPTransformState &State) { if (State.Instance) { // Generate a single instance. - State.ILV->scalarizeInstruction(Ingredient, *this, *State.Instance, - IsPredicated, State); + State.ILV->scalarizeInstruction(getUnderlyingInstr(), *this, + *State.Instance, IsPredicated, State); // Insert scalar instance packing it into a vector. if (AlsoPack && State.VF.isVector()) { // If we're constructing lane 0, initialize to start from undef. 
if (State.Instance->Lane == 0) { assert(!State.VF.isScalable() && "VF is assumed to be non scalable."); - Value *Undef = - UndefValue::get(VectorType::get(Ingredient->getType(), State.VF)); - State.ValueMap.setVectorValue(Ingredient, State.Instance->Part, Undef); + Value *Undef = UndefValue::get( + VectorType::get(getUnderlyingValue()->getType(), State.VF)); + State.ValueMap.setVectorValue(getUnderlyingInstr(), + State.Instance->Part, Undef); } - State.ILV->packScalarIntoVectorValue(Ingredient, *State.Instance); + State.ILV->packScalarIntoVectorValue(getUnderlyingInstr(), + *State.Instance); } return; } @@ -8130,7 +8135,7 @@ unsigned EndLane = IsUniform ? 1 : State.VF.getKnownMinValue(); for (unsigned Part = 0; Part < State.UF; ++Part) for (unsigned Lane = 0; Lane < EndLane; ++Lane) - State.ILV->scalarizeInstruction(Ingredient, *this, {Part, Lane}, + State.ILV->scalarizeInstruction(getUnderlyingInstr(), *this, {Part, Lane}, IsPredicated, State); } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -614,6 +614,10 @@ // hoisted into a VPBlockBase. return true; } + + /// Replace all operands of VPUsers in the block with \p NewValue and also + /// replace all uses of VPValues defined in the block with \p NewValue. + virtual void dropAllReferences(VPValue *NewValue) = 0; }; /// VPRecipeBase is a base class modeling a sequence of one or more output IR @@ -1130,10 +1134,7 @@ /// copies of the original scalar type, one per lane, instead of producing a /// single copy of widened type for all lanes. If the instruction is known to be /// uniform only one copy, per lane zero, will be generated. -class VPReplicateRecipe : public VPRecipeBase, public VPUser { - /// The instruction being replicated. 
- Instruction *Ingredient; - +class VPReplicateRecipe : public VPRecipeBase, public VPUser, public VPValue { /// Indicator if only a single replica per lane is needed. bool IsUniform; @@ -1147,8 +1148,9 @@ template VPReplicateRecipe(Instruction *I, iterator_range Operands, bool IsUniform, bool IsPredicated = false) - : VPRecipeBase(VPReplicateSC), VPUser(Operands), Ingredient(I), - IsUniform(IsUniform), IsPredicated(IsPredicated) { + : VPRecipeBase(VPReplicateSC), VPUser(Operands), + VPValue(VPVReplicateSC, I), IsUniform(IsUniform), + IsPredicated(IsPredicated) { // Retain the previous behavior of predicateInstructions(), where an // insert-element of a predicated instruction got hoisted into the // predicated basic block iff it was its only user. This is achieved by @@ -1164,6 +1166,10 @@ return V->getVPRecipeID() == VPRecipeBase::VPReplicateSC; } + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPValue::VPVReplicateSC; + } + /// Generate replicas of the desired Ingredient. Replicas will be generated /// for all parts and lanes unless a specific part and lane are specified in /// the \p State. @@ -1174,6 +1180,8 @@ /// Print the recipe. void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override; + + bool isUniform() const { return IsUniform; } }; /// A recipe for generating conditional branches on the bits of a mask. @@ -1413,13 +1421,11 @@ /// this VPBasicBlock, thereby "executing" the VPlan. void execute(struct VPTransformState *State) override; - /// Replace all operands of VPUsers in the block with \p NewValue and also - /// replaces all uses of VPValues defined in the block with NewValue. - void dropAllReferences(VPValue *NewValue); - /// Return the position of the first non-phi node recipe in the block. 
iterator getFirstNonPhi(); + void dropAllReferences(VPValue *NewValue) override; + private: /// Create an IR BasicBlock to hold the output instructions generated by this /// VPBasicBlock, and return it. Update the CFGState accordingly. @@ -1460,8 +1466,11 @@ IsReplicator(IsReplicator) {} ~VPRegionBlock() override { - if (Entry) + if (Entry) { + VPValue DummyValue; + Entry->dropAllReferences(&DummyValue); deleteCFG(Entry); + } } /// Method to support type inquiry through isa, cast, and dyn_cast. @@ -1506,6 +1515,8 @@ /// The method which generates the output IR instructions that correspond to /// this VPRegionBlock, thereby "executing" the VPlan. void execute(struct VPTransformState *State) override; + + void dropAllReferences(VPValue *NewValue) override; }; //===----------------------------------------------------------------------===// @@ -1680,8 +1691,13 @@ } ~VPlan() { - if (Entry) + if (Entry) { + VPValue DummyValue; + for (VPBlockBase *Block : depth_first(Entry)) + Block->dropAllReferences(&DummyValue); + VPBlockBase::deleteCFG(Entry); + } for (VPValue *VPV : VPValuesToFree) delete VPV; if (BackedgeTakenCount) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -125,6 +125,8 @@ return V; if (auto *V = dyn_cast(this)) return V; + if (auto *V = dyn_cast(this)) + return V; return nullptr; } @@ -139,6 +141,8 @@ return V; if (auto *V = dyn_cast(this)) return V; + if (auto *V = dyn_cast(this)) + return V; return nullptr; } @@ -223,14 +227,8 @@ void VPBlockBase::deleteCFG(VPBlockBase *Entry) { SmallVector Blocks; - VPValue DummyValue; - for (VPBlockBase *Block : depth_first(Entry)) { - // Drop all references in VPBasicBlocks and replace all uses with - // DummyValue. 
- if (auto *VPBB = dyn_cast(Block)) - VPBB->dropAllReferences(&DummyValue); + for (VPBlockBase *Block : depth_first(Entry)) Blocks.push_back(Block); - } for (VPBlockBase *Block : Blocks) delete Block; @@ -378,6 +376,13 @@ } } +void VPRegionBlock::dropAllReferences(VPValue *NewValue) { + for (VPBlockBase *Block : depth_first(Entry)) + // Drop all references in each block reachable from Entry and replace all + // uses with NewValue. + Block->dropAllReferences(NewValue); +} + void VPRegionBlock::execute(VPTransformState *State) { ReversePostOrderTraversal RPOT(Entry); @@ -951,7 +956,7 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << "\"" << (IsUniform ? "CLONE " : "REPLICATE ") - << VPlanIngredient(Ingredient); + << VPlanIngredient(getUnderlyingInstr()); if (AlsoPack) O << " (S->V)"; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -91,7 +91,8 @@ VPMemoryInstructionSC, VPVWidenCallSC, VPVWidenSelectSC, - VPVWidenGEPSC + VPVWidenGEPSC, + VPVReplicateSC }; VPValue(Value *UV = nullptr, VPDef *Def = nullptr)