diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -514,14 +514,15 @@ /// variable canonicalization. It supports both VF = 1 for unrolled loops and /// arbitrary length vectors. void widenPHIInstruction(Instruction *PN, RecurrenceDescriptor *RdxDesc, - Value *StartV, unsigned UF, ElementCount VF); + Value *StartV, unsigned UF, ElementCount VF, + VPValue *Def, VPTransformState &State); /// A helper function to scalarize a single Instruction in the innermost loop. /// Generates a sequence of scalar instances for each lane between \p MinLane /// and \p MaxLane, times each part between \p MinPart and \p MaxPart, /// inclusive. Uses the VPValue operands from \p Operands instead of \p /// Instr's operands. - void scalarizeInstruction(Instruction *Instr, VPUser &Operands, + void scalarizeInstruction(Instruction *Instr, VPValue *Def, VPUser &Operands, const VPIteration &Instance, bool IfPredicateInstr, VPTransformState &State); @@ -2903,7 +2904,8 @@ } } -void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, VPUser &User, +void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, VPValue *Def, + VPUser &User, const VPIteration &Instance, bool IfPredicateInstr, VPTransformState &State) { @@ -2924,6 +2926,8 @@ if (!IsVoidRetTy) Cloned->setName(Instr->getName() + ".cloned"); + State.Builder.SetInsertPoint(Builder.GetInsertBlock(), + Builder.GetInsertPoint()); // Replace the operands of the cloned instructions with their scalar // equivalents in the new loop. for (unsigned op = 0, e = User.getNumOperands(); op != e; ++op) { @@ -2940,10 +2944,7 @@ // Place the cloned scalar in the new loop. Builder.Insert(Cloned); - // TODO: Set result for VPValue of VPReciplicateRecipe. This requires - // representing scalar values in VPTransformState. Add the cloned scalar to - // the scalar map entry. - VectorLoopValueMap.setScalarValue(Instr, Instance, Cloned); + State.set(Def, Instr, Cloned, Instance); // If we just cloned a new assumption, add it the assumption cache. if (auto *II = dyn_cast(Cloned)) @@ -4627,7 +4628,8 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, RecurrenceDescriptor *RdxDesc, Value *StartV, unsigned UF, - ElementCount VF) { + ElementCount VF, VPValue *Def, + VPTransformState &State) { assert(!VF.isScalable() && "scalable vectors not yet supported."); PHINode *P = cast(PN); if (EnableVPlanNativePath) { @@ -4638,7 +4640,7 @@ Type *VecTy = (VF.isScalar()) ? PN->getType() : VectorType::get(PN->getType(), VF); Value *VecPhi = Builder.CreatePHI(VecTy, PN->getNumOperands(), "vec.phi"); - VectorLoopValueMap.setVectorValue(P, 0, VecPhi); + State.set(Def, P, VecPhi, 0); OrigPHIsToFix.push_back(P); return; @@ -4691,7 +4693,7 @@ // This is phase one of vectorizing PHIs. Value *EntryPart = PHINode::Create( VecTy, 2, "vec.phi", &*LoopVectorBody->getFirstInsertionPt()); - VectorLoopValueMap.setVectorValue(P, Part, EntryPart); + State.set(Def, P, EntryPart, Part); if (StartV) { // Make sure to add the reduction start value only to the // first unroll part. @@ -4743,7 +4745,7 @@ Value *SclrGep = emitTransformedIndex(Builder, GlobalIdx, PSE.getSE(), DL, II); SclrGep->setName("next.gep"); - VectorLoopValueMap.setScalarValue(P, {Part, Lane}, SclrGep); + State.set(Def, P, SclrGep, {Part, Lane}); } } return; @@ -4791,7 +4793,7 @@ StartOffset, Builder.CreateVectorSplat(VF.getKnownMinValue(), ScalarStepValue), "vector.gep")); - VectorLoopValueMap.setVectorValue(P, Part, GEP); + State.set(Def, P, GEP, Part); } } } @@ -8583,6 +8585,10 @@ auto *PHIRecipe = Instr->getType()->isVoidTy() ? nullptr : new VPPredInstPHIRecipe(Plan->getOrAddVPValue(Instr)); + if (PHIRecipe) { + Plan->removeVPValueFor(Instr); + Plan->addVPValue(Instr, PHIRecipe); + } auto *Exit = new VPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe); auto *Pred = new VPBasicBlock(Twine(RegionName) + ".if", PredRecipe); VPRegionBlock *Region = new VPRegionBlock(Entry, Exit, RegionName, true); @@ -9039,7 +9045,8 @@ void VPWidenPHIRecipe::execute(VPTransformState &State) { Value *StartV = getStartValue() ? getStartValue()->getLiveInIRValue() : nullptr; - State.ILV->widenPHIInstruction(Phi, RdxDesc, StartV, State.UF, State.VF); + State.ILV->widenPHIInstruction(cast(getUnderlyingValue()), RdxDesc, + StartV, State.UF, State.VF, this, State); } void VPBlendRecipe::execute(VPTransformState &State) { @@ -9078,7 +9085,7 @@ } } for (unsigned Part = 0; Part < State.UF; ++Part) - State.ValueMap.setVectorValue(Phi, Part, Entry[Part]); + State.set(this, Phi, Entry[Part], Part); } void VPInterleaveRecipe::execute(VPTransformState &State) { @@ -9122,7 +9129,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) { if (State.Instance) { // Generate a single instance. assert(!State.VF.isScalable() && "Can't scalarize a scalable vector"); - State.ILV->scalarizeInstruction(getUnderlyingInstr(), *this, + State.ILV->scalarizeInstruction(getUnderlyingInstr(), this, *this, *State.Instance, IsPredicated, State); // Insert scalar instance packing it into a vector. if (AlsoPack && State.VF.isVector()) { @@ -9131,11 +9138,9 @@ assert(!State.VF.isScalable() && "VF is assumed to be non scalable."); Value *Poison = PoisonValue::get( VectorType::get(getUnderlyingValue()->getType(), State.VF)); - State.ValueMap.setVectorValue(getUnderlyingInstr(), - State.Instance->Part, Poison); + State.set(this, getUnderlyingInstr(), Poison, State.Instance->Part); } - State.ILV->packScalarIntoVectorValue(getUnderlyingInstr(), - *State.Instance); + State.ILV->packScalarIntoVectorValue(this, *State.Instance, State); } return; } @@ -9148,8 +9153,8 @@ "Can't scalarize a scalable vector"); for (unsigned Part = 0; Part < State.UF; ++Part) for (unsigned Lane = 0; Lane < EndLane; ++Lane) - State.ILV->scalarizeInstruction(getUnderlyingInstr(), *this, {Part, Lane}, - IsPredicated, State); + State.ILV->scalarizeInstruction(getUnderlyingInstr(), this, *this, + {Part, Lane}, IsPredicated, State); } void VPBranchOnMaskRecipe::execute(VPTransformState &State) { @@ -9186,28 +9191,31 @@ BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor(); assert(PredicatingBB && "Predicated block has no single predecessor."); + auto *RepR = dyn_cast(getOperand(0)); + unsigned Part = State.Instance->Part; // By current pack/unpack logic we need to generate only a single phi node: if // a vector value for the predicated instruction exists at this point it means // the instruction has vector users only, and a phi for the vector value is // needed. In this case the recipe of the predicated instruction is marked to // also do that packing, thereby "hoisting" the insert-element sequence. // Otherwise, a phi node for the scalar value is needed. - unsigned Part = State.Instance->Part; - Instruction *PredInst = - cast(getOperand(0)->getUnderlyingValue()); - if (State.ValueMap.hasVectorValue(PredInst, Part)) { - Value *VectorValue = State.ValueMap.getVectorValue(PredInst, Part); + if (!State.hasVectorValue(getOperand(0), Part)) { + Type *PredInstType = RepR->getUnderlyingInstr()->getType(); + PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2); + Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()), + PredicatingBB); + Phi->addIncoming(ScalarPredInst, PredicatedBB); + State.set(this, Phi, *State.Instance); + State.set(getOperand(0), Phi, *State.Instance); + + } else { + Value *VectorValue = State.get(getOperand(0), Part); InsertElementInst *IEI = cast(VectorValue); PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2); VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector. VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element. - State.ValueMap.resetVectorValue(PredInst, Part, VPhi); // Update cache. - } else { - Type *PredInstType = PredInst->getType(); - PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2); - Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()), PredicatingBB); - Phi->addIncoming(ScalarPredInst, PredicatedBB); - State.ValueMap.resetScalarValue(PredInst, *State.Instance, Phi); + State.set(this, VPhi, VPhi, Part); + State.set(getOperand(0), VPhi, Part); } } @@ -9291,8 +9299,6 @@ if (hasVectorValue(Def, Part)) return Data.PerPartOutput[Def][Part]; - // TODO: Remove the callback once all scalar recipes are managed using - // VPValues. if (!hasScalarValue(Def, {Part, 0})) return Callback.getOrCreateVectorValues(VPValue2Value[Def], Part); @@ -9330,7 +9336,7 @@ } else { // Initialize packing with insertelements to start from undef. assert(!VF.isScalable() && "VF is assumed to be non scalable."); - Value *Undef = UndefValue::get(VectorType::get(LastInst->getType(), VF)); + Value *Undef = PoisonValue::get(VectorType::get(LastInst->getType(), VF)); set(Def, Undef, Part); for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane) ILV->packScalarIntoVectorValue(Def, {Part, Lane}, *this); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -307,6 +307,7 @@ } Data.PerPartOutput[Def][Part] = V; } + void set(VPValue *Def, Value *IRDef, Value *V, unsigned Part); void reset(VPValue *Def, Value *IRDef, Value *V, unsigned Part); void set(VPValue *Def, Value *IRDef, Value *V, const VPIteration &Instance); @@ -990,7 +991,7 @@ /// A recipe for handling all phi nodes except for integer and FP inductions. /// For reduction PHIs, RdxDesc must point to the corresponding recurrence /// descriptor and the start value is the first operand of the recipe. -class VPWidenPHIRecipe : public VPRecipeBase, public VPUser { +class VPWidenPHIRecipe : public VPRecipeBase, public VPValue, public VPUser { PHINode *Phi; /// Descriptor for a reduction PHI. @@ -1006,9 +1007,9 @@ } /// Create a VPWidenPHIRecipe for \p Phi - VPWidenPHIRecipe(PHINode *Phi) : VPRecipeBase(VPWidenPHISC), Phi(Phi) { - new VPValue(Phi, this); - } + VPWidenPHIRecipe(PHINode *Phi) + : VPRecipeBase(VPWidenPHISC), VPValue(VPValue::VPVWidenPHISC, Phi, this), + Phi(Phi) {} ~VPWidenPHIRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. @@ -1031,7 +1032,7 @@ /// A recipe for vectorizing a phi-node as a sequence of mask-based select /// instructions. -class VPBlendRecipe : public VPRecipeBase, public VPUser { +class VPBlendRecipe : public VPRecipeBase, public VPUser, public VPValue { PHINode *Phi; public: @@ -1039,8 +1040,8 @@ /// respective masks, ordered [I0, M0, I1, M1, ...]. Note that a single value /// might be incoming with a full mask for which there is no VPValue. VPBlendRecipe(PHINode *Phi, ArrayRef Operands) - : VPRecipeBase(VPBlendSC), VPUser(Operands), Phi(Phi) { - new VPValue(Phi, this); + : VPRecipeBase(VPBlendSC), VPUser(Operands), + VPValue(VPValue::VPVBlendSC, Phi, this), Phi(Phi) { assert(Operands.size() > 0 && ((Operands.size() == 1) || (Operands.size() % 2 == 0)) && "Expected either a single incoming value or a positive even number " @@ -1235,6 +1236,8 @@ VPSlotTracker &SlotTracker) const override; bool isUniform() const { return IsUniform; } + + bool isPacked() const { return AlsoPack; } }; /// A recipe for generating conditional branches on the bits of a mask. @@ -1279,15 +1282,13 @@ /// order to merge values that are set under such a branch and feed their uses. /// The phi nodes can be scalar or vector depending on the users of the value. /// This recipe works in concert with VPBranchOnMaskRecipe. -class VPPredInstPHIRecipe : public VPRecipeBase, public VPUser { - +class VPPredInstPHIRecipe : public VPRecipeBase, public VPUser, public VPValue { public: /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi /// nodes after merging back from a Branch-on-Mask. VPPredInstPHIRecipe(VPValue *PredV) - : VPRecipeBase(VPPredInstPHISC), VPUser(PredV) { - new VPValue(PredV->getUnderlyingValue(), this); - } + : VPRecipeBase(VPPredInstPHISC), VPUser(PredV), + VPValue(VPValue::VPVPredInstPHI, nullptr, this) {} ~VPPredInstPHIRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -223,18 +223,17 @@ if (hasScalarValue(Def, Instance)) return Data.PerPartScalars[Def][Instance.Part][Instance.Lane]; - if (hasVectorValue(Def, Instance.Part)) { - assert(Data.PerPartOutput.count(Def)); - auto *VecPart = Data.PerPartOutput[Def][Instance.Part]; - if (!VecPart->getType()->isVectorTy()) { - assert(Instance.Lane == 0 && "cannot get lane > 0 for scalar"); - return VecPart; - } - // TODO: Cache created scalar values. - return Builder.CreateExtractElement(VecPart, - Builder.getInt32(Instance.Lane)); + assert(hasVectorValue(Def, Instance.Part)); + auto *VecPart = Data.PerPartOutput[Def][Instance.Part]; + if (!VecPart->getType()->isVectorTy()) { + assert(Instance.Lane == 0 && "cannot get lane > 0 for scalar"); + return VecPart; } - return Callback.getOrCreateScalarValue(VPValue2Value[Def], Instance); + // TODO: Cache created scalar values. + auto *Extract = + Builder.CreateExtractElement(VecPart, Builder.getInt32(Instance.Lane)); + // set(Def, Extract, Instance); + return Extract; } BasicBlock * @@ -914,7 +913,7 @@ void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { - O << "WIDEN-PHI " << VPlanIngredient(Phi); + O << "WIDEN-PHI " << VPlanIngredient(getUnderlyingValue()); } void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent, @@ -971,6 +970,8 @@ void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << "PHI-PREDICATED-INSTRUCTION "; + printAsOperand(O, SlotTracker); + O << " = "; printOperands(O, SlotTracker); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -86,6 +86,10 @@ assert(NewRecipe->getNumDefinedValues() == 0 && "Only recpies with zero or one defined values expected"); Ingredient->eraseFromParent(); + Plan->removeVPValueFor(Inst); + for (auto *Def : NewRecipe->definedValues()) { + Plan->addVPValue(Inst, Def); + } } } } diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -90,13 +90,17 @@ /// type identification. enum { VPValueSC, + VPVBlendSC, VPVInstructionSC, VPVMemoryInstructionSC, + VPVPredInstPHI, VPVReductionSC, VPVReplicateSC, VPVWidenSC, VPVWidenCallSC, VPVWidenGEPSC, + VPVWidenIntOrFpIndcutionSC, + VPVWidenPHISC, VPVWidenSelectSC, }; diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -118,13 +118,13 @@ ; ; CHECK: N5 [label = ; CHECK-NEXT: "pred.udiv.continue:\n" + -; CHECK-NEXT: "PHI-PREDICATED-INSTRUCTION ir<%tmp4>\l" +; CHECK-NEXT: "PHI-PREDICATED-INSTRUCTION vp<%3> = ir<%tmp4>\l" ; CHECK-NEXT: ] ; ; CHECK: N7 [label = ; CHECK-NEXT: "for.inc:\n" + ; CHECK-NEXT: "EMIT vp<%4> = not ir<%cmp>\l" + -; CHECK-NEXT: "BLEND %d = ir<0>/vp<%4> ir<%tmp4>/ir<%cmp>\l" + +; CHECK-NEXT: "BLEND %d = ir<0>/vp<%4> vp<%3>/ir<%cmp>\l" + ; CHECK-NEXT: "CLONE ir<%idx> = getelementptr ir<%x>, ir<%i>\l" + ; CHECK-NEXT: "WIDEN store ir<%idx>, ir<%d>\l" ; CHECK-NEXT: ]