diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -514,14 +514,15 @@ /// variable canonicalization. It supports both VF = 1 for unrolled loops and /// arbitrary length vectors. void widenPHIInstruction(Instruction *PN, RecurrenceDescriptor *RdxDesc, - Value *StartV, unsigned UF, ElementCount VF); + Value *StartV, unsigned UF, ElementCount VF, + VPValue *Def, VPTransformState &State); /// A helper function to scalarize a single Instruction in the innermost loop. /// Generates a sequence of scalar instances for each lane between \p MinLane /// and \p MaxLane, times each part between \p MinPart and \p MaxPart, /// inclusive. Uses the VPValue operands from \p Operands instead of \p /// Instr's operands. - void scalarizeInstruction(Instruction *Instr, VPUser &Operands, + void scalarizeInstruction(Instruction *Instr, VPValue *Def, VPUser &Operands, const VPIteration &Instance, bool IfPredicateInstr, VPTransformState &State); @@ -2903,7 +2904,8 @@ } } -void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, VPUser &User, +void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, VPValue *Def, + VPUser &User, const VPIteration &Instance, bool IfPredicateInstr, VPTransformState &State) { @@ -2924,6 +2926,8 @@ if (!IsVoidRetTy) Cloned->setName(Instr->getName() + ".cloned"); + State.Builder.SetInsertPoint(Builder.GetInsertBlock(), + Builder.GetInsertPoint()); // Replace the operands of the cloned instructions with their scalar // equivalents in the new loop. for (unsigned op = 0, e = User.getNumOperands(); op != e; ++op) { @@ -2940,10 +2944,7 @@ // Place the cloned scalar in the new loop. Builder.Insert(Cloned); - // TODO: Set result for VPValue of VPReciplicateRecipe. This requires - // representing scalar values in VPTransformState. Add the cloned scalar to - // the scalar map entry. 
- VectorLoopValueMap.setScalarValue(Instr, Instance, Cloned); + State.set(Def, Instr, Cloned, Instance); // If we just cloned a new assumption, add it the assumption cache. if (auto *II = dyn_cast<IntrinsicInst>(Cloned)) @@ -4624,7 +4625,8 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, RecurrenceDescriptor *RdxDesc, Value *StartV, unsigned UF, - ElementCount VF) { + ElementCount VF, VPValue *Def, + VPTransformState &State) { assert(!VF.isScalable() && "scalable vectors not yet supported."); PHINode *P = cast<PHINode>(PN); if (EnableVPlanNativePath) { @@ -4635,7 +4637,7 @@ Type *VecTy = (VF.isScalar()) ? PN->getType() : VectorType::get(PN->getType(), VF); Value *VecPhi = Builder.CreatePHI(VecTy, PN->getNumOperands(), "vec.phi"); - VectorLoopValueMap.setVectorValue(P, 0, VecPhi); + State.set(Def, P, VecPhi, 0); OrigPHIsToFix.push_back(P); return; @@ -4688,7 +4690,7 @@ // This is phase one of vectorizing PHIs. Value *EntryPart = PHINode::Create( VecTy, 2, "vec.phi", &*LoopVectorBody->getFirstInsertionPt()); - VectorLoopValueMap.setVectorValue(P, Part, EntryPart); + State.set(Def, P, EntryPart, Part); if (StartV) { // Make sure to add the reduction start value only to the // first unroll part. @@ -4740,8 +4742,7 @@ Value *SclrGep = emitTransformedIndex(Builder, GlobalIdx, PSE.getSE(), DL, II); SclrGep->setName("next.gep"); - VectorLoopValueMap.setScalarValue(P, VPIteration(Part, Lane), - SclrGep); + State.set(Def, P, SclrGep, VPIteration(Part, Lane)); } } return; @@ -4789,7 +4790,7 @@ StartOffset, Builder.CreateVectorSplat(VF.getKnownMinValue(), ScalarStepValue), "vector.gep")); - VectorLoopValueMap.setVectorValue(P, Part, GEP); + State.set(Def, P, GEP, Part); } } } @@ -8581,6 +8582,10 @@ auto *PHIRecipe = Instr->getType()->isVoidTy() ? 
nullptr : new VPPredInstPHIRecipe(Plan->getOrAddVPValue(Instr)); + if (PHIRecipe) { + Plan->removeVPValueFor(Instr); + Plan->addVPValue(Instr, PHIRecipe); + } auto *Exit = new VPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe); auto *Pred = new VPBasicBlock(Twine(RegionName) + ".if", PredRecipe); VPRegionBlock *Region = new VPRegionBlock(Entry, Exit, RegionName, true); @@ -9037,7 +9042,8 @@ void VPWidenPHIRecipe::execute(VPTransformState &State) { Value *StartV = getStartValue() ? getStartValue()->getLiveInIRValue() : nullptr; - State.ILV->widenPHIInstruction(Phi, RdxDesc, StartV, State.UF, State.VF); + State.ILV->widenPHIInstruction(cast<PHINode>(getUnderlyingValue()), RdxDesc, + StartV, State.UF, State.VF, this, State); } void VPBlendRecipe::execute(VPTransformState &State) { @@ -9076,7 +9082,7 @@ } } for (unsigned Part = 0; Part < State.UF; ++Part) - State.ValueMap.setVectorValue(Phi, Part, Entry[Part]); + State.set(this, Phi, Entry[Part], Part); } void VPInterleaveRecipe::execute(VPTransformState &State) { @@ -9120,7 +9126,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) { if (State.Instance) { // Generate a single instance. assert(!State.VF.isScalable() && "Can't scalarize a scalable vector"); - State.ILV->scalarizeInstruction(getUnderlyingInstr(), *this, + State.ILV->scalarizeInstruction(getUnderlyingInstr(), this, *this, *State.Instance, IsPredicated, State); // Insert scalar instance packing it into a vector. 
if (AlsoPack && State.VF.isVector()) { @@ -9129,11 +9135,9 @@ assert(!State.VF.isScalable() && "VF is assumed to be non scalable."); Value *Poison = PoisonValue::get( VectorType::get(getUnderlyingValue()->getType(), State.VF)); - State.ValueMap.setVectorValue(getUnderlyingInstr(), - State.Instance->Part, Poison); + State.set(this, getUnderlyingInstr(), Poison, State.Instance->Part); } - State.ILV->packScalarIntoVectorValue(getUnderlyingInstr(), - *State.Instance); + State.ILV->packScalarIntoVectorValue(this, *State.Instance, State); } return; } @@ -9146,7 +9150,7 @@ "Can't scalarize a scalable vector"); for (unsigned Part = 0; Part < State.UF; ++Part) for (unsigned Lane = 0; Lane < EndLane; ++Lane) - State.ILV->scalarizeInstruction(getUnderlyingInstr(), *this, + State.ILV->scalarizeInstruction(getUnderlyingInstr(), this, *this, VPIteration(Part, Lane), IsPredicated, State); } @@ -9184,6 +9188,8 @@ BasicBlock *PredicatedBB = ScalarPredInst->getParent(); BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor(); assert(PredicatingBB && "Predicated block has no single predecessor."); + assert(isa<VPReplicateRecipe>(getOperand(0)) && + "operand must be VPReplicateRecipe"); // By current pack/unpack logic we need to generate only a single phi node: if // a vector value for the predicated instruction exists at this point it means @@ -9192,21 +9198,32 @@ // also do that packing, thereby "hoisting" the insert-element sequence. // Otherwise, a phi node for the scalar value is needed. 
unsigned Part = State.Instance->Part; - Instruction *PredInst = - cast<Instruction>(getOperand(0)->getUnderlyingValue()); - if (State.ValueMap.hasVectorValue(PredInst, Part)) { - Value *VectorValue = State.ValueMap.getVectorValue(PredInst, Part); + if (State.hasVectorValue(getOperand(0), Part)) { + Value *VectorValue = State.get(getOperand(0), Part); InsertElementInst *IEI = cast<InsertElementInst>(VectorValue); PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2); VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector. VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element. - State.ValueMap.resetVectorValue(PredInst, Part, VPhi); // Update cache. + if (State.hasVectorValue(this, Part)) + State.reset(this, VPhi, Part); + else + State.set(this, VPhi, Part); + // NOTE: Currently we need to update the value of the operand, so the next + // predicated iteration inserts its generated value in the correct vector. + State.reset(getOperand(0), VPhi, Part); } else { - Type *PredInstType = PredInst->getType(); + Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType(); PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2); - Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()), PredicatingBB); + Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()), + PredicatingBB); Phi->addIncoming(ScalarPredInst, PredicatedBB); - State.ValueMap.resetScalarValue(PredInst, *State.Instance, Phi); + if (State.hasScalarValue(this, *State.Instance)) + State.reset(this, Phi, *State.Instance); + else + State.set(this, Phi, *State.Instance); + // NOTE: Currently we need to update the value of the operand, so the next + // predicated iteration inserts its generated value in the correct vector. + State.reset(getOperand(0), Phi, *State.Instance); } } @@ -9290,8 +9307,6 @@ if (hasVectorValue(Def, Part)) return Data.PerPartOutput[Def][Part]; - // TODO: Remove the callback once all scalar recipes are managed using - // VPValues. 
if (!hasScalarValue(Def, {Part, 0})) return Callback.getOrCreateVectorValues(VPValue2Value[Def], Part); @@ -9329,7 +9344,7 @@ } else { // Initialize packing with insertelements to start from undef. assert(!VF.isScalable() && "VF is assumed to be non scalable."); - Value *Undef = UndefValue::get(VectorType::get(LastInst->getType(), VF)); + Value *Undef = PoisonValue::get(VectorType::get(LastInst->getType(), VF)); set(Def, Undef, Part); for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane) ILV->packScalarIntoVectorValue(Def, {Part, Lane}, *this); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -311,10 +311,21 @@ } Data.PerPartOutput[Def][Part] = V; } + /// Reset an existing vector value for \p Def and a given \p Part. + void reset(VPValue *Def, Value *V, unsigned Part) { + auto Iter = Data.PerPartOutput.find(Def); + assert(Iter != Data.PerPartOutput.end() && + "need to overwrite existing value"); + Iter->second[Part] = V; + } + void set(VPValue *Def, Value *IRDef, Value *V, unsigned Part); void reset(VPValue *Def, Value *IRDef, Value *V, unsigned Part); - void set(VPValue *Def, Value *IRDef, Value *V, const VPIteration &Instance); + /// Set the generated scalar \p V for \p Def and \p IRDef and the given \p + /// Instance. + void set(VPValue *Def, Value *IRDef, Value *V, const VPIteration &Instance); + /// Set the generated scalar \p V for \p Def and the given \p Instance. void set(VPValue *Def, Value *V, const VPIteration &Instance) { auto Iter = Data.PerPartScalars.insert({Def, {}}); auto &PerPartVec = Iter.first->second; @@ -323,9 +334,22 @@ auto &Scalars = PerPartVec[Instance.Part]; while (Scalars.size() <= Instance.Lane) Scalars.push_back(nullptr); + assert(!Scalars[Instance.Lane] && "should overwrite existing value"); Scalars[Instance.Lane] = V; } + /// Reset an existing scalar value for \p Def and a given \p Instance. 
+ void reset(VPValue *Def, Value *V, const VPIteration &Instance) { + auto Iter = Data.PerPartScalars.find(Def); + assert(Iter != Data.PerPartScalars.end() && + "need to overwrite existing value"); + assert(Instance.Part < Iter->second.size() && + "need to overwrite existing value"); + assert(Instance.Lane < Iter->second[Instance.Part].size() && + "need to overwrite existing value"); + Iter->second[Instance.Part][Instance.Lane] = V; + } + /// Hold state information used when constructing the CFG of the output IR, /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks. struct CFGState { @@ -1016,7 +1040,7 @@ /// A recipe for handling all phi nodes except for integer and FP inductions. /// For reduction PHIs, RdxDesc must point to the corresponding recurrence /// descriptor and the start value is the first operand of the recipe. -class VPWidenPHIRecipe : public VPRecipeBase { +class VPWidenPHIRecipe : public VPRecipeBase, public VPValue { PHINode *Phi; /// Descriptor for a reduction PHI. @@ -1032,10 +1056,9 @@ } /// Create a VPWidenPHIRecipe for \p Phi - VPWidenPHIRecipe(PHINode *Phi) : VPRecipeBase(VPWidenPHISC, {}), Phi(Phi) { - new VPValue(Phi, this); - } - + VPWidenPHIRecipe(PHINode *Phi) + : VPRecipeBase(VPWidenPHISC, {}), + VPValue(VPValue::VPVWidenPHISC, Phi, this), Phi(Phi) {} ~VPWidenPHIRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. @@ -1058,7 +1081,7 @@ /// A recipe for vectorizing a phi-node as a sequence of mask-based select /// instructions. -class VPBlendRecipe : public VPRecipeBase { +class VPBlendRecipe : public VPRecipeBase, public VPValue { PHINode *Phi; public: @@ -1066,8 +1089,8 @@ /// respective masks, ordered [I0, M0, I1, M1, ...]. Note that a single value /// might be incoming with a full mask for which there is no VPValue. 
VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands) - : VPRecipeBase(VPBlendSC, Operands), Phi(Phi) { - new VPValue(Phi, this); + : VPRecipeBase(VPBlendSC, Operands), + VPValue(VPValue::VPVBlendSC, Phi, this), Phi(Phi) { assert(Operands.size() > 0 && ((Operands.size() == 1) || (Operands.size() % 2 == 0)) && "Expected either a single incoming value or a positive even number " @@ -1260,6 +1283,8 @@ VPSlotTracker &SlotTracker) const override; bool isUniform() const { return IsUniform; } + + bool isPacked() const { return AlsoPack; } }; /// A recipe for generating conditional branches on the bits of a mask. @@ -1305,14 +1330,13 @@ /// order to merge values that are set under such a branch and feed their uses. /// The phi nodes can be scalar or vector depending on the users of the value. /// This recipe works in concert with VPBranchOnMaskRecipe. -class VPPredInstPHIRecipe : public VPRecipeBase { - +class VPPredInstPHIRecipe : public VPRecipeBase, public VPValue { public: /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi /// nodes after merging back from a Branch-on-Mask. - VPPredInstPHIRecipe(VPValue *PredV) : VPRecipeBase(VPPredInstPHISC, PredV) { - new VPValue(PredV->getUnderlyingValue(), this); - } + VPPredInstPHIRecipe(VPValue *PredV) + : VPRecipeBase(VPPredInstPHISC, PredV), + VPValue(VPValue::VPVPredInstPHI, nullptr, this) {} ~VPPredInstPHIRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. 
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -195,18 +195,17 @@ if (hasScalarValue(Def, Instance)) return Data.PerPartScalars[Def][Instance.Part][Instance.Lane]; - if (hasVectorValue(Def, Instance.Part)) { - assert(Data.PerPartOutput.count(Def)); - auto *VecPart = Data.PerPartOutput[Def][Instance.Part]; - if (!VecPart->getType()->isVectorTy()) { - assert(Instance.Lane == 0 && "cannot get lane > 0 for scalar"); - return VecPart; - } - // TODO: Cache created scalar values. - return Builder.CreateExtractElement(VecPart, - Builder.getInt32(Instance.Lane)); + assert(hasVectorValue(Def, Instance.Part)); + auto *VecPart = Data.PerPartOutput[Def][Instance.Part]; + if (!VecPart->getType()->isVectorTy()) { + assert(Instance.Lane == 0 && "cannot get lane > 0 for scalar"); + return VecPart; } - return Callback.getOrCreateScalarValue(VPValue2Value[Def], Instance); + // TODO: Cache created scalar values. 
+ auto *Extract = + Builder.CreateExtractElement(VecPart, Builder.getInt32(Instance.Lane)); + // set(Def, Extract, Instance); + return Extract; } BasicBlock * @@ -884,7 +883,7 @@ void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { - O << "WIDEN-PHI " << VPlanIngredient(Phi); + O << "WIDEN-PHI " << VPlanIngredient(getUnderlyingValue()); } void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent, @@ -941,6 +940,8 @@ void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << "PHI-PREDICATED-INSTRUCTION "; + printAsOperand(O, SlotTracker); + O << " = "; printOperands(O, SlotTracker); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -75,6 +75,10 @@ assert(NewRecipe->getNumDefinedValues() == 0 && "Only recpies with zero or one defined values expected"); Ingredient->eraseFromParent(); + Plan->removeVPValueFor(Inst); + for (auto *Def : NewRecipe->definedValues()) { + Plan->addVPValue(Inst, Def); + } } } } diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -90,13 +90,17 @@ /// type identification. 
enum { VPValueSC, + VPVBlendSC, VPVInstructionSC, VPVMemoryInstructionSC, + VPVPredInstPHI, VPVReductionSC, VPVReplicateSC, VPVWidenSC, VPVWidenCallSC, VPVWidenGEPSC, + VPVWidenIntOrFpIndcutionSC, + VPVWidenPHISC, VPVWidenSelectSC, }; diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -118,13 +118,13 @@ ; ; CHECK: N5 [label = ; CHECK-NEXT: "pred.udiv.continue:\n" + -; CHECK-NEXT: "PHI-PREDICATED-INSTRUCTION ir<%tmp4>\l" +; CHECK-NEXT: "PHI-PREDICATED-INSTRUCTION vp<%3> = ir<%tmp4>\l" ; CHECK-NEXT: ] ; ; CHECK: N7 [label = ; CHECK-NEXT: "for.inc:\n" + ; CHECK-NEXT: "EMIT vp<%4> = not ir<%cmp>\l" + -; CHECK-NEXT: "BLEND %d = ir<0>/vp<%4> ir<%tmp4>/ir<%cmp>\l" + +; CHECK-NEXT: "BLEND %d = ir<0>/vp<%4> vp<%3>/ir<%cmp>\l" + ; CHECK-NEXT: "CLONE ir<%idx> = getelementptr ir<%x>, ir<%i>\l" + ; CHECK-NEXT: "WIDEN store ir<%idx>, ir<%d>\l" ; CHECK-NEXT: ]