diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -492,14 +492,15 @@ /// Vectorize a single PHINode in a block. This method handles the induction /// variable canonicalization. It supports both VF = 1 for unrolled loops and /// arbitrary length vectors. - void widenPHIInstruction(Instruction *PN, unsigned UF, ElementCount VF); + void widenPHIInstruction(Instruction *PN, unsigned UF, ElementCount VF, + VPValue *Def, VPTransformState &State); /// A helper function to scalarize a single Instruction in the innermost loop. /// Generates a sequence of scalar instances for each lane between \p MinLane /// and \p MaxLane, times each part between \p MinPart and \p MaxPart, /// inclusive. Uses the VPValue operands from \p Operands instead of \p /// Instr's operands. - void scalarizeInstruction(Instruction *Instr, VPUser &Operands, + void scalarizeInstruction(Instruction *Instr, VPValue *Def, VPUser &Operands, const VPIteration &Instance, bool IfPredicateInstr, VPTransformState &State); @@ -2677,7 +2678,8 @@ } } -void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, VPUser &User, +void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, VPValue *Def, + VPUser &User, const VPIteration &Instance, bool IfPredicateInstr, VPTransformState &State) { @@ -2692,6 +2694,8 @@ if (!IsVoidRetTy) Cloned->setName(Instr->getName() + ".cloned"); + State.Builder.SetInsertPoint(Builder.GetInsertPoint()->getParent(), + Builder.GetInsertPoint()); // Replace the operands of the cloned instructions with their scalar // equivalents in the new loop. for (unsigned op = 0, e = User.getNumOperands(); op != e; ++op) { @@ -2711,7 +2715,8 @@ // TODO: Set result for VPValue of VPReciplicateRecipe. This requires // representing scalar values in VPTransformState. Add the cloned scalar to // the scalar map entry. - VectorLoopValueMap.setScalarValue(Instr, Instance, Cloned); + // VectorLoopValueMap.setScalarValue(Instr, Instance, Cloned); + State.set(Def, Instr, Cloned, Instance); // If we just cloned a new assumption, add it the assumption cache. if (auto *II = dyn_cast(Cloned)) @@ -4400,7 +4405,8 @@ } void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF, - ElementCount VF) { + ElementCount VF, VPValue *Def, + VPTransformState &State) { assert(!VF.isScalable() && "scalable vectors not yet supported."); PHINode *P = cast(PN); if (EnableVPlanNativePath) { @@ -4411,7 +4417,7 @@ Type *VecTy = (VF.isScalar()) ? PN->getType() : VectorType::get(PN->getType(), VF); Value *VecPhi = Builder.CreatePHI(VecTy, PN->getNumOperands(), "vec.phi"); - VectorLoopValueMap.setVectorValue(P, 0, VecPhi); + State.set(Def, P, VecPhi, 0); OrigPHIsToFix.push_back(P); return; @@ -4433,7 +4439,7 @@ ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), VF); Value *EntryPart = PHINode::Create( VecTy, 2, "vec.phi", &*LoopVectorBody->getFirstInsertionPt()); - VectorLoopValueMap.setVectorValue(P, Part, EntryPart); + State.set(Def, P, EntryPart, Part); } return; } @@ -4476,7 +4482,7 @@ Value *SclrGep = emitTransformedIndex(Builder, GlobalIdx, PSE.getSE(), DL, II); SclrGep->setName("next.gep"); - VectorLoopValueMap.setScalarValue(P, {Part, Lane}, SclrGep); + State.set(Def, P, SclrGep, {Part, Lane}); } } return; @@ -4524,7 +4530,7 @@ StartOffset, Builder.CreateVectorSplat(VF.getKnownMinValue(), ScalarStepValue), "vector.gep")); - VectorLoopValueMap.setVectorValue(P, Part, GEP); + State.set(Def, P, GEP, Part); } } } @@ -8079,7 +8085,7 @@ } void VPWidenPHIRecipe::execute(VPTransformState &State) { - State.ILV->widenPHIInstruction(Phi, State.UF, State.VF); + State.ILV->widenPHIInstruction(Phi, State.UF, State.VF, this, State); } void VPBlendRecipe::execute(VPTransformState &State) { @@ -8118,7 +8124,7 @@ } } for (unsigned Part = 0; Part < State.UF; ++Part) - State.ValueMap.setVectorValue(Phi, Part, Entry[Part]); + State.set(this, Phi, Entry[Part], Part); } void VPInterleaveRecipe::execute(VPTransformState &State) { @@ -8162,7 +8168,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) { if (State.Instance) { // Generate a single instance. - State.ILV->scalarizeInstruction(getUnderlyingInstr(), *this, + State.ILV->scalarizeInstruction(getUnderlyingInstr(), this, *this, *State.Instance, IsPredicated, State); // Insert scalar instance packing it into a vector. if (AlsoPack && State.VF.isVector()) { @@ -8171,11 +8177,9 @@ assert(!State.VF.isScalable() && "VF is assumed to be non scalable."); Value *Undef = UndefValue::get( VectorType::get(getUnderlyingValue()->getType(), State.VF)); - State.ValueMap.setVectorValue(getUnderlyingInstr(), - State.Instance->Part, Undef); + State.set(this, getUnderlyingInstr(), Undef, State.Instance->Part); } - State.ILV->packScalarIntoVectorValue(getUnderlyingInstr(), - *State.Instance); + State.ILV->packScalarIntoVectorValue(this, *State.Instance, State); } return; } @@ -8186,8 +8190,8 @@ unsigned EndLane = IsUniform ? 1 : State.VF.getKnownMinValue(); for (unsigned Part = 0; Part < State.UF; ++Part) for (unsigned Lane = 0; Lane < EndLane; ++Lane) - State.ILV->scalarizeInstruction(getUnderlyingInstr(), *this, {Part, Lane}, - IsPredicated, State); + State.ILV->scalarizeInstruction(getUnderlyingInstr(), this, *this, + {Part, Lane}, IsPredicated, State); } void VPBranchOnMaskRecipe::execute(VPTransformState &State) { @@ -8224,28 +8228,30 @@ BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor(); assert(PredicatingBB && "Predicated block has no single predecessor."); + auto *RepR = dyn_cast(getOperand(0)); + unsigned Part = State.Instance->Part; // By current pack/unpack logic we need to generate only a single phi node: if // a vector value for the predicated instruction exists at this point it means // the instruction has vector users only, and a phi for the vector value is // needed. In this case the recipe of the predicated instruction is marked to // also do that packing, thereby "hoisting" the insert-element sequence. // Otherwise, a phi node for the scalar value is needed. - unsigned Part = State.Instance->Part; - Instruction *PredInst = - cast(getOperand(0)->getUnderlyingValue()); - if (State.ValueMap.hasVectorValue(PredInst, Part)) { - Value *VectorValue = State.ValueMap.getVectorValue(PredInst, Part); + if (!State.hasVectorValue(getOperand(0), Part)) { + Type *PredInstType = RepR->getUnderlyingInstr()->getType(); + PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2); + Phi->addIncoming(UndefValue::get(ScalarPredInst->getType()), PredicatingBB); + Phi->addIncoming(ScalarPredInst, PredicatedBB); + State.set(this, Phi, *State.Instance); + State.set(getOperand(0), Phi, *State.Instance); + + } else { + Value *VectorValue = State.get(getOperand(0), Part); InsertElementInst *IEI = cast(VectorValue); PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2); VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector. VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element. - State.ValueMap.resetVectorValue(PredInst, Part, VPhi); // Update cache. - } else { - Type *PredInstType = PredInst->getType(); - PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2); - Phi->addIncoming(UndefValue::get(ScalarPredInst->getType()), PredicatingBB); - Phi->addIncoming(ScalarPredInst, PredicatedBB); - State.ValueMap.resetScalarValue(PredInst, *State.Instance, Phi); + State.set(this, VPhi, VPhi, Part); + State.set(getOperand(0), VPhi, Part); } } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -283,6 +283,7 @@ Value *get(VPValue *Def, unsigned Part); /// Get the generated Value for a given VPValue and given Part and Lane. + Value *get(VPValue *Def, VPIteration Instance); bool hasVectorValue(VPValue *Def, unsigned Part) { @@ -308,6 +309,7 @@ } Data.PerPartOutput[Def][Part] = V; } + void set(VPValue *Def, Value *IRDef, Value *V, unsigned Part); void set(VPValue *Def, Value *IRDef, Value *V, const VPIteration &Instance); @@ -948,7 +950,6 @@ VPWidenIntOrFpInductionRecipe(PHINode *IV, Instruction *Cast, TruncInst *Trunc = nullptr) : VPRecipeBase(VPWidenIntOrFpInductionSC), IV(IV), IsTrunc(false) { - if (Trunc) { new VPValue(VPValue::VPValueSubSC, Trunc, this); IsTrunc = true; @@ -975,13 +976,13 @@ }; /// A recipe for handling all phi nodes except for integer and FP inductions. -class VPWidenPHIRecipe : public VPRecipeBase { +class VPWidenPHIRecipe : public VPRecipeBase, public VPValue { PHINode *Phi; public: - VPWidenPHIRecipe(PHINode *Phi) : VPRecipeBase(VPWidenPHISC), Phi(Phi) { - new VPValue(VPValue::VPValueSubSC, Phi, this); - } + VPWidenPHIRecipe(PHINode *Phi) + : VPRecipeBase(VPWidenPHISC), VPValue(VPValue::VPVWidenPHISC, Phi, this), + Phi(Phi) {} ~VPWidenPHIRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. @@ -999,7 +1000,7 @@ /// A recipe for vectorizing a phi-node as a sequence of mask-based select /// instructions. -class VPBlendRecipe : public VPRecipeBase, public VPUser { +class VPBlendRecipe : public VPRecipeBase, public VPUser, public VPValue { PHINode *Phi; public: @@ -1007,8 +1008,8 @@ /// respective masks, ordered [I0, M0, I1, M1, ...]. Note that a single value /// might be incoming with a full mask for which there is no VPValue. VPBlendRecipe(PHINode *Phi, ArrayRef Operands) - : VPRecipeBase(VPBlendSC), VPUser(Operands), Phi(Phi) { - new VPValue(VPValue::VPValueSubSC, Phi, this); + : VPRecipeBase(VPBlendSC), VPUser(Operands), + VPValue(VPValue::VPVBlendSC, Phi, this), Phi(Phi) { assert(Operands.size() > 0 && ((Operands.size() == 1) || (Operands.size() % 2 == 0)) && "Expected either a single incoming value or a positive even number " @@ -1206,6 +1207,8 @@ VPSlotTracker &SlotTracker) const override; bool isUniform() const { return IsUniform; } + + bool isPacked() const { return AlsoPack; } }; /// A recipe for generating conditional branches on the bits of a mask. @@ -1250,15 +1253,13 @@ /// order to merge values that are set under such a branch and feed their uses. /// The phi nodes can be scalar or vector depending on the users of the value. /// This recipe works in concert with VPBranchOnMaskRecipe. -class VPPredInstPHIRecipe : public VPRecipeBase, public VPUser { - +class VPPredInstPHIRecipe : public VPRecipeBase, public VPUser, public VPValue { public: /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi /// nodes after merging back from a Branch-on-Mask. VPPredInstPHIRecipe(VPValue *PredV) - : VPRecipeBase(VPPredInstPHISC), VPUser(PredV) { - new VPValue(VPValue::VPValueSubSC, PredV->getUnderlyingValue(), this); - } + : VPRecipeBase(VPPredInstPHISC), VPUser(PredV), + VPValue(VPValue::VPVPredInstPHI, nullptr, this) {} ~VPPredInstPHIRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -225,7 +225,6 @@ if (auto *ReplicateR = dyn_cast(Def)) if (ReplicateR->isUniform()) Instance.Lane = 0; - if (hasScalarValue(Def, Instance)) return Data.PerPartScalars[Def][Instance.Part][Instance.Lane]; @@ -243,7 +242,20 @@ return Extract; } - return Callback.getOrCreateScalarValue(VPValue2Value[Def], Instance); + if (hasScalarValue(Def, Instance)) + return Data.PerPartScalars[Def][Instance.Part][Instance.Lane]; + + assert(hasVectorValue(Def, Instance.Part)); + auto *VecPart = Data.PerPartOutput[Def][Instance.Part]; + if (!VecPart->getType()->isVectorTy()) { + assert(Instance.Lane == 0 && "cannot get lane > 0 for scalar"); + return VecPart; + } + // TODO: Cache created scalar values. + auto *Extract = + Builder.CreateExtractElement(VecPart, Builder.getInt32(Instance.Lane)); + // set(Def, Extract, Instance); + return Extract; } BasicBlock * @@ -975,6 +987,8 @@ void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << "\"PHI-PREDICATED-INSTRUCTION "; + printAsOperand(O, SlotTracker); + O << " = "; printOperands(O, SlotTracker); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -81,6 +81,10 @@ NewRecipe->insertBefore(Ingredient); VPInst->replaceAllUsesWith(&DummyValue); Ingredient->eraseFromParent(); + Plan->removeVPValueFor(Inst); + for (auto *Def : NewRecipe->definedValues()) { + Plan->addVPValue(Inst, Def); + } } } } diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -101,13 +101,17 @@ enum { VPValueSC, /// VPValue defined outside of VPlan. VPValueSubSC, // VPValue defined by a VPDef that defines multiple VPValues. + VPVBlendSC, VPVInstructionSC, VPVMemoryInstructionSC, + VPVPredInstPHI, VPVReductionSC, VPVReplicateSC, VPVWidenSC, VPVWidenCallSC, VPVWidenGEPSC, + VPVWidenIntOrFpIndcutionSC, + VPVWidenPHISC, VPVWidenSelectSC, };