Index: llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -292,8 +292,7 @@ /// Build a VPlan using VPRecipes according to the information gather by /// Legal. This method is only used for the legacy inner loop vectorizer. VPlanPtr buildVPlanWithVPRecipes( - VFRange &Range, SmallPtrSetImpl &NeedDef, - SmallPtrSetImpl &DeadInstructions, + VFRange &Range, SmallPtrSetImpl &DeadInstructions, const DenseMap &SinkAfter); /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive, @@ -307,6 +306,8 @@ /// reduction chain. void adjustRecipesForInLoopReductions(VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder); + bool adjustVPlanForVectorPatterns(VPlanPtr &Plan, + VPRecipeBuilder &RecipeBuilder); }; } // namespace llvm Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -152,6 +152,8 @@ #include #include +#include + using namespace llvm; #define LV_NAME "loop-vectorize" @@ -555,7 +557,6 @@ /// non-null. Use \p State to translate given VPValues to IR values in the /// vectorized loop. void vectorizeMemoryInstruction(Instruction *Instr, VPTransformState &State, - VPValue *Def, VPValue *Addr, VPValue *StoredValue, VPValue *BlockInMask); @@ -7254,7 +7255,7 @@ if (!BI->isConditional() || BI->getSuccessor(0) == BI->getSuccessor(1)) return EdgeMaskCache[Edge] = SrcMask; - VPValue *EdgeMask = Plan->getVPValue(BI->getCondition()); + VPValue *EdgeMask = Plan->getOrAddVPValue(BI->getCondition()); assert(EdgeMask && "No Edge Mask found for condition"); if (BI->getSuccessor(0) != Dst) @@ -7292,11 +7293,11 @@ // Start by constructing the desired canonical IV. VPValue *IV = nullptr; if (Legal->getPrimaryInduction()) - IV = Plan->getVPValue(Legal->getPrimaryInduction()); + IV = Plan->getOrAddVPValue(Legal->getPrimaryInduction()); else { auto IVRecipe = new VPWidenCanonicalIVRecipe(); Builder.getInsertBlock()->insert(IVRecipe, NewInsertionPoint); - IV = IVRecipe->getVPValue(); + IV = IVRecipe; } VPValue *BTC = Plan->getOrCreateBackedgeTakenCount(); bool TailFolded = !CM.isScalarEpilogueAllowed(); @@ -7539,7 +7540,10 @@ return nullptr; // Success: widen this instruction. - return new VPWidenRecipe(*I, Plan.mapToVPValues(I->operands())); + VPWidenRecipe *Recipe = + new VPWidenRecipe(*I, Plan.mapToVPValues(I->operands())); + Plan.addVPValue(I, Recipe); + return Recipe; } VPBasicBlock *VPRecipeBuilder::handleReplication( @@ -7662,32 +7666,6 @@ unsigned MaxVF) { assert(OrigLoop->empty() && "Inner loop expected."); - // Collect conditions feeding internal conditional branches; they need to be - // represented in VPlan for it to model masking. - SmallPtrSet NeedDef; - - auto *Latch = OrigLoop->getLoopLatch(); - for (BasicBlock *BB : OrigLoop->blocks()) { - if (BB == Latch) - continue; - BranchInst *Branch = dyn_cast(BB->getTerminator()); - if (Branch && Branch->isConditional()) - NeedDef.insert(Branch->getCondition()); - } - - // If the tail is to be folded by masking, the primary induction variable, if - // exists needs to be represented in VPlan for it to model early-exit masking. - // Also, both the Phi and the live-out instruction of each reduction are - // required in order to introduce a select between them in VPlan. - if (CM.foldTailByMasking()) { - if (Legal->getPrimaryInduction()) - NeedDef.insert(Legal->getPrimaryInduction()); - for (auto &Reduction : Legal->getReductionVars()) { - NeedDef.insert(Reduction.first); - NeedDef.insert(Reduction.second.getLoopExitInstr()); - } - } - // Collect instructions from the original loop that will become trivially dead // in the vectorized loop. We don't need to vectorize these instructions. For // example, original induction update instructions can become dead because we @@ -7711,15 +7689,14 @@ for (unsigned VF = MinVF; VF < MaxVF + 1;) { VFRange SubRange = {VF, MaxVF + 1}; - VPlans.push_back(buildVPlanWithVPRecipes(SubRange, NeedDef, - DeadInstructions, SinkAfter)); + VPlans.push_back( + buildVPlanWithVPRecipes(SubRange, DeadInstructions, SinkAfter)); VF = SubRange.End; } } VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( - VFRange &Range, SmallPtrSetImpl &NeedDef, - SmallPtrSetImpl &DeadInstructions, + VFRange &Range, SmallPtrSetImpl &DeadInstructions, const DenseMap &SinkAfter) { // Hold a mapping from predicated instructions to their recipes, in order to @@ -7808,10 +7785,6 @@ VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry"); Plan->setEntry(VPBB); - // Represent values that will have defs inside VPlan. - for (Value *V : NeedDef) - Plan->addVPValue(V); - // Scan the body of the loop in a topological order to visit each basic block // after having visited its predecessor basic blocks. LoopBlocksDFS DFS(OrigLoop); @@ -7880,8 +7853,10 @@ } // Adjust the recipes for any inloop reductions. - if (Range.Start > 1) + if (Range.Start > 1) { adjustRecipesForInLoopReductions(Plan, RecipeBuilder); + adjustVPlanForVectorPatterns(Plan, RecipeBuilder); + } // Finally, if tail is folded by masking, introduce selects between the phi // and the live-out instruction of each reduction, at the end of the latch. @@ -7891,8 +7866,8 @@ for (auto &Reduction : Legal->getReductionVars()) { if (CM.isInLoopReduction(Reduction.first)) continue; - VPValue *Phi = Plan->getVPValue(Reduction.first); - VPValue *Red = Plan->getVPValue(Reduction.second.getLoopExitInstr()); + VPValue *Phi = Plan->getOrAddVPValue(Reduction.first); + VPValue *Red = Plan->getOrAddVPValue(Reduction.second.getLoopExitInstr()); Builder.createNaryOp(Instruction::Select, {Cond, Red, Phi}); } } @@ -7966,21 +7941,21 @@ VPRecipeBase *WidenRecipe = RecipeBuilder.getRecipe(R); RecurrenceDescriptor::RecurrenceKind Kind = RdxDesc.getRecurrenceKind(); - VPValue *ChainOp = Plan->getVPValue(Chain); + VPValue *ChainOp = Plan->getOrAddVPValue(Chain); unsigned FirstOpId; if (Kind == RecurrenceDescriptor::RK_IntegerMinMax || Kind == RecurrenceDescriptor::RK_FloatMinMax) { - assert(WidenRecipe->getVPRecipeID() == VPRecipeBase::VPWidenSelectSC && + assert(WidenRecipe->getVPValueID() == VPRecipeBase::VPWidenSelectSC && "Expected to replace a VPWidenSelectSC"); FirstOpId = 1; } else { - assert(WidenRecipe->getVPRecipeID() == VPRecipeBase::VPWidenSC && + assert(WidenRecipe->getVPValueID() == VPRecipeBase::VPWidenSC && "Expected to replace a VPWidenSC"); FirstOpId = 0; } unsigned VecOpId = R->getOperand(FirstOpId) == Chain ? FirstOpId + 1 : FirstOpId; - VPValue *VecOp = Plan->getVPValue(R->getOperand(VecOpId)); + VPValue *VecOp = Plan->getOrAddVPValue(R->getOperand(VecOpId)); bool UseReductionIntrinsic = TTI->useReductionIntrinsic( RdxDesc.getRecurrenceBinOp(Kind), RdxDesc.getRecurrenceType(), @@ -7993,12 +7968,13 @@ UseReductionIntrinsic); WidenRecipe->getParent()->insert(RedRecipe, WidenRecipe->getIterator()); WidenRecipe->eraseFromParent(); + Plan->addOrReplaceVPValue(R, RedRecipe); if (Kind == RecurrenceDescriptor::RK_IntegerMinMax || Kind == RecurrenceDescriptor::RK_FloatMinMax) { VPRecipeBase *CompareRecipe = RecipeBuilder.getRecipe(cast(R->getOperand(0))); - assert(CompareRecipe->getVPRecipeID() == VPRecipeBase::VPWidenSC && + assert(CompareRecipe->getVPValueID() == VPRecipeBase::VPWidenSC && "Expected to replace a VPWidenSC"); CompareRecipe->eraseFromParent(); } @@ -8007,6 +7983,132 @@ } } +// TODOD: Move to a VPPatternMatch.h file. +using namespace PatternMatch; + +template +struct VPBinaryOp_match { + LHS_t L; + RHS_t R; + + VPBinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} + + template bool match(OpTy *V) { + const VPWidenRecipe *VW = dyn_cast(V); + if (VW && VW->getOpcode() == Opcode) + return L.match(VW->getOperand(0)) && R.match(VW->getOperand(1)); + // TODOD: VPInstruction? + return false; + } +}; +template +inline VPBinaryOp_match vp_Mul(const LHS &L, + const RHS &R) { + return VPBinaryOp_match(L, R); +} +template +inline VPBinaryOp_match vp_LShr(const LHS &L, + const RHS &R) { + return VPBinaryOp_match(L, R); +} + +template struct VPUnaryOp_match { + LHS_t L; + + VPUnaryOp_match(const LHS_t &LHS) : L(LHS) {} + + template bool match(OpTy *V) { + const VPWidenRecipe *VW = dyn_cast(V); + if (VW && VW->getOpcode() == Opcode) + return L.match(VW->getOperand(0)); + return false; + } +}; +template +inline VPUnaryOp_match vp_Trunc(const LHS &L) { + return VPUnaryOp_match(L); +} +template +inline VPUnaryOp_match vp_SExt(const LHS &L) { + return VPUnaryOp_match(L); +} +template +inline VPUnaryOp_match vp_ZExt(const LHS &L) { + return VPUnaryOp_match(L); +} + +inline bind_ty vp_Value(VPValue *&V) { return V; } +inline bind_ty vp_Value(const VPValue *&V) { return V; } + +struct VPspecific_intval { + APInt Val; + + VPspecific_intval(APInt V) : Val(std::move(V)) {} + + template bool match(ITy *V) { + const auto *CI = dyn_cast(V->getUnderlyingValue()); + return CI && APInt::isSameValue(CI->getValue(), Val); + } +}; +inline VPspecific_intval vp_SpecificInt(uint64_t V) { + return VPspecific_intval(APInt(64, V)); +} + +// trunc(lsr(mul(ext, ext)), BW)) -> mulhs/mulhu +static bool tryCombineToMulh(VPBasicBlock::reverse_iterator &It, VPlanPtr &Plan, + VPRecipeBuilder &RecipeBuilder) { + VPWidenRecipe *RV = dyn_cast(&*It); + if (!RV) + return false; + + Type *Ty = RV->getType(); + unsigned BW = Ty->getScalarSizeInBits(); + + VPValue *Mul; + // TODOD: One use checks? + if (!match(RV, vp_Trunc(vp_LShr(vp_Value(Mul), vp_SpecificInt(BW))))) + return false; + + VPValue *A, *B; + unsigned Opcode = 0; + if (match(Mul, vp_Mul(vp_SExt(vp_Value(A)), vp_SExt(vp_Value(B))))) + Opcode = VPInstruction::Mulhs; + else if (match(Mul, vp_Mul(vp_ZExt(vp_Value(A)), vp_ZExt(vp_Value(B))))) + Opcode = VPInstruction::Mulhu; + else + return false; + + unsigned MulBW = Mul->getType()->getScalarSizeInBits(); + if (MulBW > BW * 2) + return false; + + if (A->getType() != Ty || B->getType() != Ty) // TODOD: Generalize + return false; + + auto *Mulh = new VPInstruction(Opcode, {A, B}); + RV->getParent()->insert(Mulh, RV->getIterator()); + RV->replaceAllUsesWith(Mulh); + RV->recursivelyDeleteUnusedRecipes(); + It = Mulh->getReverseIterator(); + return true; +} + +bool LoopVectorizationPlanner::adjustVPlanForVectorPatterns( + VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder) { + bool Changed = false; + for (VPBlockBase *Block : depth_first(Plan->getEntry())) { + if (auto *BB = dyn_cast(Block)) { + for (VPBasicBlock::reverse_iterator It = BB->rbegin(), E = BB->rend(); + It != E; ++It) { + if (true /*TODOD: TTI->hasVectorPattern(Mulh)*/) + Changed |= tryCombineToMulh(It, Plan, RecipeBuilder); + } + } + } + + return Changed; +} + Value* LoopVectorizationPlanner::VPCallbackILV:: getOrCreateVectorValues(Value *V, unsigned Part) { return ILV.getOrCreateVectorValue(V, Part); @@ -8034,35 +8136,36 @@ } void VPWidenCallRecipe::execute(VPTransformState &State) { - State.ILV->widenCallInstruction(*getUnderlyingCall(), this, User, State); + State.ILV->widenCallInstruction(*getUnderlyingCall(), this, *this, State); } void VPWidenSelectRecipe::execute(VPTransformState &State) { - State.ILV->widenSelectInstruction(*getUnderlyingInstruction(), this, User, + State.ILV->widenSelectInstruction(*getUnderlyingInstruction(), this, *this, InvariantCond, State); } void VPWidenRecipe::execute(VPTransformState &State) { - State.ILV->widenInstruction(Ingredient, User, State); + State.ILV->widenInstruction(*getUnderlyingInstruction(), *this, + State); } void VPWidenGEPRecipe::execute(VPTransformState &State) { - State.ILV->widenGEP(getUnderlyingInstruction(), this, User, State.UF, + State.ILV->widenGEP(getUnderlyingInstruction(), this, *this, State.UF, State.VF, IsPtrLoopInvariant, IsIndexLoopInvariant, State); } void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { assert(!State.Instance && "Int or FP induction being replicated."); - State.ILV->widenIntOrFpInduction(IV, Trunc); + State.ILV->widenIntOrFpInduction(getIV(), Trunc); } void VPWidenPHIRecipe::execute(VPTransformState &State) { - State.ILV->widenPHIInstruction(Phi, State.UF, State.VF); + State.ILV->widenPHIInstruction(getPhi(), State.UF, State.VF); } void VPBlendRecipe::execute(VPTransformState &State) { - State.ILV->setDebugLocFromInst(State.Builder, Phi); + State.ILV->setDebugLocFromInst(State.Builder, getPhi()); // We know that all PHIs in non-header blocks are converted into // selects, so we don't have to worry about the insertion order and we // can just use the builder. @@ -8097,7 +8200,7 @@ } } for (unsigned Part = 0; Part < State.UF; ++Part) - State.ValueMap.setVectorValue(Phi, Part, Entry[Part]); + State.ValueMap.setVectorValue(getPhi(), Part, Entry[Part]); } void VPInterleaveRecipe::execute(VPTransformState &State) { @@ -8110,9 +8213,9 @@ assert(!State.Instance && "Reduction being replicated."); for (unsigned Part = 0; Part < State.UF; ++Part) { RecurrenceDescriptor::RecurrenceKind Kind = RdxDesc->getRecurrenceKind(); - Value *NewVecOp = State.get(VecOp, Part); - if (CondOp) { - Value *NewCond = State.get(CondOp, Part); + Value *NewVecOp = State.get(getVecOp(), Part); + if (getCondOp()) { + Value *NewCond = State.get(getCondOp(), Part); VectorType *VecTy = cast(NewVecOp->getType()); Constant *Iden = RecurrenceDescriptor::getRecurrenceIdentity( Kind, RdxDesc->getMinMaxRecurrenceKind(), VecTy->getElementType()); @@ -8123,7 +8226,7 @@ } Value *NewRed = createTargetReduction(State.Builder, *RdxDesc, NewVecOp, UseReductionIntrinsic, NoNaN); - Value *PrevInChain = State.get(ChainOp, Part); + Value *PrevInChain = State.get(getChainOp(), Part); Value *NextInChain; if (Kind == RecurrenceDescriptor::RK_IntegerMinMax || Kind == RecurrenceDescriptor::RK_FloatMinMax) { @@ -8132,26 +8235,30 @@ NewRed, PrevInChain); } else { NextInChain = State.Builder.CreateBinOp( - (Instruction::BinaryOps)I->getOpcode(), NewRed, PrevInChain); + (Instruction::BinaryOps)cast(getUnderlyingValue()) + ->getOpcode(), + NewRed, PrevInChain); } - State.ValueMap.setVectorValue(I, Part, NextInChain); + State.ValueMap.setVectorValue(getUnderlyingValue(), Part, NextInChain); } } void VPReplicateRecipe::execute(VPTransformState &State) { if (State.Instance) { // Generate a single instance. - State.ILV->scalarizeInstruction(Ingredient, User, *State.Instance, + + State.ILV->scalarizeInstruction(getIngredient(), *this, *State.Instance, IsPredicated, State); // Insert scalar instance packing it into a vector. if (AlsoPack && State.VF.isVector()) { // If we're constructing lane 0, initialize to start from undef. if (State.Instance->Lane == 0) { assert(!State.VF.isScalable() && "VF is assumed to be non scalable."); - Value *Undef = - UndefValue::get(VectorType::get(Ingredient->getType(), State.VF)); - State.ValueMap.setVectorValue(Ingredient, State.Instance->Part, Undef); + Value *Undef = UndefValue::get( + VectorType::get(getIngredient()->getType(), State.VF)); + State.ValueMap.setVectorValue(getIngredient(), State.Instance->Part, + Undef); } - State.ILV->packScalarIntoVectorValue(Ingredient, *State.Instance); + State.ILV->packScalarIntoVectorValue(getIngredient(), *State.Instance); } return; } @@ -8162,7 +8269,7 @@ unsigned EndLane = IsUniform ? 1 : State.VF.getKnownMinValue(); for (unsigned Part = 0; Part < State.UF; ++Part) for (unsigned Lane = 0; Lane < EndLane; ++Lane) - State.ILV->scalarizeInstruction(Ingredient, User, {Part, Lane}, + State.ILV->scalarizeInstruction(getIngredient(), *this, {Part, Lane}, IsPredicated, State); } @@ -8195,7 +8302,7 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) { assert(State.Instance && "Predicated instruction PHI works per instance."); Instruction *ScalarPredInst = cast( - State.ValueMap.getScalarValue(PredInst, *State.Instance)); + State.ValueMap.getScalarValue(getPredInst(), *State.Instance)); BasicBlock *PredicatedBB = ScalarPredInst->getParent(); BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor(); assert(PredicatingBB && "Predicated block has no single predecessor."); @@ -8207,19 +8314,19 @@ // also do that packing, thereby "hoisting" the insert-element sequence. // Otherwise, a phi node for the scalar value is needed. unsigned Part = State.Instance->Part; - if (State.ValueMap.hasVectorValue(PredInst, Part)) { - Value *VectorValue = State.ValueMap.getVectorValue(PredInst, Part); + if (State.ValueMap.hasVectorValue(getPredInst(), Part)) { + Value *VectorValue = State.ValueMap.getVectorValue(getPredInst(), Part); InsertElementInst *IEI = cast(VectorValue); PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2); VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector. VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element. - State.ValueMap.resetVectorValue(PredInst, Part, VPhi); // Update cache. + State.ValueMap.resetVectorValue(getPredInst(), Part, VPhi); // Update cache. } else { - Type *PredInstType = PredInst->getType(); + Type *PredInstType = getPredInst()->getType(); PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2); Phi->addIncoming(UndefValue::get(ScalarPredInst->getType()), PredicatingBB); Phi->addIncoming(ScalarPredInst, PredicatedBB); - State.ValueMap.resetScalarValue(PredInst, *State.Instance, Phi); + State.ValueMap.resetScalarValue(getPredInst(), *State.Instance, Phi); } } Index: llvm/lib/Transforms/Vectorize/VPlan.h =================================================================== --- llvm/lib/Transforms/Vectorize/VPlan.h +++ llvm/lib/Transforms/Vectorize/VPlan.h @@ -606,12 +606,11 @@ /// VPRecipeBase is a base class modeling a sequence of one or more output IR /// instructions. -class VPRecipeBase : public ilist_node_with_parent { +class VPRecipeBase : public VPUser, + public ilist_node_with_parent { friend VPBasicBlock; friend class VPBlockUtils; - const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast). - /// Each VPRecipe belongs to a single VPBasicBlock. VPBasicBlock *Parent = nullptr; @@ -621,7 +620,7 @@ /// SubclassID field of the VPRecipeBase objects. They are used for concrete /// type identification. using VPRecipeTy = enum { - VPBlendSC, + VPBlendSC = VPValue::VPValueLastID, VPBranchOnMaskSC, VPInstructionSC, VPInterleaveSC, @@ -638,14 +637,15 @@ VPWidenSelectSC }; - VPRecipeBase(const unsigned char SC) : SubclassID(SC) {} + VPRecipeBase(const unsigned char SC, Value *UV) : VPUser(SC, UV) {} + VPRecipeBase(const unsigned char SC, Value *UV, ArrayRef Operands) + : VPUser(SC, UV, Operands) {} + template + VPRecipeBase(const unsigned char SC, Value *UV, + iterator_range Operands) + : VPUser(SC, UV, Operands) {} virtual ~VPRecipeBase() = default; - /// \return an ID for the concrete type of this object. - /// This is used to implement the classof checks. This should not be used - /// for any other purpose, as the values may change as LLVM evolves. - unsigned getVPRecipeID() const { return SubclassID; } - /// \return the VPBasicBlock which this VPRecipe belongs to. VPBasicBlock *getParent() { return Parent; } const VPBasicBlock *getParent() const { return Parent; } @@ -678,13 +678,19 @@ /// /// \returns an iterator pointing to the element after the erased one iplist::iterator eraseFromParent(); + + void recursivelyDeleteUnusedRecipes(); + + static inline bool classof(const VPValue *V) { + return V->getVPValueID() >= VPBlendSC; + } }; /// This is a concrete Recipe that models a single VPlan-level instruction. /// While as any Recipe it may generate a sequence of IR instructions when /// executed, these instructions would always form a single-def expression as /// the VPInstruction is also a single def-use vertex. -class VPInstruction : public VPUser, public VPValue, public VPRecipeBase { +class VPInstruction : public VPRecipeBase { friend class VPlanSlp; public: @@ -695,6 +701,8 @@ SLPLoad, SLPStore, ActiveLaneMask, + Mulhs, + Mulhu, }; private: @@ -714,25 +722,14 @@ public: VPInstruction(unsigned Opcode, ArrayRef Operands) - : VPUser(Operands), VPValue(VPValue::VPInstructionSC), - VPRecipeBase(VPRecipeBase::VPInstructionSC), Opcode(Opcode) {} + : VPRecipeBase(VPInstructionSC, nullptr, Operands), Opcode(Opcode) {} VPInstruction(unsigned Opcode, std::initializer_list Operands) : VPInstruction(Opcode, ArrayRef(Operands)) {} /// Method to support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const VPValue *V) { - return V->getVPValueID() == VPValue::VPInstructionSC; - } - - VPInstruction *clone() const { - SmallVector Operands(operands()); - return new VPInstruction(Opcode, Operands); - } - - /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *R) { - return R->getVPRecipeID() == VPRecipeBase::VPInstructionSC; + return V->getVPValueID() == VPInstructionSC; } unsigned getOpcode() const { return Opcode; } @@ -783,22 +780,16 @@ /// ingredient. This recipe covers most of the traditional vectorization cases /// where each ingredient transforms into a vectorized version of itself. class VPWidenRecipe : public VPRecipeBase { - /// Hold the instruction to be widened. - Instruction &Ingredient; - - /// Hold VPValues for the operands of the ingredient. - VPUser User; - public: template VPWidenRecipe(Instruction &I, iterator_range Operands) - : VPRecipeBase(VPWidenSC), Ingredient(I), User(Operands) {} + : VPRecipeBase(VPWidenSC, &I, Operands) {} ~VPWidenRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPWidenSC; + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPWidenSC; } /// Produce widened copies of all Ingredients. @@ -807,24 +798,28 @@ /// Print the recipe. void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override; + + Instruction *getUnderlyingInstruction() { + return cast(getUnderlyingValue()); + } + const Instruction *getUnderlyingInstruction() const { + return cast(getUnderlyingValue()); + } + unsigned getOpcode() const { return getUnderlyingInstruction()->getOpcode(); } }; /// A recipe for widening Call instructions. -class VPWidenCallRecipe : public VPRecipeBase, public VPValue { - /// Hold VPValues for the arguments of the call. - VPUser User; - +class VPWidenCallRecipe : public VPRecipeBase { public: template VPWidenCallRecipe(CallInst &I, iterator_range CallArguments) - : VPRecipeBase(VPWidenCallSC), VPValue(VPValue::VPVWidenCallSC, &I), - User(CallArguments) {} + : VPRecipeBase(VPWidenCallSC, &I, CallArguments) {} ~VPWidenCallRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPWidenCallSC; + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPWidenCallSC; } /// Produce a widened version of the call instruction. @@ -841,11 +836,8 @@ }; /// A recipe for widening select instructions. -class VPWidenSelectRecipe : public VPRecipeBase, public VPValue { +class VPWidenSelectRecipe : public VPRecipeBase { private: - /// Hold VPValues for the operands of the select. - VPUser User; - /// Is the condition of the select loop invariant? bool InvariantCond; @@ -853,14 +845,14 @@ template VPWidenSelectRecipe(SelectInst &I, iterator_range Operands, bool InvariantCond) - : VPRecipeBase(VPWidenSelectSC), VPValue(VPValue::VPVWidenSelectSC, &I), - User(Operands), InvariantCond(InvariantCond) {} + : VPRecipeBase(VPWidenSelectSC, &I, Operands), + InvariantCond(InvariantCond) {} ~VPWidenSelectRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPWidenSelectSC; + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPWidenSelectSC; } /// Produce a widened version of the select instruction. @@ -876,10 +868,7 @@ }; /// A recipe for handling GEP instructions. -class VPWidenGEPRecipe : public VPRecipeBase, public VPValue { - /// Hold VPValues for the base and indices of the GEP. - VPUser User; - +class VPWidenGEPRecipe : public VPRecipeBase { bool IsPtrLoopInvariant; SmallBitVector IsIndexLoopInvariant; @@ -887,8 +876,8 @@ template VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range Operands, Loop *OrigLoop) - : VPRecipeBase(VPWidenGEPSC), VPValue(VPValue::VPVWidenGEPSC, GEP), - User(Operands), IsIndexLoopInvariant(GEP->getNumIndices(), false) { + : VPRecipeBase(VPWidenGEPSC, GEP, Operands), + IsIndexLoopInvariant(GEP->getNumIndices(), false) { IsPtrLoopInvariant = OrigLoop->isLoopInvariant(GEP->getPointerOperand()); for (auto Index : enumerate(GEP->indices())) IsIndexLoopInvariant[Index.index()] = @@ -897,8 +886,8 @@ ~VPWidenGEPRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC; + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPWidenGEPSC; } /// Generate the gep nodes. @@ -919,17 +908,16 @@ /// A recipe for handling phi nodes of integer and floating-point inductions, /// producing their vector and scalar values. class VPWidenIntOrFpInductionRecipe : public VPRecipeBase { - PHINode *IV; TruncInst *Trunc; public: VPWidenIntOrFpInductionRecipe(PHINode *IV, TruncInst *Trunc = nullptr) - : VPRecipeBase(VPWidenIntOrFpInductionSC), IV(IV), Trunc(Trunc) {} + : VPRecipeBase(VPWidenIntOrFpInductionSC, IV), Trunc(Trunc) {} ~VPWidenIntOrFpInductionRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPWidenIntOrFpInductionSC; + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPWidenIntOrFpInductionSC; } /// Generate the vectorized and scalarized versions of the phi node as @@ -939,19 +927,20 @@ /// Print the recipe. void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override; + + PHINode *getIV() { return cast(getUnderlyingValue()); } + const PHINode *getIV() const { return cast(getUnderlyingValue()); } }; /// A recipe for handling all phi nodes except for integer and FP inductions. class VPWidenPHIRecipe : public VPRecipeBase { - PHINode *Phi; - public: - VPWidenPHIRecipe(PHINode *Phi) : VPRecipeBase(VPWidenPHISC), Phi(Phi) {} + VPWidenPHIRecipe(PHINode *Phi) : VPRecipeBase(VPWidenPHISC, Phi) {} ~VPWidenPHIRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPWidenPHISC; + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPWidenPHISC; } /// Generate the phi/select nodes. @@ -960,21 +949,17 @@ /// Print the recipe. void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override; + + PHINode *getPhi() { return cast(getUnderlyingValue()); } + const PHINode *getPhi() const { return cast(getUnderlyingValue()); } }; /// A recipe for vectorizing a phi-node as a sequence of mask-based select /// instructions. class VPBlendRecipe : public VPRecipeBase { - PHINode *Phi; - - /// The blend operation is a User of the incoming values and of their - /// respective masks, ordered [I0, M0, I1, M1, ...]. Note that a single value - /// might be incoming with a full mask for which there is no VPValue. - VPUser User; - public: VPBlendRecipe(PHINode *Phi, ArrayRef Operands) - : VPRecipeBase(VPBlendSC), Phi(Phi), User(Operands) { + : VPRecipeBase(VPBlendSC, Phi, Operands) { assert(Operands.size() > 0 && ((Operands.size() == 1) || (Operands.size() % 2 == 0)) && "Expected either a single incoming value or a positive even number " @@ -982,23 +967,22 @@ } /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPBlendSC; + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPBlendSC; } /// Return the number of incoming values, taking into account that a single /// incoming value has no mask. - unsigned getNumIncomingValues() const { - return (User.getNumOperands() + 1) / 2; - } + unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; } /// Return incoming value number \p Idx. - VPValue *getIncomingValue(unsigned Idx) const { - return User.getOperand(Idx * 2); - } + VPValue *getIncomingValue(unsigned Idx) const { return getOperand(Idx * 2); } /// Return mask number \p Idx. - VPValue *getMask(unsigned Idx) const { return User.getOperand(Idx * 2 + 1); } + VPValue *getMask(unsigned Idx) const { return getOperand(Idx * 2 + 1); } + + PHINode *getPhi() { return cast(getUnderlyingValue()); } + const PHINode *getPhi() const { return cast(getUnderlyingValue()); } /// Generate the phi/select nodes. void execute(VPTransformState &State) override; @@ -1010,41 +994,36 @@ /// VPInterleaveRecipe is a recipe for transforming an interleave group of load /// or stores into one wide load/store and shuffles. -class VPInterleaveRecipe : public VPRecipeBase, public VPValue { +class VPInterleaveRecipe : public VPRecipeBase { const InterleaveGroup *IG; - VPUser User; SmallVector Defs; public: VPInterleaveRecipe(const InterleaveGroup *IG, VPValue *Addr, VPValue *Mask) - : VPRecipeBase(VPInterleaveSC), VPValue(VPValue::VPVInterleaveSC), IG(IG), - User({Addr}) { + : VPRecipeBase(VPInterleaveSC, nullptr, {Addr}), IG(IG) { if (Mask) - User.addOperand(Mask); + addOperand(Mask); for (unsigned i = 0; i < IG->getNumMembers(); i++) Defs.push_back(new VPMultiValue(this)); } ~VPInterleaveRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPInterleaveSC; - } static inline bool classof(const VPValue *V) { - return V->getVPValueID() == VPValue::VPVInterleaveSC; + return V->getVPValueID() == VPInterleaveSC; } /// Return the address accessed by this recipe. VPValue *getAddr() const { - return User.getOperand(0); // Address is the 1st, mandatory operand. + return getOperand(0); // Address is the 1st, mandatory operand. } /// Return the mask used by this recipe. Note that a full mask is represented /// by a nullptr. VPValue *getMask() const { // Mask is optional and therefore the last, currently 2nd operand. - return User.getNumOperands() == 2 ? User.getOperand(1) : nullptr; + return getNumOperands() == 2 ? getOperand(1) : nullptr; } /// Generate the wide load or store, and shuffles. @@ -1060,35 +1039,11 @@ ArrayRef getDefs() { return Defs; } }; -/*template <> struct simplify_type {*/ -// using SimpleType = VPValue *; - -// static SimpleType getSimplifiedValue(VPMultiValue &Val) { -// return Val.getProducer(); -//} -//}; - -// template <> struct simplify_type { -// using SimpleType = VPValue *; - -// static SimpleType getSimplifiedValue(VPMultiValue *&Val) { -// return Val->getProducer(); -//} -//}; - /// A recipe to represent inloop reduction operations, performing a reduction on /// a vector operand into a scalar value, and adding the result to a chain. class VPReductionRecipe : public VPRecipeBase { /// The recurrence decriptor for the reduction in question. RecurrenceDescriptor *RdxDesc; - /// The original instruction being converted to a reduction. - Instruction *I; - /// The VPValue of the vector value to be reduced. - VPValue *VecOp; - /// The VPValue of the scalar Chain being accumulated. - VPValue *ChainOp; - /// The VPValue of the condition for the block. - VPValue *CondOp; /// Fast math flags to use for the resulting reduction operation. bool NoNaN; /// Flag for whether to use reduction intrinsics vs shuffle expansions. @@ -1098,15 +1053,23 @@ VPReductionRecipe(RecurrenceDescriptor *R, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, bool NoNaN, bool UseReductionIntrinsic) - : VPRecipeBase(VPReductionSC), RdxDesc(R), I(I), VecOp(VecOp), - ChainOp(ChainOp), CondOp(CondOp), NoNaN(NoNaN), - UseReductionIntrinsic(UseReductionIntrinsic) {} + : VPRecipeBase(VPReductionSC, I, {ChainOp, VecOp}), RdxDesc(R), + NoNaN(NoNaN), UseReductionIntrinsic(UseReductionIntrinsic) { + if (CondOp) + addOperand(CondOp); + } ~VPReductionRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPReductionSC; + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPReductionSC; + } + + VPValue *getChainOp() const { return getOperand(0); } + VPValue *getVecOp() const { return getOperand(1); } + VPValue *getCondOp() const { + return getNumOperands() > 2 ? getOperand(2) : nullptr; } /// Generate the reduction in the loop @@ -1122,18 +1085,10 @@ /// single copy of widened type for all lanes. If the instruction is known to be /// uniform only one copy, per lane zero, will be generated. class VPReplicateRecipe : public VPRecipeBase { - /// The instruction being replicated. - Instruction *Ingredient; - - /// Hold VPValues for the operands of the ingredient. - VPUser User; - /// Indicator if only a single replica per lane is needed. bool IsUniform; - /// Indicator if the replicas are also predicated. bool IsPredicated; - /// Indicator if the scalar values should also be packed into a vector. bool AlsoPack; @@ -1141,8 +1096,8 @@ template VPReplicateRecipe(Instruction *I, iterator_range Operands, bool IsUniform, bool IsPredicated = false) - : VPRecipeBase(VPReplicateSC), Ingredient(I), User(Operands), - IsUniform(IsUniform), IsPredicated(IsPredicated) { + : VPRecipeBase(VPReplicateSC, I, Operands), IsUniform(IsUniform), + IsPredicated(IsPredicated) { // Retain the previous behavior of predicateInstructions(), where an // insert-element of a predicated instruction got hoisted into the // predicated basic block iff it was its only user. This is achieved by @@ -1154,8 +1109,8 @@ ~VPReplicateRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPReplicateSC; + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPReplicateSC; } /// Generate replicas of the desired Ingredient. Replicas will be generated @@ -1163,6 +1118,13 @@ /// the \p State. void execute(VPTransformState &State) override; + Instruction *getIngredient() { + return cast(getUnderlyingValue()); + } + const Instruction *getIngredient() const { + return cast(getUnderlyingValue()); + } + void setAlsoPack(bool Pack) { AlsoPack = Pack; } /// Print the recipe. @@ -1172,17 +1134,16 @@ /// A recipe for generating conditional branches on the bits of a mask. class VPBranchOnMaskRecipe : public VPRecipeBase { - VPUser User; - public: - VPBranchOnMaskRecipe(VPValue *BlockInMask) : VPRecipeBase(VPBranchOnMaskSC) { + VPBranchOnMaskRecipe(VPValue *BlockInMask) + : VPRecipeBase(VPBranchOnMaskSC, nullptr) { if (BlockInMask) // nullptr means all-one mask. - User.addOperand(BlockInMask); + addOperand(BlockInMask); } /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPBranchOnMaskSC; + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPBranchOnMaskSC; } /// Generate the extraction of the appropriate bit from the block mask and the @@ -1203,9 +1164,9 @@ /// Return the mask used by this recipe. Note that a full mask is represented /// by a nullptr. VPValue *getMask() const { - assert(User.getNumOperands() <= 1 && "should have either 0 or 1 operands"); + assert(getNumOperands() <= 1 && "should have either 0 or 1 operands"); // Mask is optional. - return User.getNumOperands() == 1 ? User.getOperand(0) : nullptr; + return getNumOperands() == 1 ? getOperand(0) : nullptr; } }; @@ -1215,18 +1176,16 @@ /// The phi nodes can be scalar or vector depending on the users of the value. /// This recipe works in concert with VPBranchOnMaskRecipe. class VPPredInstPHIRecipe : public VPRecipeBase { - Instruction *PredInst; - public: /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi /// nodes after merging back from a Branch-on-Mask. VPPredInstPHIRecipe(Instruction *PredInst) - : VPRecipeBase(VPPredInstPHISC), PredInst(PredInst) {} + : VPRecipeBase(VPPredInstPHISC, PredInst) {} ~VPPredInstPHIRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPPredInstPHISC; + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPPredInstPHISC; } /// Generates phi nodes for live-outs as needed to retain SSA form. @@ -1235,6 +1194,11 @@ /// Print the recipe. void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override; + + Instruction *getPredInst() { return cast(getUnderlyingValue()); } + const Instruction *getPredInst() const { + return cast(getUnderlyingValue()); + } }; /// A Recipe for widening load/store operations. @@ -1243,59 +1207,54 @@ /// - For store: Address, stored value, optional mask /// TODO: We currently execute only per-part unless a specific instance is /// provided. -class VPWidenMemoryInstructionRecipe : public VPRecipeBase, public VPValue { - VPUser User; - +class VPWidenMemoryInstructionRecipe : public VPRecipeBase { void setMask(VPValue *Mask) { if (!Mask) return; - User.addOperand(Mask); + addOperand(Mask); } bool isMasked() const { return (isa(getUnderlyingInstruction()) && - User.getNumOperands() == 2) || + getNumOperands() == 2) || (isa(getUnderlyingInstruction()) && - User.getNumOperands() == 3); + getNumOperands() == 3); } public: VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask) - : VPRecipeBase(VPWidenMemoryInstructionSC), - VPValue(VPValue::VPMemoryInstructionSC, &Load), User({Addr}) { + : VPRecipeBase(VPWidenMemoryInstructionSC, &Load, {Addr}) { setMask(Mask); } VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredValue, VPValue *Mask) - : VPRecipeBase(VPWidenMemoryInstructionSC), - VPValue(VPValue::VPMemoryInstructionSC, &Store), - User({Addr, StoredValue}) { + : VPRecipeBase(VPWidenMemoryInstructionSC, &Store, {Addr, StoredValue}) { setMask(Mask); } /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPWidenMemoryInstructionSC; + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPWidenMemoryInstructionSC; } /// Return the address accessed by this recipe. VPValue *getAddr() const { - return User.getOperand(0); // Address is the 1st, mandatory operand. + return getOperand(0); // Address is the 1st, mandatory operand. } /// Return the mask used by this recipe. Note that a full mask is represented /// by a nullptr. VPValue *getMask() const { // Mask is optional and therefore the last operand. - return isMasked() ? User.getOperand(User.getNumOperands() - 1) : nullptr; + return isMasked() ? getOperand(getNumOperands() - 1) : nullptr; } /// Return the address accessed by this recipe. VPValue *getStoredValue() const { assert(isa(getUnderlyingInstruction()) && "Stored value only available for store instructions"); - return User.getOperand(1); // Stored value is the 2nd, mandatory operand. + return getOperand(1); // Stored value is the 2nd, mandatory operand. } /// Generate the wide load/store. @@ -1315,21 +1274,13 @@ /// A Recipe for widening the canonical induction variable of the vector loop. class VPWidenCanonicalIVRecipe : public VPRecipeBase { - /// A VPValue representing the canonical vector IV. - VPValue Val; - public: - VPWidenCanonicalIVRecipe() : VPRecipeBase(VPWidenCanonicalIVSC) {} + VPWidenCanonicalIVRecipe() : VPRecipeBase(VPWidenCanonicalIVSC, nullptr) {} ~VPWidenCanonicalIVRecipe() override = default; - /// Return the VPValue representing the canonical vector induction variable of - /// the vector loop. - const VPValue *getVPValue() const { return &Val; } - VPValue *getVPValue() { return &Val; } - /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *V) { - return V->getVPRecipeID() == VPRecipeBase::VPWidenCanonicalIVSC; + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPWidenCanonicalIVSC; } /// Generate a canonical vector induction variable of the vector loop, with @@ -1741,6 +1692,21 @@ Value2VPValue[V] = VPV; } + void addOrReplaceVPValue(Value *V, VPValue *VPV) { + assert(V && "Trying to get or add the VPValue of a null Value"); + if (!Value2VPValue.count(V)) { + addVPValue(V, VPV); + return; + } + VPValue *OldVPV = Value2VPValue[V]; + Value2VPValue[V] = VPV; + auto It = find(VPValuesToFree, OldVPV); + if (It != VPValuesToFree.end()) { + delete OldVPV; + VPValuesToFree.erase(It); + } + } + VPValue *getVPValue(Value *V) { assert(V && "Trying to get the VPValue of a null Value"); assert(Value2VPValue.count(V) && "Value does not exist in VPlan"); Index: llvm/lib/Transforms/Vectorize/VPlan.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/VPlan.cpp +++ llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -153,10 +153,10 @@ VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() { iterator It = begin(); while (It != end() && - (It->getVPRecipeID() == VPRecipeBase::VPWidenPHISC || - It->getVPRecipeID() == VPRecipeBase::VPWidenIntOrFpInductionSC || - It->getVPRecipeID() == VPRecipeBase::VPPredInstPHISC || - It->getVPRecipeID() == VPRecipeBase::VPWidenCanonicalIVSC)) + (It->getVPValueID() == VPRecipeBase::VPWidenPHISC || + It->getVPValueID() == VPRecipeBase::VPWidenIntOrFpInductionSC || + It->getVPValueID() == VPRecipeBase::VPPredInstPHISC || + It->getVPValueID() == VPRecipeBase::VPWidenCanonicalIVSC)) It++; return It; } @@ -354,6 +354,18 @@ return getParent()->getRecipeList().erase(getIterator()); } +void VPRecipeBase::recursivelyDeleteUnusedRecipes() { + if (getNumUsers() == 0 /* && isSafeToRemove()*/) { + for (auto *Op : operands()) { + Op->removeUser(this); + if (VPRecipeBase *R = dyn_cast(Op)) + R->recursivelyDeleteUnusedRecipes(); + } + clearOperands(); + eraseFromParent(); + } +} + void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) { removeFromParent(); insertAfter(InsertPos); @@ -407,6 +419,32 @@ State.set(this, Call, Part); break; } + case VPInstruction::Mulhs: + case VPInstruction::Mulhu: { + // TODOD: Create a Mulh instrinsic instead? + Value *A = State.get(getOperand(0), Part); + Value *B = State.get(getOperand(1), Part); + + auto Ty = cast(A->getType()); + unsigned BW = Ty->getScalarSizeInBits(); + auto *WidenTy = VectorType::getExtendedElementVectorType(Ty); + + auto *ExtA = getOpcode() == VPInstruction::Mulhs + ? Builder.CreateSExt(A, WidenTy) + : Builder.CreateZExt(A, WidenTy); + auto *ExtB = getOpcode() == VPInstruction::Mulhs + ? Builder.CreateSExt(B, WidenTy) + : Builder.CreateZExt(B, WidenTy); + auto *Mul = Builder.CreateMul(ExtA, ExtB); + auto *Shift = Builder.CreateLShr( + Mul, ConstantDataVector::getSplat( + State.VF.getKnownMinValue(), + ConstantInt::get(WidenTy->getElementType(), BW))); + auto *Trunc = Builder.CreateTrunc(Shift, Ty); + + State.set(this, Trunc, Part); + break; + } default: llvm_unreachable("Unsupported opcode for instruction"); } @@ -451,6 +489,12 @@ case VPInstruction::ActiveLaneMask: O << "active lane mask"; break; + case VPInstruction::Mulhs: + O << "mulhs"; + break; + case VPInstruction::Mulhu: + O << "mulhu"; + break; default: O << Instruction::getOpcodeName(getOpcode()); @@ -759,18 +803,18 @@ O << "\"WIDEN "; printAsOperand(O, SlotTracker); O << " = select "; - User.getOperand(0)->printAsOperand(O, SlotTracker); + getOperand(0)->printAsOperand(O, SlotTracker); O << ", "; - User.getOperand(1)->printAsOperand(O, SlotTracker); + getOperand(1)->printAsOperand(O, SlotTracker); O << ", "; - User.getOperand(2)->printAsOperand(O, SlotTracker); + getOperand(2)->printAsOperand(O, SlotTracker); O << (InvariantCond ? " (condition is loop invariant)" : ""); } void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << "\"WIDEN\\l\""; - O << "\" " << VPlanIngredient(&Ingredient); + O << "\" " << VPlanIngredient(getUnderlyingInstruction()); } void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, @@ -778,10 +822,10 @@ O << "\"WIDEN-INDUCTION"; if (Trunc) { O << "\\l\""; - O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\""; + O << " +\n" << Indent << "\" " << VPlanIngredient(getIV()) << "\\l\""; O << " +\n" << Indent << "\" " << VPlanIngredient(Trunc); } else - O << " " << VPlanIngredient(IV); + O << " " << VPlanIngredient(getIV()); } void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent, @@ -798,13 +842,13 @@ void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { - O << "\"WIDEN-PHI " << VPlanIngredient(Phi); + O << "\"WIDEN-PHI " << VPlanIngredient(getPhi()); } void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << "\"BLEND "; - Phi->printAsOperand(O, false); + getPhi()->printAsOperand(O, false); O << " ="; if (getNumIncomingValues() == 1) { // Not a User of any mask: not really blending, this is a @@ -823,13 +867,13 @@ void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { - O << "\"REDUCE of" << *I << " as "; - ChainOp->printAsOperand(O, SlotTracker); + O << "\"REDUCE of" << *getUnderlyingValue() << " as "; + getChainOp()->printAsOperand(O, SlotTracker); O << " + reduce("; - VecOp->printAsOperand(O, SlotTracker); - if (CondOp) { + getVecOp()->printAsOperand(O, SlotTracker); + if (getCondOp()) { O << ", "; - CondOp->printAsOperand(O, SlotTracker); + getCondOp()->printAsOperand(O, SlotTracker); } O << ")"; } @@ -837,14 +881,14 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << "\"" << (IsUniform ? "CLONE " : "REPLICATE ") - << VPlanIngredient(Ingredient); + << VPlanIngredient(getIngredient()); if (AlsoPack) O << " (S->V)"; } void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { - O << "\"PHI-PREDICATED-INSTRUCTION " << VPlanIngredient(PredInst); + O << "\"PHI-PREDICATED-INSTRUCTION " << VPlanIngredient(getPredInst()); } void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent, @@ -879,14 +923,14 @@ Constant *VStep = VF == 1 ? Indices.back() : ConstantVector::get(Indices); // Add the consecutive indices to the vector value. Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv"); - State.set(getVPValue(), CanonicalVectorIV, Part); + State.set(this, CanonicalVectorIV, Part); } } void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << "\"EMIT "; - getVPValue()->printAsOperand(O, SlotTracker); + printAsOperand(O, SlotTracker); O << " = WIDEN-CANONICAL-INDUCTION"; } @@ -897,6 +941,7 @@ for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) if (User->getOperand(I) == this) User->setOperand(I, New); + Users.clear(); } void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const { @@ -990,7 +1035,7 @@ if (const auto *VPI = dyn_cast(&Recipe)) assignSlot(VPI); else if (const auto *VPIV = dyn_cast(&Recipe)) - assignSlot(VPIV->getVPValue()); + assignSlot(VPIV); } } Index: llvm/lib/Transforms/Vectorize/VPlanValue.h =================================================================== --- llvm/lib/Transforms/Vectorize/VPlanValue.h +++ llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -61,10 +61,12 @@ // for multiple underlying IRs (Polly?) by providing a new VPlan front-end, // back-end and analysis information for the new IR. +public: /// Return the underlying Value attached to this VPValue. Value *getUnderlyingValue() { return UnderlyingVal; } const Value *getUnderlyingValue() const { return UnderlyingVal; } +protected: // Set \p Val as the underlying Value of this VPValue. void setUnderlyingValue(Value *Val) { assert(!UnderlyingVal && "Underlying Value is already set."); @@ -79,12 +81,7 @@ enum { VPValueSC, VPMultiValueSC, - VPInstructionSC, - VPMemoryInstructionSC, - VPVWidenCallSC, - VPVWidenSelectSC, - VPVWidenGEPSC, - VPVInterleaveSC + VPValueLastID // Last ID }; VPValue(Value *UV = nullptr) : VPValue(VPValueSC, UV) {} @@ -100,11 +97,22 @@ virtual VPValue *getDefiningValue() { return this; } virtual VPValue const *getDefiningValue() const { return this; } + Type *getType() { + if (UnderlyingVal) + return UnderlyingVal->getType(); + return nullptr; + } + void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const; void print(raw_ostream &OS, VPSlotTracker &Tracker) const; unsigned getNumUsers() const { return Users.size(); } virtual void addUser(VPUser &User) { Users.push_back(&User); } + void removeUser(VPUser *User) { + auto It = find(Users, User); + assert(It != Users.end() && "Expected User in Users"); + Users.erase(It); + } typedef SmallVectorImpl::iterator user_iterator; typedef SmallVectorImpl::const_iterator const_user_iterator; @@ -165,19 +173,27 @@ /// This class augments VPValue with operands which provide the inverse def-use /// edges from VPValue's users to their defs. -class VPUser { +class VPUser : public VPValue { SmallVector Operands; +protected: + void clearOperands() { Operands.clear(); } + public: - VPUser() {} - VPUser(ArrayRef Operands) { + VPUser() : VPValue(VPValueSC) {} + VPUser(const unsigned char SC, Value *UV = nullptr) : VPValue(SC, UV) {} + VPUser(const unsigned char SC, Value *UV, ArrayRef Operands) + : VPValue(SC, UV) { for (VPValue *Operand : Operands) addOperand(Operand); } - VPUser(std::initializer_list Operands) - : VPUser(ArrayRef(Operands)) {} - template VPUser(iterator_range Operands) { + VPUser(const unsigned char SC, Value *UV, + std::initializer_list Operands) + : VPUser(SC, UV, ArrayRef(Operands)) {} + template + VPUser(const unsigned char SC, Value *UV, iterator_range Operands) + : VPValue(SC, UV) { for (VPValue *Operand : Operands) addOperand(Operand); } Index: llvm/unittests/Transforms/Vectorize/VPlanTest.cpp =================================================================== --- llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -247,7 +247,7 @@ } } -struct VPTestMultiValueDef : public VPUser, public VPValue { +struct VPTestMultiValueDef : public VPUser { SmallVector Defs; };