Index: llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -292,8 +292,7 @@ /// Build a VPlan using VPRecipes according to the information gather by /// Legal. This method is only used for the legacy inner loop vectorizer. VPlanPtr buildVPlanWithVPRecipes( - VFRange &Range, SmallPtrSetImpl &NeedDef, - SmallPtrSetImpl &DeadInstructions, + VFRange &Range, SmallPtrSetImpl &DeadInstructions, const DenseMap &SinkAfter); /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive, @@ -307,6 +306,8 @@ /// reduction chain. void adjustRecipesForInLoopReductions(VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder); + bool adjustVPlanForVectorPatterns(VPlanPtr &Plan, + VPRecipeBuilder &RecipeBuilder); }; } // namespace llvm Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -152,6 +152,8 @@ #include #include +#include + using namespace llvm; #define LV_NAME "loop-vectorize" @@ -555,7 +557,6 @@ /// non-null. Use \p State to translate given VPValues to IR values in the /// vectorized loop. void vectorizeMemoryInstruction(Instruction *Instr, VPTransformState &State, - VPValue *Def, VPValue *Addr, VPValue *StoredValue, VPValue *BlockInMask); @@ -7254,7 +7255,7 @@ if (!BI->isConditional() || BI->getSuccessor(0) == BI->getSuccessor(1)) return EdgeMaskCache[Edge] = SrcMask; - VPValue *EdgeMask = Plan->getVPValue(BI->getCondition()); + VPValue *EdgeMask = Plan->getOrAddVPValue(BI->getCondition()); assert(EdgeMask && "No Edge Mask found for condition"); if (BI->getSuccessor(0) != Dst) @@ -7292,11 +7293,11 @@ // Start by constructing the desired canonical IV. VPValue *IV = nullptr; if (Legal->getPrimaryInduction()) - IV = Plan->getVPValue(Legal->getPrimaryInduction()); + IV = Plan->getOrAddVPValue(Legal->getPrimaryInduction()); else { auto IVRecipe = new VPWidenCanonicalIVRecipe(); Builder.getInsertBlock()->insert(IVRecipe, NewInsertionPoint); - IV = IVRecipe->getVPValue(); + IV = IVRecipe; } VPValue *BTC = Plan->getOrCreateBackedgeTakenCount(); bool TailFolded = !CM.isScalarEpilogueAllowed(); @@ -7539,7 +7540,10 @@ return nullptr; // Success: widen this instruction. - return new VPWidenRecipe(*I, Plan.mapToVPValues(I->operands())); + VPWidenRecipe *Recipe = + new VPWidenRecipe(*I, Plan.mapToVPValues(I->operands())); + Plan.addVPValue(I, Recipe); + return Recipe; } VPBasicBlock *VPRecipeBuilder::handleReplication( @@ -7662,32 +7666,6 @@ unsigned MaxVF) { assert(OrigLoop->empty() && "Inner loop expected."); - // Collect conditions feeding internal conditional branches; they need to be - // represented in VPlan for it to model masking. - SmallPtrSet NeedDef; - - auto *Latch = OrigLoop->getLoopLatch(); - for (BasicBlock *BB : OrigLoop->blocks()) { - if (BB == Latch) - continue; - BranchInst *Branch = dyn_cast(BB->getTerminator()); - if (Branch && Branch->isConditional()) - NeedDef.insert(Branch->getCondition()); - } - - // If the tail is to be folded by masking, the primary induction variable, if - // exists needs to be represented in VPlan for it to model early-exit masking. - // Also, both the Phi and the live-out instruction of each reduction are - // required in order to introduce a select between them in VPlan. - if (CM.foldTailByMasking()) { - if (Legal->getPrimaryInduction()) - NeedDef.insert(Legal->getPrimaryInduction()); - for (auto &Reduction : Legal->getReductionVars()) { - NeedDef.insert(Reduction.first); - NeedDef.insert(Reduction.second.getLoopExitInstr()); - } - } - // Collect instructions from the original loop that will become trivially dead // in the vectorized loop. We don't need to vectorize these instructions. For // example, original induction update instructions can become dead because we @@ -7711,15 +7689,14 @@ for (unsigned VF = MinVF; VF < MaxVF + 1;) { VFRange SubRange = {VF, MaxVF + 1}; - VPlans.push_back(buildVPlanWithVPRecipes(SubRange, NeedDef, - DeadInstructions, SinkAfter)); + VPlans.push_back( + buildVPlanWithVPRecipes(SubRange, DeadInstructions, SinkAfter)); VF = SubRange.End; } } VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( - VFRange &Range, SmallPtrSetImpl &NeedDef, - SmallPtrSetImpl &DeadInstructions, + VFRange &Range, SmallPtrSetImpl &DeadInstructions, const DenseMap &SinkAfter) { // Hold a mapping from predicated instructions to their recipes, in order to @@ -7808,10 +7785,6 @@ VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry"); Plan->setEntry(VPBB); - // Represent values that will have defs inside VPlan. - for (Value *V : NeedDef) - Plan->addVPValue(V); - // Scan the body of the loop in a topological order to visit each basic block // after having visited its predecessor basic blocks. LoopBlocksDFS DFS(OrigLoop); @@ -7880,8 +7853,10 @@ } // Adjust the recipes for any inloop reductions. - if (Range.Start > 1) + if (Range.Start > 1) { adjustRecipesForInLoopReductions(Plan, RecipeBuilder); + adjustVPlanForVectorPatterns(Plan, RecipeBuilder); + } // Finally, if tail is folded by masking, introduce selects between the phi // and the live-out instruction of each reduction, at the end of the latch. @@ -7891,8 +7866,8 @@ for (auto &Reduction : Legal->getReductionVars()) { if (CM.isInLoopReduction(Reduction.first)) continue; - VPValue *Phi = Plan->getVPValue(Reduction.first); - VPValue *Red = Plan->getVPValue(Reduction.second.getLoopExitInstr()); + VPValue *Phi = Plan->getOrAddVPValue(Reduction.first); + VPValue *Red = Plan->getOrAddVPValue(Reduction.second.getLoopExitInstr()); Builder.createNaryOp(Instruction::Select, {Cond, Red, Phi}); } } @@ -7966,7 +7941,7 @@ VPRecipeBase *WidenRecipe = RecipeBuilder.getRecipe(R); RecurrenceDescriptor::RecurrenceKind Kind = RdxDesc.getRecurrenceKind(); - VPValue *ChainOp = Plan->getVPValue(Chain); + VPValue *ChainOp = Plan->getOrAddVPValue(Chain); unsigned FirstOpId; if (Kind == RecurrenceDescriptor::RK_IntegerMinMax || Kind == RecurrenceDescriptor::RK_FloatMinMax) { @@ -7980,7 +7955,7 @@ } unsigned VecOpId = R->getOperand(FirstOpId) == Chain ? FirstOpId + 1 : FirstOpId; - VPValue *VecOp = Plan->getVPValue(R->getOperand(VecOpId)); + VPValue *VecOp = Plan->getOrAddVPValue(R->getOperand(VecOpId)); bool UseReductionIntrinsic = TTI->useReductionIntrinsic( RdxDesc.getRecurrenceBinOp(Kind), RdxDesc.getRecurrenceType(), @@ -7993,6 +7968,7 @@ UseReductionIntrinsic); WidenRecipe->getParent()->insert(RedRecipe, WidenRecipe->getIterator()); WidenRecipe->eraseFromParent(); + Plan->addOrReplaceVPValue(R, RedRecipe); if (Kind == RecurrenceDescriptor::RK_IntegerMinMax || Kind == RecurrenceDescriptor::RK_FloatMinMax) { @@ -8007,6 +7983,131 @@ } } +using namespace PatternMatch; + +template +struct VPBinaryOp_match { + LHS_t L; + RHS_t R; + + VPBinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} + + template bool match(OpTy *V) { + const VPWidenRecipe *VW = dyn_cast(V); + if (VW && VW->getOpcode() == Opcode) + return L.match(VW->getOperand(0)) && R.match(VW->getOperand(1)); + // TODOD: VPInstruction? + return false; + } +}; +template +inline VPBinaryOp_match vp_Mul(const LHS &L, + const RHS &R) { + return VPBinaryOp_match(L, R); +} +template +inline VPBinaryOp_match vp_LShr(const LHS &L, + const RHS &R) { + return VPBinaryOp_match(L, R); +} + +template struct VPUnaryOp_match { + LHS_t L; + + VPUnaryOp_match(const LHS_t &LHS) : L(LHS) {} + + template bool match(OpTy *V) { + const VPWidenRecipe *VW = dyn_cast(V); + if (VW && VW->getOpcode() == Opcode) + return L.match(VW->getOperand(0)); + return false; + } +}; +template +inline VPUnaryOp_match vp_Trunc(const LHS &L) { + return VPUnaryOp_match(L); +} +template +inline VPUnaryOp_match vp_SExt(const LHS &L) { + return VPUnaryOp_match(L); +} +template +inline VPUnaryOp_match vp_ZExt(const LHS &L) { + return VPUnaryOp_match(L); +} + +inline bind_ty vp_Value(VPValue *&V) { return V; } +inline bind_ty vp_Value(const VPValue *&V) { return V; } + +struct VPspecific_intval { + APInt Val; + + VPspecific_intval(APInt V) : Val(std::move(V)) {} + + template bool match(ITy *V) { + const auto *CI = dyn_cast(V->getUnderlyingValue()); + return CI && APInt::isSameValue(CI->getValue(), Val); + } +}; +inline VPspecific_intval vp_SpecificInt(uint64_t V) { + return VPspecific_intval(APInt(64, V)); +} + +// trunc(lsr(mul(ext, ext)), BW)) -> mulhs/mulhu +static bool tryCombineToMulh(VPBasicBlock::reverse_iterator &It, VPlanPtr &Plan, + VPRecipeBuilder &RecipeBuilder) { + VPWidenRecipe *RV = dyn_cast(&*It); + if (!RV) + return false; + + Type *Ty = RV->getType(); + unsigned BW = Ty->getScalarSizeInBits(); + + VPValue *Mul; + // TODOD: One use checks? + if (!match(RV, vp_Trunc(vp_LShr(vp_Value(Mul), vp_SpecificInt(BW))))) + return false; + + VPValue *A, *B; + unsigned Opcode = 0; + if (match(Mul, vp_Mul(vp_SExt(vp_Value(A)), vp_SExt(vp_Value(B))))) + Opcode = VPInstruction::Mulhs; + else if (match(Mul, vp_Mul(vp_ZExt(vp_Value(A)), vp_ZExt(vp_Value(B))))) + Opcode = VPInstruction::Mulhu; + else + return false; + + unsigned MulBW = Mul->getType()->getScalarSizeInBits(); + if (MulBW > BW * 2) + return false; + + if (A->getType() != Ty || B->getType() != Ty) // TODOD: Generalize + return false; + + auto *Mulh = new VPInstruction(Opcode, {A, B}); + RV->getParent()->insert(Mulh, RV->getIterator()); + RV->replaceAllUsesWith(Mulh); + RV->recursivelyDeleteUnusedRecipes(); + It = Mulh->getReverseIterator(); + return true; +} + +bool LoopVectorizationPlanner::adjustVPlanForVectorPatterns( + VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder) { + bool Changed = false; + for (VPBlockBase *Block : depth_first(Plan->getEntry())) { + if (auto *BB = dyn_cast(Block)) { + for (VPBasicBlock::reverse_iterator It = BB->rbegin(), E = BB->rend(); + It != E; ++It) { + if (true /*TODOD: TTI->hasVectorPattern(Mulh)*/) + Changed |= tryCombineToMulh(It, Plan, RecipeBuilder); + } + } + } + + return Changed; +} + Value* LoopVectorizationPlanner::VPCallbackILV:: getOrCreateVectorValues(Value *V, unsigned Part) { return ILV.getOrCreateVectorValue(V, Part); @@ -8034,35 +8135,36 @@ } void VPWidenCallRecipe::execute(VPTransformState &State) { - State.ILV->widenCallInstruction(*getUnderlyingCall(), this, User, State); + State.ILV->widenCallInstruction(*getUnderlyingCall(), this, *this, State); } void VPWidenSelectRecipe::execute(VPTransformState &State) { - State.ILV->widenSelectInstruction(*getUnderlyingInstruction(), this, User, + State.ILV->widenSelectInstruction(*getUnderlyingInstruction(), this, *this, InvariantCond, State); } void VPWidenRecipe::execute(VPTransformState &State) { - State.ILV->widenInstruction(Ingredient, User, State); + State.ILV->widenInstruction(*getUnderlyingInstruction(), *this, + State); } void VPWidenGEPRecipe::execute(VPTransformState &State) { - State.ILV->widenGEP(getUnderlyingInstruction(), this, User, State.UF, + State.ILV->widenGEP(getUnderlyingInstruction(), this, *this, State.UF, State.VF, IsPtrLoopInvariant, IsIndexLoopInvariant, State); } void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { assert(!State.Instance && "Int or FP induction being replicated."); - State.ILV->widenIntOrFpInduction(IV, Trunc); + State.ILV->widenIntOrFpInduction(getIV(), Trunc); } void VPWidenPHIRecipe::execute(VPTransformState &State) { - State.ILV->widenPHIInstruction(Phi, State.UF, State.VF); + State.ILV->widenPHIInstruction(getPhi(), State.UF, State.VF); } void VPBlendRecipe::execute(VPTransformState &State) { - State.ILV->setDebugLocFromInst(State.Builder, Phi); + State.ILV->setDebugLocFromInst(State.Builder, getPhi()); // We know that all PHIs in non-header blocks are converted into // selects, so we don't have to worry about the insertion order and we // can just use the builder. @@ -8097,7 +8199,7 @@ } } for (unsigned Part = 0; Part < State.UF; ++Part) - State.ValueMap.setVectorValue(Phi, Part, Entry[Part]); + State.ValueMap.setVectorValue(getPhi(), Part, Entry[Part]); } void VPInterleaveRecipe::execute(VPTransformState &State) { @@ -8110,9 +8212,9 @@ assert(!State.Instance && "Reduction being replicated."); for (unsigned Part = 0; Part < State.UF; ++Part) { RecurrenceDescriptor::RecurrenceKind Kind = RdxDesc->getRecurrenceKind(); - Value *NewVecOp = State.get(VecOp, Part); - if (CondOp) { - Value *NewCond = State.get(CondOp, Part); + Value *NewVecOp = State.get(getVecOp(), Part); + if (getCondOp()) { + Value *NewCond = State.get(getCondOp(), Part); VectorType *VecTy = cast(NewVecOp->getType()); Constant *Iden = RecurrenceDescriptor::getRecurrenceIdentity( Kind, RdxDesc->getMinMaxRecurrenceKind(), VecTy->getElementType()); @@ -8123,7 +8225,7 @@ } Value *NewRed = createTargetReduction(State.Builder, *RdxDesc, NewVecOp, UseReductionIntrinsic, NoNaN); - Value *PrevInChain = State.get(ChainOp, Part); + Value *PrevInChain = State.get(getChainOp(), Part); Value *NextInChain; if (Kind == RecurrenceDescriptor::RK_IntegerMinMax || Kind == RecurrenceDescriptor::RK_FloatMinMax) { @@ -8132,26 +8234,30 @@ NewRed, PrevInChain); } else { NextInChain = State.Builder.CreateBinOp( - (Instruction::BinaryOps)I->getOpcode(), NewRed, PrevInChain); + (Instruction::BinaryOps)cast(getUnderlyingValue()) + ->getOpcode(), + NewRed, PrevInChain); } - State.ValueMap.setVectorValue(I, Part, NextInChain); + State.ValueMap.setVectorValue(getUnderlyingValue(), Part, NextInChain); } } void VPReplicateRecipe::execute(VPTransformState &State) { if (State.Instance) { // Generate a single instance. - State.ILV->scalarizeInstruction(Ingredient, User, *State.Instance, + + State.ILV->scalarizeInstruction(getIngredient(), *this, *State.Instance, IsPredicated, State); // Insert scalar instance packing it into a vector. if (AlsoPack && State.VF.isVector()) { // If we're constructing lane 0, initialize to start from undef. if (State.Instance->Lane == 0) { assert(!State.VF.isScalable() && "VF is assumed to be non scalable."); - Value *Undef = - UndefValue::get(VectorType::get(Ingredient->getType(), State.VF)); - State.ValueMap.setVectorValue(Ingredient, State.Instance->Part, Undef); + Value *Undef = UndefValue::get( + VectorType::get(getIngredient()->getType(), State.VF)); + State.ValueMap.setVectorValue(getIngredient(), State.Instance->Part, + Undef); } - State.ILV->packScalarIntoVectorValue(Ingredient, *State.Instance); + State.ILV->packScalarIntoVectorValue(getIngredient(), *State.Instance); } return; } @@ -8162,7 +8268,7 @@ unsigned EndLane = IsUniform ? 1 : State.VF.getKnownMinValue(); for (unsigned Part = 0; Part < State.UF; ++Part) for (unsigned Lane = 0; Lane < EndLane; ++Lane) - State.ILV->scalarizeInstruction(Ingredient, User, {Part, Lane}, + State.ILV->scalarizeInstruction(getIngredient(), *this, {Part, Lane}, IsPredicated, State); } @@ -8195,7 +8301,7 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) { assert(State.Instance && "Predicated instruction PHI works per instance."); Instruction *ScalarPredInst = cast( - State.ValueMap.getScalarValue(PredInst, *State.Instance)); + State.ValueMap.getScalarValue(getPredInst(), *State.Instance)); BasicBlock *PredicatedBB = ScalarPredInst->getParent(); BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor(); assert(PredicatingBB && "Predicated block has no single predecessor."); @@ -8207,19 +8313,19 @@ // also do that packing, thereby "hoisting" the insert-element sequence. // Otherwise, a phi node for the scalar value is needed. unsigned Part = State.Instance->Part; - if (State.ValueMap.hasVectorValue(PredInst, Part)) { - Value *VectorValue = State.ValueMap.getVectorValue(PredInst, Part); + if (State.ValueMap.hasVectorValue(getPredInst(), Part)) { + Value *VectorValue = State.ValueMap.getVectorValue(getPredInst(), Part); InsertElementInst *IEI = cast(VectorValue); PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2); VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector. VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element. - State.ValueMap.resetVectorValue(PredInst, Part, VPhi); // Update cache. + State.ValueMap.resetVectorValue(getPredInst(), Part, VPhi); // Update cache. } else { - Type *PredInstType = PredInst->getType(); + Type *PredInstType = getPredInst()->getType(); PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2); Phi->addIncoming(UndefValue::get(ScalarPredInst->getType()), PredicatingBB); Phi->addIncoming(ScalarPredInst, PredicatedBB); - State.ValueMap.resetScalarValue(PredInst, *State.Instance, Phi); + State.ValueMap.resetScalarValue(getPredInst(), *State.Instance, Phi); } } Index: llvm/lib/Transforms/Vectorize/VPlan.h =================================================================== --- llvm/lib/Transforms/Vectorize/VPlan.h +++ llvm/lib/Transforms/Vectorize/VPlan.h @@ -641,9 +641,6 @@ VPRecipeBase(const unsigned char SC) : SubclassID(SC) {} virtual ~VPRecipeBase() = default; - /// \return an ID for the concrete type of this object. - /// This is used to implement the classof checks. This should not be used - /// for any other purpose, as the values may change as LLVM evolves. unsigned getVPRecipeID() const { return SubclassID; } /// \return the VPBasicBlock which this VPRecipe belongs to. @@ -678,6 +675,12 @@ /// /// \returns an iterator pointing to the element after the erased one iplist::iterator eraseFromParent(); + + void recursivelyDeleteUnusedRecipes(); + + static inline bool classof(const VPValue *V) { + return V->getVPValueID() >= VPValue::VPInstructionSC; + } }; /// This is a concrete Recipe that models a single VPlan-level instruction. @@ -695,6 +698,8 @@ SLPLoad, SLPStore, ActiveLaneMask, + Mulhs, + Mulhu, }; private: @@ -724,15 +729,8 @@ static inline bool classof(const VPValue *V) { return V->getVPValueID() == VPValue::VPInstructionSC; } - - VPInstruction *clone() const { - SmallVector Operands(operands()); - return new VPInstruction(Opcode, Operands); - } - - /// Method to support type inquiry through isa, cast, and dyn_cast. - static inline bool classof(const VPRecipeBase *R) { - return R->getVPRecipeID() == VPRecipeBase::VPInstructionSC; + static inline bool classof(const VPRecipeBase *V) { + return V->getVPRecipeID() == VPRecipeBase::VPInstructionSC; } unsigned getOpcode() const { return Opcode; } @@ -782,21 +780,19 @@ /// VPWidenRecipe is a recipe for producing a copy of vector type its /// ingredient. This recipe covers most of the traditional vectorization cases /// where each ingredient transforms into a vectorized version of itself. -class VPWidenRecipe : public VPRecipeBase { - /// Hold the instruction to be widened. - Instruction &Ingredient; - - /// Hold VPValues for the operands of the ingredient. - VPUser User; - +class VPWidenRecipe : public VPRecipeBase, public VPValue, public VPUser { public: template VPWidenRecipe(Instruction &I, iterator_range Operands) - : VPRecipeBase(VPWidenSC), Ingredient(I), User(Operands) {} + : VPRecipeBase(VPRecipeBase::VPWidenSC), VPValue(VPValue::VPWidenSC, &I), + VPUser(Operands) {} ~VPWidenRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPValue::VPWidenSC; + } static inline bool classof(const VPRecipeBase *V) { return V->getVPRecipeID() == VPRecipeBase::VPWidenSC; } @@ -807,22 +803,30 @@ /// Print the recipe. void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override; + + Instruction *getUnderlyingInstruction() { + return cast(getUnderlyingValue()); + } + const Instruction *getUnderlyingInstruction() const { + return cast(getUnderlyingValue()); + } + unsigned getOpcode() const { return getUnderlyingInstruction()->getOpcode(); } }; /// A recipe for widening Call instructions. -class VPWidenCallRecipe : public VPRecipeBase, public VPValue { - /// Hold VPValues for the arguments of the call. - VPUser User; - +class VPWidenCallRecipe : public VPRecipeBase, public VPValue, public VPUser { public: template VPWidenCallRecipe(CallInst &I, iterator_range CallArguments) - : VPRecipeBase(VPWidenCallSC), VPValue(VPValue::VPVWidenCallSC, &I), - User(CallArguments) {} + : VPRecipeBase(VPRecipeBase::VPWidenCallSC), + VPValue(VPValue::VPWidenCallSC, &I), VPUser(CallArguments) {} ~VPWidenCallRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPValue::VPWidenCallSC; + } static inline bool classof(const VPRecipeBase *V) { return V->getVPRecipeID() == VPRecipeBase::VPWidenCallSC; } @@ -841,11 +845,8 @@ }; /// A recipe for widening select instructions. -class VPWidenSelectRecipe : public VPRecipeBase, public VPValue { +class VPWidenSelectRecipe : public VPRecipeBase, public VPValue, public VPUser { private: - /// Hold VPValues for the operands of the select. - VPUser User; - /// Is the condition of the select loop invariant? bool InvariantCond; @@ -853,12 +854,16 @@ template VPWidenSelectRecipe(SelectInst &I, iterator_range Operands, bool InvariantCond) - : VPRecipeBase(VPWidenSelectSC), VPValue(VPValue::VPVWidenSelectSC, &I), - User(Operands), InvariantCond(InvariantCond) {} + : VPRecipeBase(VPRecipeBase::VPWidenSelectSC), + VPValue(VPValue::VPWidenSelectSC, &I), VPUser(Operands), + InvariantCond(InvariantCond) {} ~VPWidenSelectRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPValue::VPWidenSelectSC; + } static inline bool classof(const VPRecipeBase *V) { return V->getVPRecipeID() == VPRecipeBase::VPWidenSelectSC; } @@ -876,10 +881,7 @@ }; /// A recipe for handling GEP instructions. -class VPWidenGEPRecipe : public VPRecipeBase, public VPValue { - /// Hold VPValues for the base and indices of the GEP. - VPUser User; - +class VPWidenGEPRecipe : public VPRecipeBase, public VPValue, public VPUser { bool IsPtrLoopInvariant; SmallBitVector IsIndexLoopInvariant; @@ -887,8 +889,9 @@ template VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range Operands, Loop *OrigLoop) - : VPRecipeBase(VPWidenGEPSC), VPValue(VPValue::VPVWidenGEPSC, GEP), - User(Operands), IsIndexLoopInvariant(GEP->getNumIndices(), false) { + : VPRecipeBase(VPRecipeBase::VPWidenGEPSC), + VPValue(VPValue::VPWidenGEPSC, GEP), VPUser(Operands), + IsIndexLoopInvariant(GEP->getNumIndices(), false) { IsPtrLoopInvariant = OrigLoop->isLoopInvariant(GEP->getPointerOperand()); for (auto Index : enumerate(GEP->indices())) IsIndexLoopInvariant[Index.index()] = @@ -897,6 +900,9 @@ ~VPWidenGEPRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPValue::VPWidenGEPSC; + } static inline bool classof(const VPRecipeBase *V) { return V->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC; } @@ -918,16 +924,19 @@ /// A recipe for handling phi nodes of integer and floating-point inductions, /// producing their vector and scalar values. -class VPWidenIntOrFpInductionRecipe : public VPRecipeBase { - PHINode *IV; +class VPWidenIntOrFpInductionRecipe : public VPRecipeBase, public VPValue { TruncInst *Trunc; public: VPWidenIntOrFpInductionRecipe(PHINode *IV, TruncInst *Trunc = nullptr) - : VPRecipeBase(VPWidenIntOrFpInductionSC), IV(IV), Trunc(Trunc) {} + : VPRecipeBase(VPRecipeBase::VPWidenIntOrFpInductionSC), + VPValue(VPValue::VPWidenIntOrFpInductionSC, IV), Trunc(Trunc) {} ~VPWidenIntOrFpInductionRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPValue::VPWidenIntOrFpInductionSC; + } static inline bool classof(const VPRecipeBase *V) { return V->getVPRecipeID() == VPRecipeBase::VPWidenIntOrFpInductionSC; } @@ -939,17 +948,23 @@ /// Print the recipe. void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override; + + PHINode *getIV() { return cast(getUnderlyingValue()); } + const PHINode *getIV() const { return cast(getUnderlyingValue()); } }; /// A recipe for handling all phi nodes except for integer and FP inductions. -class VPWidenPHIRecipe : public VPRecipeBase { - PHINode *Phi; - +class VPWidenPHIRecipe : public VPRecipeBase, public VPValue { public: - VPWidenPHIRecipe(PHINode *Phi) : VPRecipeBase(VPWidenPHISC), Phi(Phi) {} + VPWidenPHIRecipe(PHINode *Phi) + : VPRecipeBase(VPRecipeBase::VPWidenPHISC), + VPValue(VPValue::VPWidenPHISC, Phi) {} ~VPWidenPHIRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPValue::VPWidenPHISC; + } static inline bool classof(const VPRecipeBase *V) { return V->getVPRecipeID() == VPRecipeBase::VPWidenPHISC; } @@ -960,21 +975,18 @@ /// Print the recipe. void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override; + + PHINode *getPhi() { return cast(getUnderlyingValue()); } + const PHINode *getPhi() const { return cast(getUnderlyingValue()); } }; /// A recipe for vectorizing a phi-node as a sequence of mask-based select /// instructions. -class VPBlendRecipe : public VPRecipeBase { - PHINode *Phi; - - /// The blend operation is a User of the incoming values and of their - /// respective masks, ordered [I0, M0, I1, M1, ...]. Note that a single value - /// might be incoming with a full mask for which there is no VPValue. - VPUser User; - +class VPBlendRecipe : public VPRecipeBase, public VPValue, public VPUser { public: VPBlendRecipe(PHINode *Phi, ArrayRef Operands) - : VPRecipeBase(VPBlendSC), Phi(Phi), User(Operands) { + : VPRecipeBase(VPRecipeBase::VPBlendSC), VPValue(VPValue::VPBlendSC, Phi), + VPUser(Operands) { assert(Operands.size() > 0 && ((Operands.size() == 1) || (Operands.size() % 2 == 0)) && "Expected either a single incoming value or a positive even number " @@ -982,23 +994,25 @@ } /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPValue::VPBlendSC; + } static inline bool classof(const VPRecipeBase *V) { return V->getVPRecipeID() == VPRecipeBase::VPBlendSC; } /// Return the number of incoming values, taking into account that a single /// incoming value has no mask. - unsigned getNumIncomingValues() const { - return (User.getNumOperands() + 1) / 2; - } + unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; } /// Return incoming value number \p Idx. - VPValue *getIncomingValue(unsigned Idx) const { - return User.getOperand(Idx * 2); - } + VPValue *getIncomingValue(unsigned Idx) const { return getOperand(Idx * 2); } /// Return mask number \p Idx. - VPValue *getMask(unsigned Idx) const { return User.getOperand(Idx * 2 + 1); } + VPValue *getMask(unsigned Idx) const { return getOperand(Idx * 2 + 1); } + + PHINode *getPhi() { return cast(getUnderlyingValue()); } + const PHINode *getPhi() const { return cast(getUnderlyingValue()); } /// Generate the phi/select nodes. void execute(VPTransformState &State) override; @@ -1010,41 +1024,40 @@ /// VPInterleaveRecipe is a recipe for transforming an interleave group of load /// or stores into one wide load/store and shuffles. -class VPInterleaveRecipe : public VPRecipeBase, public VPValue { +class VPInterleaveRecipe : public VPRecipeBase, public VPValue, public VPUser { const InterleaveGroup *IG; - VPUser User; SmallVector Defs; public: VPInterleaveRecipe(const InterleaveGroup *IG, VPValue *Addr, VPValue *Mask) - : VPRecipeBase(VPInterleaveSC), VPValue(VPValue::VPVInterleaveSC), IG(IG), - User({Addr}) { + : VPRecipeBase(VPRecipeBase::VPInterleaveSC), + VPValue(VPValue::VPInterleaveSC), VPUser({Addr}), IG(IG) { if (Mask) - User.addOperand(Mask); + addOperand(Mask); for (unsigned i = 0; i < IG->getNumMembers(); i++) Defs.push_back(new VPMultiValue(this)); } ~VPInterleaveRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPValue::VPInterleaveSC; + } static inline bool classof(const VPRecipeBase *V) { return V->getVPRecipeID() == VPRecipeBase::VPInterleaveSC; } - static inline bool classof(const VPValue *V) { - return V->getVPValueID() == VPValue::VPVInterleaveSC; - } /// Return the address accessed by this recipe. VPValue *getAddr() const { - return User.getOperand(0); // Address is the 1st, mandatory operand. + return getOperand(0); // Address is the 1st, mandatory operand. } /// Return the mask used by this recipe. Note that a full mask is represented /// by a nullptr. VPValue *getMask() const { // Mask is optional and therefore the last, currently 2nd operand. - return User.getNumOperands() == 2 ? User.getOperand(1) : nullptr; + return getNumOperands() == 2 ? getOperand(1) : nullptr; } /// Generate the wide load or store, and shuffles. @@ -1060,35 +1073,11 @@ ArrayRef getDefs() { return Defs; } }; -/*template <> struct simplify_type {*/ -// using SimpleType = VPValue *; - -// static SimpleType getSimplifiedValue(VPMultiValue &Val) { -// return Val.getProducer(); -//} -//}; - -// template <> struct simplify_type { -// using SimpleType = VPValue *; - -// static SimpleType getSimplifiedValue(VPMultiValue *&Val) { -// return Val->getProducer(); -//} -//}; - /// A recipe to represent inloop reduction operations, performing a reduction on /// a vector operand into a scalar value, and adding the result to a chain. -class VPReductionRecipe : public VPRecipeBase { +class VPReductionRecipe : public VPRecipeBase, public VPValue, public VPUser { /// The recurrence decriptor for the reduction in question. RecurrenceDescriptor *RdxDesc; - /// The original instruction being converted to a reduction. - Instruction *I; - /// The VPValue of the vector value to be reduced. - VPValue *VecOp; - /// The VPValue of the scalar Chain being accumulated. - VPValue *ChainOp; - /// The VPValue of the condition for the block. - VPValue *CondOp; /// Fast math flags to use for the resulting reduction operation. bool NoNaN; /// Flag for whether to use reduction intrinsics vs shuffle expansions. @@ -1098,17 +1087,29 @@ VPReductionRecipe(RecurrenceDescriptor *R, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, bool NoNaN, bool UseReductionIntrinsic) - : VPRecipeBase(VPReductionSC), RdxDesc(R), I(I), VecOp(VecOp), - ChainOp(ChainOp), CondOp(CondOp), NoNaN(NoNaN), - UseReductionIntrinsic(UseReductionIntrinsic) {} + : VPRecipeBase(VPRecipeBase::VPReductionSC), + VPValue(VPValue::VPReductionSC, I), VPUser({ChainOp, VecOp}), + RdxDesc(R), NoNaN(NoNaN), UseReductionIntrinsic(UseReductionIntrinsic) { + if (CondOp) + addOperand(CondOp); + } ~VPReductionRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPValue::VPReductionSC; + } static inline bool classof(const VPRecipeBase *V) { return V->getVPRecipeID() == VPRecipeBase::VPReductionSC; } + VPValue *getChainOp() const { return getOperand(0); } + VPValue *getVecOp() const { return getOperand(1); } + VPValue *getCondOp() const { + return getNumOperands() > 2 ? getOperand(2) : nullptr; + } + /// Generate the reduction in the loop void execute(VPTransformState &State) override; @@ -1121,19 +1122,11 @@ /// copies of the original scalar type, one per lane, instead of producing a /// single copy of widened type for all lanes. If the instruction is known to be /// uniform only one copy, per lane zero, will be generated. -class VPReplicateRecipe : public VPRecipeBase { - /// The instruction being replicated. - Instruction *Ingredient; - - /// Hold VPValues for the operands of the ingredient. - VPUser User; - +class VPReplicateRecipe : public VPRecipeBase, public VPValue, public VPUser { /// Indicator if only a single replica per lane is needed. bool IsUniform; - /// Indicator if the replicas are also predicated. bool IsPredicated; - /// Indicator if the scalar values should also be packed into a vector. bool AlsoPack; @@ -1141,7 +1134,8 @@ template VPReplicateRecipe(Instruction *I, iterator_range Operands, bool IsUniform, bool IsPredicated = false) - : VPRecipeBase(VPReplicateSC), Ingredient(I), User(Operands), + : VPRecipeBase(VPRecipeBase::VPReplicateSC), + VPValue(VPValue::VPReplicateSC, I), VPUser(Operands), IsUniform(IsUniform), IsPredicated(IsPredicated) { // Retain the previous behavior of predicateInstructions(), where an // insert-element of a predicated instruction got hoisted into the @@ -1154,6 +1148,9 @@ ~VPReplicateRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPValue::VPReplicateSC; + } static inline bool classof(const VPRecipeBase *V) { return V->getVPRecipeID() == VPRecipeBase::VPReplicateSC; } @@ -1163,6 +1160,13 @@ /// the \p State. void execute(VPTransformState &State) override; + Instruction *getIngredient() { + return cast(getUnderlyingValue()); + } + const Instruction *getIngredient() const { + return cast(getUnderlyingValue()); + } + void setAlsoPack(bool Pack) { AlsoPack = Pack; } /// Print the recipe. @@ -1171,16 +1175,21 @@ }; /// A recipe for generating conditional branches on the bits of a mask. -class VPBranchOnMaskRecipe : public VPRecipeBase { - VPUser User; - +class VPBranchOnMaskRecipe : public VPRecipeBase, + public VPValue, + public VPUser { public: - VPBranchOnMaskRecipe(VPValue *BlockInMask) : VPRecipeBase(VPBranchOnMaskSC) { + VPBranchOnMaskRecipe(VPValue *BlockInMask) + : VPRecipeBase(VPRecipeBase::VPBranchOnMaskSC), + VPValue(VPValue::VPBranchOnMaskSC) { if (BlockInMask) // nullptr means all-one mask. - User.addOperand(BlockInMask); + addOperand(BlockInMask); } /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPValue::VPBranchOnMaskSC; + } static inline bool classof(const VPRecipeBase *V) { return V->getVPRecipeID() == VPRecipeBase::VPBranchOnMaskSC; } @@ -1203,9 +1212,9 @@ /// Return the mask used by this recipe. Note that a full mask is represented /// by a nullptr. VPValue *getMask() const { - assert(User.getNumOperands() <= 1 && "should have either 0 or 1 operands"); + assert(getNumOperands() <= 1 && "should have either 0 or 1 operands"); // Mask is optional. - return User.getNumOperands() == 1 ? User.getOperand(0) : nullptr; + return getNumOperands() == 1 ? getOperand(0) : nullptr; } }; @@ -1214,17 +1223,19 @@ /// order to merge values that are set under such a branch and feed their uses. /// The phi nodes can be scalar or vector depending on the users of the value. /// This recipe works in concert with VPBranchOnMaskRecipe. -class VPPredInstPHIRecipe : public VPRecipeBase { - Instruction *PredInst; - +class VPPredInstPHIRecipe : public VPRecipeBase, public VPValue { public: /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi /// nodes after merging back from a Branch-on-Mask. VPPredInstPHIRecipe(Instruction *PredInst) - : VPRecipeBase(VPPredInstPHISC), PredInst(PredInst) {} + : VPRecipeBase(VPRecipeBase::VPPredInstPHISC), + VPValue(VPValue::VPPredInstPHISC, PredInst) {} ~VPPredInstPHIRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPValue::VPPredInstPHISC; + } static inline bool classof(const VPRecipeBase *V) { return V->getVPRecipeID() == VPRecipeBase::VPPredInstPHISC; } @@ -1235,6 +1246,11 @@ /// Print the recipe. void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override; + + Instruction *getPredInst() { return cast(getUnderlyingValue()); } + const Instruction *getPredInst() const { + return cast(getUnderlyingValue()); + } }; /// A Recipe for widening load/store operations. @@ -1243,59 +1259,62 @@ /// - For store: Address, stored value, optional mask /// TODO: We currently execute only per-part unless a specific instance is /// provided. -class VPWidenMemoryInstructionRecipe : public VPRecipeBase, public VPValue { - VPUser User; - +class VPWidenMemoryInstructionRecipe : public VPRecipeBase, + public VPValue, + public VPUser { void setMask(VPValue *Mask) { if (!Mask) return; - User.addOperand(Mask); + addOperand(Mask); } bool isMasked() const { return (isa(getUnderlyingInstruction()) && - User.getNumOperands() == 2) || + getNumOperands() == 2) || (isa(getUnderlyingInstruction()) && - User.getNumOperands() == 3); + getNumOperands() == 3); } public: VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask) - : VPRecipeBase(VPWidenMemoryInstructionSC), - VPValue(VPValue::VPMemoryInstructionSC, &Load), User({Addr}) { + : VPRecipeBase(VPRecipeBase::VPWidenMemoryInstructionSC), + VPValue(VPValue::VPWidenMemoryInstructionSC, &Load), VPUser({Addr}) { setMask(Mask); } VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredValue, VPValue *Mask) - : VPRecipeBase(VPWidenMemoryInstructionSC), - VPValue(VPValue::VPMemoryInstructionSC, &Store), - User({Addr, StoredValue}) { + : VPRecipeBase(VPRecipeBase::VPWidenMemoryInstructionSC), + VPValue(VPValue::VPWidenMemoryInstructionSC, &Store), + VPUser({Addr, StoredValue}) { setMask(Mask); } /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPValue::VPWidenMemoryInstructionSC; + } static inline bool classof(const VPRecipeBase *V) { return V->getVPRecipeID() == VPRecipeBase::VPWidenMemoryInstructionSC; } /// Return the address accessed by this recipe. VPValue *getAddr() const { - return User.getOperand(0); // Address is the 1st, mandatory operand. + return getOperand(0); // Address is the 1st, mandatory operand. } /// Return the mask used by this recipe. Note that a full mask is represented /// by a nullptr. VPValue *getMask() const { // Mask is optional and therefore the last operand. - return isMasked() ? User.getOperand(User.getNumOperands() - 1) : nullptr; + return isMasked() ? getOperand(getNumOperands() - 1) : nullptr; } /// Return the address accessed by this recipe. VPValue *getStoredValue() const { assert(isa(getUnderlyingInstruction()) && "Stored value only available for store instructions"); - return User.getOperand(1); // Stored value is the 2nd, mandatory operand. + return getOperand(1); // Stored value is the 2nd, mandatory operand. } /// Generate the wide load/store. @@ -1314,20 +1333,17 @@ }; /// A Recipe for widening the canonical induction variable of the vector loop. -class VPWidenCanonicalIVRecipe : public VPRecipeBase { - /// A VPValue representing the canonical vector IV. - VPValue Val; - +class VPWidenCanonicalIVRecipe : public VPRecipeBase, public VPValue { public: - VPWidenCanonicalIVRecipe() : VPRecipeBase(VPWidenCanonicalIVSC) {} + VPWidenCanonicalIVRecipe() + : VPRecipeBase(VPRecipeBase::VPWidenCanonicalIVSC), + VPValue(VPValue::VPWidenCanonicalIVSC) {} ~VPWidenCanonicalIVRecipe() override = default; - /// Return the VPValue representing the canonical vector induction variable of - /// the vector loop. - const VPValue *getVPValue() const { return &Val; } - VPValue *getVPValue() { return &Val; } - /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPValue::VPWidenCanonicalIVSC; + } static inline bool classof(const VPRecipeBase *V) { return V->getVPRecipeID() == VPRecipeBase::VPWidenCanonicalIVSC; } @@ -1741,6 +1757,21 @@ Value2VPValue[V] = VPV; } + void addOrReplaceVPValue(Value *V, VPValue *VPV) { + assert(V && "Trying to get or add the VPValue of a null Value"); + if (!Value2VPValue.count(V)) { + addVPValue(V, VPV); + return; + } + VPValue *OldVPV = Value2VPValue[V]; + Value2VPValue[V] = VPV; + auto It = find(VPValuesToFree, OldVPV); + if (It != VPValuesToFree.end()) { + delete OldVPV; + VPValuesToFree.erase(It); + } + } + VPValue *getVPValue(Value *V) { assert(V && "Trying to get the VPValue of a null Value"); assert(Value2VPValue.count(V) && "Value does not exist in VPlan"); Index: llvm/lib/Transforms/Vectorize/VPlan.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/VPlan.cpp +++ llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -354,6 +354,44 @@ return getParent()->getRecipeList().erase(getIterator()); } +/// Obviously needs to change +static VPUser *getVPUser(VPRecipeBase *Base) { + switch (Base->getVPRecipeID()) { + case VPRecipeBase::VPBlendSC: + case VPRecipeBase::VPBranchOnMaskSC: + case VPRecipeBase::VPInstructionSC: + case VPRecipeBase::VPInterleaveSC: + case VPRecipeBase::VPPredInstPHISC: + case VPRecipeBase::VPReductionSC: + case VPRecipeBase::VPReplicateSC: + case VPRecipeBase::VPWidenCallSC: + case VPRecipeBase::VPWidenCanonicalIVSC: + case VPRecipeBase::VPWidenGEPSC: + case VPRecipeBase::VPWidenIntOrFpInductionSC: + case VPRecipeBase::VPWidenMemoryInstructionSC: + case VPRecipeBase::VPWidenPHISC: + case VPRecipeBase::VPWidenSC: + case VPRecipeBase::VPWidenSelectSC: + return (VPUser *)Base; // Not quite right + } + return nullptr; +} + +void VPRecipeBase::recursivelyDeleteUnusedRecipes() { + // TODOD: This doesn't work yet... + VPUser *ThisUser = getVPUser(this); + if (ThisUser && + cast(this)->getNumUsers() == 0 /* && isSafeToRemove()*/) { + for (auto *Op : ThisUser->operands()) { + Op->removeUser(ThisUser); + if (VPRecipeBase *R = dyn_cast(Op)) + R->recursivelyDeleteUnusedRecipes(); + } + ThisUser->clearOperands(); + eraseFromParent(); + } +} + void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) { removeFromParent(); insertAfter(InsertPos); @@ -407,6 +445,32 @@ State.set(this, Call, Part); break; } + case VPInstruction::Mulhs: + case VPInstruction::Mulhu: { + // TODOD: Create a Mulh instrinsic instead? + Value *A = State.get(getOperand(0), Part); + Value *B = State.get(getOperand(1), Part); + + auto Ty = cast(A->getType()); + unsigned BW = Ty->getScalarSizeInBits(); + auto *WidenTy = VectorType::getExtendedElementVectorType(Ty); + + auto *ExtA = getOpcode() == VPInstruction::Mulhs + ? Builder.CreateSExt(A, WidenTy) + : Builder.CreateZExt(A, WidenTy); + auto *ExtB = getOpcode() == VPInstruction::Mulhs + ? Builder.CreateSExt(B, WidenTy) + : Builder.CreateZExt(B, WidenTy); + auto *Mul = Builder.CreateMul(ExtA, ExtB); + auto *Shift = Builder.CreateLShr( + Mul, ConstantDataVector::getSplat( + State.VF.getKnownMinValue(), + ConstantInt::get(WidenTy->getElementType(), BW))); + auto *Trunc = Builder.CreateTrunc(Shift, Ty); + + State.set(this, Trunc, Part); + break; + } default: llvm_unreachable("Unsupported opcode for instruction"); } @@ -451,6 +515,12 @@ case VPInstruction::ActiveLaneMask: O << "active lane mask"; break; + case VPInstruction::Mulhs: + O << "mulhs"; + break; + case VPInstruction::Mulhu: + O << "mulhu"; + break; default: O << Instruction::getOpcodeName(getOpcode()); @@ -759,18 +829,18 @@ O << "\"WIDEN "; printAsOperand(O, SlotTracker); O << " = select "; - User.getOperand(0)->printAsOperand(O, SlotTracker); + getOperand(0)->printAsOperand(O, SlotTracker); O << ", "; - User.getOperand(1)->printAsOperand(O, SlotTracker); + getOperand(1)->printAsOperand(O, SlotTracker); O << ", "; - User.getOperand(2)->printAsOperand(O, SlotTracker); + getOperand(2)->printAsOperand(O, SlotTracker); O << (InvariantCond ? " (condition is loop invariant)" : ""); } void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << "\"WIDEN\\l\""; - O << "\" " << VPlanIngredient(&Ingredient); + O << "\" " << VPlanIngredient(getUnderlyingInstruction()); } void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, @@ -778,10 +848,10 @@ O << "\"WIDEN-INDUCTION"; if (Trunc) { O << "\\l\""; - O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\""; + O << " +\n" << Indent << "\" " << VPlanIngredient(getIV()) << "\\l\""; O << " +\n" << Indent << "\" " << VPlanIngredient(Trunc); } else - O << " " << VPlanIngredient(IV); + O << " " << VPlanIngredient(getIV()); } void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent, @@ -798,13 +868,13 @@ void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { - O << "\"WIDEN-PHI " << VPlanIngredient(Phi); + O << "\"WIDEN-PHI " << VPlanIngredient(getPhi()); } void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << "\"BLEND "; - Phi->printAsOperand(O, false); + getPhi()->printAsOperand(O, false); O << " ="; if (getNumIncomingValues() == 1) { // Not a User of any mask: not really blending, this is a @@ -823,13 +893,13 @@ void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { - O << "\"REDUCE of" << *I << " as "; - ChainOp->printAsOperand(O, SlotTracker); + O << "\"REDUCE of" << *getUnderlyingValue() << " as "; + getChainOp()->printAsOperand(O, SlotTracker); O << " + reduce("; - VecOp->printAsOperand(O, SlotTracker); - if (CondOp) { + getVecOp()->printAsOperand(O, SlotTracker); + if (getCondOp()) { O << ", "; - CondOp->printAsOperand(O, SlotTracker); + getCondOp()->printAsOperand(O, SlotTracker); } O << ")"; } @@ -837,14 +907,14 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << "\"" << (IsUniform ? "CLONE " : "REPLICATE ") - << VPlanIngredient(Ingredient); + << VPlanIngredient(getIngredient()); if (AlsoPack) O << " (S->V)"; } void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { - O << "\"PHI-PREDICATED-INSTRUCTION " << VPlanIngredient(PredInst); + O << "\"PHI-PREDICATED-INSTRUCTION " << VPlanIngredient(getPredInst()); } void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent, @@ -879,14 +949,14 @@ Constant *VStep = VF == 1 ? Indices.back() : ConstantVector::get(Indices); // Add the consecutive indices to the vector value. Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv"); - State.set(getVPValue(), CanonicalVectorIV, Part); + State.set(this, CanonicalVectorIV, Part); } } void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << "\"EMIT "; - getVPValue()->printAsOperand(O, SlotTracker); + printAsOperand(O, SlotTracker); O << " = WIDEN-CANONICAL-INDUCTION"; } @@ -897,6 +967,7 @@ for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) if (User->getOperand(I) == this) User->setOperand(I, New); + Users.clear(); } void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const { @@ -990,7 +1061,7 @@ if (const auto *VPI = dyn_cast(&Recipe)) assignSlot(VPI); else if (const auto *VPIV = dyn_cast(&Recipe)) - assignSlot(VPIV->getVPValue()); + assignSlot(VPIV); } } Index: llvm/lib/Transforms/Vectorize/VPlanValue.h =================================================================== --- llvm/lib/Transforms/Vectorize/VPlanValue.h +++ llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -31,6 +31,7 @@ class Value; class VPSlotTracker; class VPUser; +class VPRecipeBase; // This is the base class of the VPlan Def/Use graph, used for modeling the data // flow into, within and out of the VPlan. VPValues can stand for live-ins @@ -61,10 +62,12 @@ // for multiple underlying IRs (Polly?) by providing a new VPlan front-end, // back-end and analysis information for the new IR. +public: /// Return the underlying Value attached to this VPValue. Value *getUnderlyingValue() { return UnderlyingVal; } const Value *getUnderlyingValue() const { return UnderlyingVal; } +protected: // Set \p Val as the underlying Value of this VPValue. void setUnderlyingValue(Value *Val) { assert(!UnderlyingVal && "Underlying Value is already set."); @@ -79,12 +82,22 @@ enum { VPValueSC, VPMultiValueSC, + + VPBlendSC, // All following ID's should be VPRecipes. + VPBranchOnMaskSC, VPInstructionSC, - VPMemoryInstructionSC, - VPVWidenCallSC, - VPVWidenSelectSC, - VPVWidenGEPSC, - VPVInterleaveSC + VPInterleaveSC, + VPPredInstPHISC, + VPReductionSC, + VPReplicateSC, + VPWidenCallSC, + VPWidenCanonicalIVSC, + VPWidenGEPSC, + VPWidenIntOrFpInductionSC, + VPWidenMemoryInstructionSC, + VPWidenPHISC, + VPWidenSC, + VPWidenSelectSC }; VPValue(Value *UV = nullptr) : VPValue(VPValueSC, UV) {} @@ -100,11 +113,22 @@ virtual VPValue *getDefiningValue() { return this; } virtual VPValue const *getDefiningValue() const { return this; } + Type *getType() { + if (UnderlyingVal) + return UnderlyingVal->getType(); + return nullptr; + } + void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const; void print(raw_ostream &OS, VPSlotTracker &Tracker) const; unsigned getNumUsers() const { return Users.size(); } virtual void addUser(VPUser &User) { Users.push_back(&User); } + void removeUser(VPUser *User) { + auto It = find(Users, User); + assert(It != Users.end() && "Expected User in Users"); + Users.erase(It); + } typedef SmallVectorImpl::iterator user_iterator; typedef SmallVectorImpl::const_iterator const_user_iterator; @@ -133,6 +157,8 @@ } void replaceAllUsesWith(VPValue *New); + + static inline bool classof(const VPRecipeBase *V) { return true; } }; /// This class can be used to reference a VPValue produced by a recipe that @@ -197,6 +223,7 @@ } void setOperand(unsigned I, VPValue *New) { Operands[I] = New; } + void clearOperands() { Operands.clear(); } typedef SmallVectorImpl::iterator operand_iterator; typedef SmallVectorImpl::const_iterator const_operand_iterator;