Index: llvm/include/llvm/IR/IRBuilder.h =================================================================== --- llvm/include/llvm/IR/IRBuilder.h +++ llvm/include/llvm/IR/IRBuilder.h @@ -2437,6 +2437,12 @@ unsigned Index, unsigned FieldIndex, MDNode *DbgInfo); + /// Return an all-true i1 vector whose size and scalability are \p NumElts. + Value *getTrueVector(ElementCount NumElts) { + VectorType *VTy = VectorType::get(Type::getInt1Ty(Context), NumElts); + return Constant::getAllOnesValue(VTy); + } + private: /// Helper function that creates an assume intrinsic call that /// represents an alignment assumption on the provided pointer \p PtrValue Index: llvm/lib/Transforms/Vectorize/VPlan.h =================================================================== --- llvm/lib/Transforms/Vectorize/VPlan.h +++ llvm/lib/Transforms/Vectorize/VPlan.h @@ -792,6 +792,7 @@ inline bool VPUser::classof(const VPDef *Def) { return Def->getVPDefID() == VPRecipeBase::VPInstructionSC || + Def->getVPDefID() == VPRecipeBase::VPPredicatedWidenSC || Def->getVPDefID() == VPRecipeBase::VPWidenSC || Def->getVPDefID() == VPRecipeBase::VPWidenCallSC || Def->getVPDefID() == VPRecipeBase::VPWidenSelectSC || @@ -801,6 +802,8 @@ Def->getVPDefID() == VPRecipeBase::VPReplicateSC || Def->getVPDefID() == VPRecipeBase::VPReductionSC || Def->getVPDefID() == VPRecipeBase::VPBranchOnMaskSC || + Def->getVPDefID() == + VPRecipeBase::VPPredicatedWidenMemoryInstructionSC || Def->getVPDefID() == VPRecipeBase::VPWidenMemoryInstructionSC; } @@ -822,6 +825,7 @@ SLPLoad, SLPStore, ActiveLaneMask, + AllTrueMask, CanonicalIVIncrement, CanonicalIVIncrementNUW, BranchOnCount, @@ -875,6 +879,13 @@ return R->getVPDefID() == VPRecipeBase::VPInstructionSC; } + /// Return true if \p V is an AllTrueMask VPInstruction. 
+ static inline bool isAllTrueMask(const VPValue *V) { + if (auto *Instr = dyn_cast<VPInstruction>(V)) + return Instr->getOpcode() == VPInstruction::AllTrueMask; + return false; + } + unsigned getOpcode() const { return Opcode; } /// Generate the instruction. @@ -946,11 +957,17 @@ /// ingredient. This recipe covers most of the traditional vectorization cases /// where each ingredient transforms into a vectorized version of itself. class VPWidenRecipe : public VPRecipeBase, public VPValue { +protected: + template <typename IterT> + VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands, + const unsigned char RecipeSC, const unsigned char ValueSC) + : VPRecipeBase(RecipeSC, Operands), VPValue(ValueSC, &I, this) {} + public: template <typename IterT> VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands) - : VPRecipeBase(VPRecipeBase::VPWidenSC, Operands), - VPValue(VPValue::VPVWidenSC, &I, this) {} + : VPWidenRecipe(I, Operands, VPRecipeBase::VPWidenSC, + VPValue::VPVWidenSC) {} ~VPWidenRecipe() override = default; @@ -972,6 +989,47 @@ #endif }; +/// VPPredicatedWidenRecipe is a recipe for producing a copy of vector type +/// using VP intrinsics for its ingredient. This recipe covers most of the +/// traditional vectorization cases where each ingredient transforms into a +/// vectorized version of itself. +class VPPredicatedWidenRecipe : public VPWidenRecipe { +public: + template <typename IterT> + VPPredicatedWidenRecipe(Instruction &I, iterator_range<IterT> Operands, + VPValue *Mask, VPValue *EVL) + : VPWidenRecipe(I, Operands, VPRecipeBase::VPPredicatedWidenSC, + VPValue::VPVPredicatedWidenSC) { + addOperand(Mask); + addOperand(EVL); + } + + ~VPPredicatedWidenRecipe() override = default; + + /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPDef *D) { + return D->getVPDefID() == VPRecipeBase::VPPredicatedWidenSC; + } + static inline bool classof(const VPValue *V) { + return V->getVPValueID() == VPValue::VPVPredicatedWidenSC; + } + + /// Return the mask used by this recipe. 
+ VPValue *getMask() const { return getOperand(getNumOperands() - 2); } + + /// Return the explicit vector length used by this recipe. + VPValue *getEVL() const { return getOperand(getNumOperands() - 1); } + + /// Generate the widened instruction using VP intrinsics. + void execute(VPTransformState &State) override; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif +}; + /// A recipe for widening Call instructions. class VPWidenCallRecipe : public VPRecipeBase, public VPValue { @@ -1683,6 +1741,73 @@ } }; +/// A recipe to generate Explicit Vector Length (EVL) value to be used with +/// VP intrinsics. +class VPCanonicalEVLRecipe : public VPRecipeBase, public VPValue { + +public: + VPCanonicalEVLRecipe(VPValue *IV, VPValue *TC) + : VPRecipeBase(VPRecipeBase::VPCanonicalEVLSC, {IV, TC}), + VPValue(VPValue::VPVCanonicalEVLSC, nullptr, this) {} + ~VPCanonicalEVLRecipe() override = default; + + /// Return the VPValue representing EVL. + const VPValue *getEVL() const { return this; } + VPValue *getEVL() { return this; } + + /// Return VPValue representing Induction Variable. + VPValue *getIV() const { return getOperand(0); } + + /// Return VPValue representing trip count. + VPValue *getTripCount() const { return getOperand(1); } + + /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPDef *D) { + return D->getVPDefID() == VPRecipeBase::VPCanonicalEVLSC; + } + + /// Generate the instructions to compute EVL. + void execute(VPTransformState &State) override; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif +}; + +/// Canonical scalar induction increment of the vector loop with Explicit Vector +/// Length. 
+class VPCanonicalIVIncrementEVLRecipe : public VPRecipeBase, public VPValue { + +public: + VPCanonicalIVIncrementEVLRecipe(VPValue *IV, VPValue *EVL) + : VPRecipeBase(VPRecipeBase::VPCanonicalIVIncrementEVLSC, {IV, EVL}), + VPValue(VPValue::VPVCanonicalIVIncrementEVLSC, nullptr, this) {} + ~VPCanonicalIVIncrementEVLRecipe() override = default; + + /// Return the VPValue of the induction variable (operand 0). + VPValue *getIV() const { return getOperand(0); } + + /// Return the VPValue of the Explicit Vector Length, EVL (operand 1). + VPValue *getEVL() const { return getOperand(1); } + + /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPDef *D) { + return D->getVPDefID() == VPRecipeBase::VPCanonicalIVIncrementEVLSC; + } + + /// Generate the canonical scalar induction increment of the vector loop with + /// Explicit Vector Length. + void execute(VPTransformState &State) override; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif +}; + /// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when /// control converges back from a Branch-on-Mask. The phi nodes are needed in /// order to merge values that are set under such a branch and feed their uses. @@ -1726,6 +1851,17 @@ /// TODO: We currently execute only per-part unless a specific instance is /// provided. class VPWidenMemoryInstructionRecipe : public VPRecipeBase { +protected: + void setMask(VPValue *Mask) { + if (!Mask) + return; + addOperand(Mask); + } + + bool isMasked() const { + return isStore() ? getNumOperands() == 3 : getNumOperands() == 2; + } + Instruction &Ingredient; // Whether the loaded-from / stored-to addresses are consecutive. @@ -1734,32 +1870,40 @@ // Whether the consecutive loaded/stored addresses are in reverse order. 
bool Reverse; - void setMask(VPValue *Mask) { - if (!Mask) - return; - addOperand(Mask); + VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, + const unsigned char RecipeSC, + const unsigned char ValueSC, bool Consecutive, + bool Reverse) + : VPRecipeBase(RecipeSC, {Addr}), Ingredient(Load), + Consecutive(Consecutive), Reverse(Reverse) { + assert((Consecutive || !Reverse) && "Reverse implies consecutive"); + new VPValue(ValueSC, &Load, this); } - bool isMasked() const { - return isStore() ? getNumOperands() == 3 : getNumOperands() == 2; + VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr, + VPValue *StoredValue, + const unsigned char RecipeSC, bool Consecutive, + bool Reverse) + : VPRecipeBase(RecipeSC, {Addr, StoredValue}), Ingredient(Store), + Consecutive(Consecutive), Reverse(Reverse) { + assert((Consecutive || !Reverse) && "Reverse implies consecutive"); } public: VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse) - : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr}), Ingredient(Load), - Consecutive(Consecutive), Reverse(Reverse) { - assert((Consecutive || !Reverse) && "Reverse implies consecutive"); - new VPValue(VPValue::VPVMemoryInstructionSC, &Load, this); + : VPWidenMemoryInstructionRecipe(Load, Addr, VPWidenMemoryInstructionSC, + VPValue::VPVMemoryInstructionSC, + Consecutive, Reverse) { setMask(Mask); } VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredValue, VPValue *Mask, bool Consecutive, bool Reverse) - : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr, StoredValue}), - Ingredient(Store), Consecutive(Consecutive), Reverse(Reverse) { - assert((Consecutive || !Reverse) && "Reverse implies consecutive"); + : VPWidenMemoryInstructionRecipe(Store, Addr, StoredValue, + VPWidenMemoryInstructionSC, Consecutive, + Reverse) { setMask(Mask); } @@ -1820,6 +1964,68 @@ Instruction &getIngredient() const { return Ingredient; } }; +/// A recipe for widening 
load/store operations to VP intrinsics. +/// The recipe uses the following VPValues: +/// - For load: Address, mask, EVL +/// - For store: Address, stored value, mask, EVL +class VPPredicatedWidenMemoryInstructionRecipe + : public VPWidenMemoryInstructionRecipe { + void setEVL(VPValue *EVL) { + if (!EVL) + return; + addOperand(EVL); + } + +public: + VPPredicatedWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, + VPValue *Mask, VPValue *EVL, + bool Consecutive, bool Reverse) + : VPWidenMemoryInstructionRecipe( + Load, Addr, VPPredicatedWidenMemoryInstructionSC, + VPValue::VPVPredicatedMemoryInstructionSC, Consecutive, Reverse) { + setMask(Mask); + setEVL(EVL); + } + + VPPredicatedWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr, + VPValue *StoredValue, VPValue *Mask, + VPValue *EVL, bool Consecutive, + bool Reverse) + : VPWidenMemoryInstructionRecipe(Store, Addr, StoredValue, + VPPredicatedWidenMemoryInstructionSC, + Consecutive, Reverse) { + setMask(Mask); + setEVL(EVL); + } + + /// Method to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const VPDef *D) { + return D->getVPDefID() == + VPRecipeBase::VPPredicatedWidenMemoryInstructionSC; + } + + /// Return the mask used by this recipe. + VPValue *getMask() const { + // Mask is the second-to-last, mandatory operand. + return getOperand(getNumOperands() - 2); + } + + /// Return the EVL used by this recipe. + VPValue *getEVL() const { + // EVL is the last, mandatory operand. + return getOperand(getNumOperands() - 1); + } + + /// Generate the wide load/store. + void execute(VPTransformState &State) override; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif +}; + /// Recipe to expand a SCEV expression. 
class VPExpandSCEVRecipe : public VPRecipeBase, public VPValue { const SCEV *Expr; Index: llvm/lib/Transforms/Vectorize/VPlan.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/VPlan.cpp +++ llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -571,6 +571,7 @@ case VPWidenPHISC: case VPBlendSC: case VPWidenSC: + case VPPredicatedWidenSC: case VPWidenGEPSC: case VPReductionSC: case VPWidenSelectSC: { @@ -602,6 +603,7 @@ case VPWidenPHISC: case VPBlendSC: case VPWidenSC: + case VPPredicatedWidenSC: case VPWidenGEPSC: case VPReductionSC: case VPWidenSelectSC: { @@ -751,6 +753,11 @@ State.set(this, Call, Part); break; } + case VPInstruction::AllTrueMask: { + Value *AllTrueMask = Builder.getTrueVector(State.VF); + State.set(this, AllTrueMask, Part); + break; + } case VPInstruction::FirstOrderRecurrenceSplice: { // Generate code to combine the previous and current values in vector v3. // @@ -863,6 +870,9 @@ case VPInstruction::ActiveLaneMask: O << "active lane mask"; break; + case VPInstruction::AllTrueMask: + O << "all true mask"; + break; case VPInstruction::FirstOrderRecurrenceSplice: O << "first-order splice"; break; @@ -1316,6 +1326,14 @@ printOperands(O, SlotTracker); } +void VPPredicatedWidenRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "PREDICATED-WIDEN "; + printAsOperand(O, SlotTracker); + O << " = " << getUnderlyingInstr()->getOpcodeName() << " "; + printOperands(O, SlotTracker); +} + void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << Indent << "WIDEN-INDUCTION"; @@ -1543,6 +1561,21 @@ } #endif +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPPredicatedWidenMemoryInstructionRecipe::print( + raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { + O << Indent << "PREDICATED-WIDEN "; + + if (!isStore()) { + getVPSingleValue()->printAsOperand(O, SlotTracker); + O << " = "; + } + O << 
Instruction::getOpcodeName(Ingredient.getOpcode()) << " "; + + printOperands(O, SlotTracker); +} +#endif + void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) { Value *CanonicalIV = State.get(getOperand(0), 0); Type *STy = CanonicalIV->getType(); @@ -1685,6 +1718,23 @@ } #endif +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPCanonicalEVLRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "EMIT "; + printAsOperand(O, SlotTracker); + O << " = GENERATE-EXPLICIT-VECTOR-LENGTH"; +} + +void VPCanonicalIVIncrementEVLRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "EMIT "; + printAsOperand(O, SlotTracker); + O << " = add "; + printOperands(O, SlotTracker); +} +#endif + template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT); void VPValue::replaceAllUsesWith(VPValue *New) { Index: llvm/lib/Transforms/Vectorize/VPlanValue.h =================================================================== --- llvm/lib/Transforms/Vectorize/VPlanValue.h +++ llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -51,6 +51,7 @@ friend class VPSlotTracker; friend class VPRecipeBase; friend class VPWidenMemoryInstructionRecipe; + friend class VPPredicatedWidenMemoryInstructionRecipe; const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast). @@ -100,6 +101,12 @@ VPVWidenGEPSC, VPVWidenSelectSC, + // Vector Predication values. + VPVPredicatedMemoryInstructionSC, + VPVPredicatedWidenSC, + VPVCanonicalEVLSC, + VPVCanonicalIVIncrementEVLSC, + // Phi-like VPValues. Need to be kept together. VPVBlendSC, VPVCanonicalIVPHISC, @@ -358,6 +365,12 @@ VPWidenSC, VPWidenSelectSC, + // Vector Predication recipes. + VPPredicatedWidenMemoryInstructionSC, + VPPredicatedWidenSC, + VPCanonicalEVLSC, + VPCanonicalIVIncrementEVLSC, + // Phi-like recipes. Need to be kept together. VPBlendSC, VPCanonicalIVPHISC,