diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -2605,6 +2605,12 @@
                                          unsigned Index, unsigned FieldIndex,
                                          MDNode *DbgInfo);
 
+  /// Return an all true boolean vector of size and scalability \p NumElts.
+  Value *getTrueVector(ElementCount NumElts) {
+    VectorType *VTy = VectorType::get(Type::getInt1Ty(Context), NumElts);
+    return Constant::getAllOnesValue(VTy);
+  }
+
 private:
   /// Helper function that creates an assume intrinsic call that
   /// represents an alignment assumption on the provided pointer \p PtrValue
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -754,6 +754,7 @@
 
 inline bool VPUser::classof(const VPDef *Def) {
   return Def->getVPDefID() == VPRecipeBase::VPInstructionSC ||
+         Def->getVPDefID() == VPRecipeBase::VPPredicatedWidenSC ||
          Def->getVPDefID() == VPRecipeBase::VPWidenSC ||
          Def->getVPDefID() == VPRecipeBase::VPWidenCallSC ||
          Def->getVPDefID() == VPRecipeBase::VPWidenSelectSC ||
@@ -763,6 +764,8 @@
          Def->getVPDefID() == VPRecipeBase::VPReplicateSC ||
          Def->getVPDefID() == VPRecipeBase::VPReductionSC ||
          Def->getVPDefID() == VPRecipeBase::VPBranchOnMaskSC ||
+         Def->getVPDefID() ==
+             VPRecipeBase::VPPredicatedWidenMemoryInstructionSC ||
          Def->getVPDefID() == VPRecipeBase::VPWidenMemoryInstructionSC;
 }
 
@@ -784,6 +787,7 @@
     SLPLoad,
     SLPStore,
     ActiveLaneMask,
+    AllTrueMask,
   };
 
 private:
@@ -876,11 +880,17 @@
 /// ingredient. This recipe covers most of the traditional vectorization cases
 /// where each ingredient transforms into a vectorized version of itself.
 class VPWidenRecipe : public VPRecipeBase, public VPValue {
+protected:
+  template <typename IterT>
+  VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands,
+                const unsigned char RecipeSC, const unsigned char ValueSC)
+      : VPRecipeBase(RecipeSC, Operands), VPValue(ValueSC, &I, this) {}
+
 public:
   template <typename IterT>
   VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
-      : VPRecipeBase(VPRecipeBase::VPWidenSC, Operands),
-        VPValue(VPValue::VPVWidenSC, &I, this) {}
+      : VPWidenRecipe(I, Operands, VPRecipeBase::VPWidenSC,
+                      VPValue::VPVWidenSC) {}
 
   ~VPWidenRecipe() override = default;
 
@@ -902,6 +912,47 @@
 #endif
 };
 
+/// VPPredicatedWidenRecipe is a recipe for producing a copy of vector type
+/// using VP intrinsics for its ingredient. This recipe covers most of the
+/// traditional vectorization cases where each ingredient transforms into a
+/// vectorized version of itself.
+class VPPredicatedWidenRecipe : public VPWidenRecipe {
+public:
+  template <typename IterT>
+  VPPredicatedWidenRecipe(Instruction &I, iterator_range<IterT> Operands,
+                          VPValue *Mask, VPValue *EVL)
+      : VPWidenRecipe(I, Operands, VPRecipeBase::VPPredicatedWidenSC,
+                      VPValue::VPVPredicatedWidenSC) {
+    addOperand(Mask);
+    addOperand(EVL);
+  }
+
+  ~VPPredicatedWidenRecipe() override = default;
+
+  /// Method to support type inquiry through isa, cast, and dyn_cast.
+  static inline bool classof(const VPDef *D) {
+    return D->getVPDefID() == VPRecipeBase::VPPredicatedWidenSC;
+  }
+  static inline bool classof(const VPValue *V) {
+    return V->getVPValueID() == VPValue::VPVPredicatedWidenSC;
+  }
+
+  /// Return the mask used by this recipe.
+  VPValue *getMask() const { return getOperand(getNumOperands() - 2); }
+
+  /// Return the explicit vector length used by this recipe.
+  VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
+
+  /// Produce the widened copy of the ingredient using VP intrinsics.
+  void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  /// Print the recipe.
+  void print(raw_ostream &O, const Twine &Indent,
+             VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
 /// A recipe for widening Call instructions.
 class VPWidenCallRecipe : public VPRecipeBase, public VPValue {
 
@@ -1476,6 +1527,41 @@
   }
 };
 
+/// A recipe to generate Explicit Vector Length (EVL) value to be used with
+/// VPred intrinsics.
+class VPWidenEVLRecipe : public VPRecipeBase, public VPValue {
+
+public:
+  VPWidenEVLRecipe(VPValue *IV, VPValue *TC)
+      : VPRecipeBase(VPRecipeBase::VPWidenEVLSC, {IV, TC}),
+        VPValue(VPValue::VPVWidenEVLSC, nullptr, this) {}
+  ~VPWidenEVLRecipe() override = default;
+
+  /// Return the VPValue representing EVL.
+  const VPValue *getEVL() const { return this; }
+  VPValue *getEVL() { return this; }
+
+  /// Return VPValue representing Induction Variable.
+  VPValue *getIV() const { return getOperand(0); }
+
+  /// Return VPValue representing trip count.
+  VPValue *getTripCount() const { return getOperand(1); }
+
+  /// Method to support type inquiry through isa, cast, and dyn_cast.
+  static inline bool classof(const VPDef *D) {
+    return D->getVPDefID() == VPRecipeBase::VPWidenEVLSC;
+  }
+
+  /// Generate the instructions to compute EVL.
+  void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  /// Print the recipe.
+  void print(raw_ostream &O, const Twine &Indent,
+             VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
 /// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
 /// control converges back from a Branch-on-Mask. The phi nodes are needed in
 /// order to merge values that are set under such a branch and feed their uses.
@@ -1512,6 +1598,17 @@
 /// TODO: We currently execute only per-part unless a specific instance is
 /// provided.
 class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
+  void setMask(VPValue *Mask) {
+    if (!Mask)
+      return;
+    addOperand(Mask);
+  }
+
+  bool isMasked() const {
+    return isStore() ? getNumOperands() == 3 : getNumOperands() == 2;
+  }
+
+protected:
   Instruction &Ingredient;
 
   // Whether the loaded-from / stored-to addresses are consecutive.
@@ -1520,32 +1617,41 @@
   // Whether the consecutive loaded/stored addresses are in reverse order.
   bool Reverse;
 
-  void setMask(VPValue *Mask) {
-    if (!Mask)
-      return;
-    addOperand(Mask);
+
+  VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr,
+                                 const unsigned char RecipeSC,
+                                 const unsigned char ValueSC, bool Consecutive,
+                                 bool Reverse)
+      : VPRecipeBase(RecipeSC, {Addr}), Ingredient(Load),
+        Consecutive(Consecutive), Reverse(Reverse) {
+    assert((Consecutive || !Reverse) && "Reverse implies consecutive");
+    new VPValue(ValueSC, &Load, this);
   }
 
-  bool isMasked() const {
-    return isStore() ? getNumOperands() == 3 : getNumOperands() == 2;
+  VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr,
+                                 VPValue *StoredValue,
+                                 const unsigned char RecipeSC, bool Consecutive,
+                                 bool Reverse)
+      : VPRecipeBase(RecipeSC, {Addr, StoredValue}), Ingredient(Store),
+        Consecutive(Consecutive), Reverse(Reverse) {
+    assert((Consecutive || !Reverse) && "Reverse implies consecutive");
   }
 
 public:
   VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
                                  bool Consecutive, bool Reverse)
-      : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr}), Ingredient(Load),
-        Consecutive(Consecutive), Reverse(Reverse) {
-    assert((Consecutive || !Reverse) && "Reverse implies consecutive");
-    new VPValue(VPValue::VPVMemoryInstructionSC, &Load, this);
+      : VPWidenMemoryInstructionRecipe(Load, Addr, VPWidenMemoryInstructionSC,
+                                       VPValue::VPVMemoryInstructionSC,
+                                       Consecutive, Reverse) {
     setMask(Mask);
   }
 
   VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr,
                                  VPValue *StoredValue, VPValue *Mask,
                                  bool Consecutive, bool Reverse)
-      : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr, StoredValue}),
-        Ingredient(Store), Consecutive(Consecutive), Reverse(Reverse) {
-    assert((Consecutive || !Reverse) && "Reverse implies consecutive");
+      : VPWidenMemoryInstructionRecipe(Store, Addr, StoredValue,
+                                       VPWidenMemoryInstructionSC, Consecutive,
+                                       Reverse) {
     setMask(Mask);
   }
 
@@ -1592,6 +1698,63 @@
 #endif
 };
 
+/// A Recipe for widening load/store operations to VP intrinsics.
+/// The recipe uses the following VPValues:
+/// - For load: Address, mask, EVL
+/// - For store: Address, stored value, mask, EVL
+class VPPredicatedWidenMemoryInstructionRecipe
+    : public VPWidenMemoryInstructionRecipe {
+
+public:
+  VPPredicatedWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr,
+                                           VPValue *Mask, VPValue *EVL,
+                                           bool Consecutive, bool Reverse)
+      : VPWidenMemoryInstructionRecipe(
+            Load, Addr, VPPredicatedWidenMemoryInstructionSC,
+            VPValue::VPVPredicatedMemoryInstructionSC, Consecutive, Reverse) {
+    addOperand(Mask);
+    addOperand(EVL);
+  }
+
+  VPPredicatedWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr,
+                                           VPValue *StoredValue, VPValue *Mask,
+                                           VPValue *EVL, bool Consecutive,
+                                           bool Reverse)
+      : VPWidenMemoryInstructionRecipe(Store, Addr, StoredValue,
+                                       VPPredicatedWidenMemoryInstructionSC,
+                                       Consecutive, Reverse) {
+    addOperand(Mask);
+    addOperand(EVL);
+  }
+
+  /// Method to support type inquiry through isa, cast, and dyn_cast.
+  static inline bool classof(const VPDef *D) {
+    return D->getVPDefID() ==
+           VPRecipeBase::VPPredicatedWidenMemoryInstructionSC;
+  }
+
+  /// Return the mask used by this recipe.
+  VPValue *getMask() const {
+    // Mask is the second last, mandatory operand.
+    return getOperand(getNumOperands() - 2);
+  }
+
+  /// Return the EVL used by this recipe.
+  VPValue *getEVL() const {
+    // EVL is the last, mandatory operand.
+    return getOperand(getNumOperands() - 1);
+  }
+
+  /// Generate the wide load/store.
+  void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  /// Print the recipe.
+  void print(raw_ostream &O, const Twine &Indent,
+             VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
 /// A Recipe for widening the canonical induction variable of the vector loop.
 class VPWidenCanonicalIVRecipe : public VPRecipeBase, public VPValue {
 public:
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -580,6 +580,7 @@
   case VPWidenPHISC:
   case VPBlendSC:
   case VPWidenSC:
+  case VPPredicatedWidenSC:
   case VPWidenGEPSC:
   case VPReductionSC:
   case VPWidenSelectSC: {
@@ -711,6 +712,11 @@
     }
     break;
   }
+  case VPInstruction::AllTrueMask: {
+    Value *AllTrueMask = Builder.getTrueVector(State.VF);
+    State.set(this, AllTrueMask, Part);
+    break;
+  }
   default:
     llvm_unreachable("Unsupported opcode for instruction");
   }
@@ -756,6 +762,9 @@
   case VPInstruction::FirstOrderRecurrenceSplice:
     O << "first-order splice";
     break;
+  case VPInstruction::AllTrueMask:
+    O << "all true mask";
+    break;
   default:
     O << Instruction::getOpcodeName(getOpcode());
   }
@@ -1125,6 +1134,14 @@
   printOperands(O, SlotTracker);
 }
 
+void VPPredicatedWidenRecipe::print(raw_ostream &O, const Twine &Indent,
+                                    VPSlotTracker &SlotTracker) const {
+  O << Indent << "PREDICATED-WIDEN ";
+  printAsOperand(O, SlotTracker);
+  O << " = " << getUnderlyingInstr()->getOpcodeName() << " ";
+  printOperands(O, SlotTracker);
+}
+
 void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                           VPSlotTracker &SlotTracker) const {
   O << Indent << "WIDEN-INDUCTION";
@@ -1243,6 +1260,19 @@
 }
+
+void VPPredicatedWidenMemoryInstructionRecipe::print(
+    raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const {
+  O << Indent << "PREDICATED-WIDEN ";
+
+  if (!isStore()) {
+    getVPSingleValue()->printAsOperand(O, SlotTracker);
+    O << " = ";
+  }
+  O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";
+
+  printOperands(O, SlotTracker);
+}
 #endif
 
 void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
   Value *CanonicalIV = State.CanonicalIV;
   Type *STy = CanonicalIV->getType();
@@ -1387,6 +1417,13 @@
 }
+
+void VPWidenEVLRecipe::print(raw_ostream &O, const Twine &Indent,
+                             VPSlotTracker &SlotTracker) const {
+  O << Indent << "EMIT ";
+  getEVL()->printAsOperand(O, SlotTracker);
+  O << " = GENERATE-EXPLICIT-VECTOR-LENGTH";
+}
 #endif
 
 template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
 
 void VPValue::replaceAllUsesWith(VPValue *New) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -51,6 +51,7 @@
   friend class VPSlotTracker;
   friend class VPRecipeBase;
   friend class VPWidenMemoryInstructionRecipe;
+  friend class VPPredicatedWidenMemoryInstructionRecipe;
 
   const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
 
@@ -92,10 +93,13 @@
     VPValueSC,
     VPVInstructionSC,
     VPVMemoryInstructionSC,
+    VPVPredicatedMemoryInstructionSC,
+    VPVPredicatedWidenSC,
     VPVReductionSC,
     VPVReplicateSC,
     VPVWidenSC,
     VPVWidenCallSC,
+    VPVWidenEVLSC,
     VPVWidenGEPSC,
     VPVWidenSelectSC,
 
@@ -322,9 +326,12 @@
     VPBranchOnMaskSC,
     VPInstructionSC,
     VPInterleaveSC,
+    VPPredicatedWidenMemoryInstructionSC,
+    VPPredicatedWidenSC,
     VPReductionSC,
     VPReplicateSC,
     VPWidenCallSC,
+    VPWidenEVLSC,
     VPWidenGEPSC,
     VPWidenMemoryInstructionSC,
     VPWidenSC,