diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -2552,6 +2552,12 @@
                                          unsigned Index, unsigned FieldIndex,
                                          MDNode *DbgInfo);
 
+  /// Return an all-true i1 vector; \p NumElts gives its size and scalability.
+  Value *getTrueVector(ElementCount NumElts) {
+    VectorType *VTy = VectorType::get(Type::getInt1Ty(Context), NumElts);
+    return Constant::getAllOnesValue(VTy);
+  }
+
 private:
   /// Helper function that creates an assume intrinsic call that
   /// represents an alignment assumption on the provided pointer \p PtrValue
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -748,6 +748,7 @@
 
 inline bool VPUser::classof(const VPDef *Def) {
   return Def->getVPDefID() == VPRecipeBase::VPInstructionSC ||
+         Def->getVPDefID() == VPRecipeBase::VPPredicatedWidenSC ||
          Def->getVPDefID() == VPRecipeBase::VPWidenSC ||
          Def->getVPDefID() == VPRecipeBase::VPWidenCallSC ||
          Def->getVPDefID() == VPRecipeBase::VPWidenSelectSC ||
@@ -757,6 +758,8 @@
          Def->getVPDefID() == VPRecipeBase::VPReplicateSC ||
          Def->getVPDefID() == VPRecipeBase::VPReductionSC ||
          Def->getVPDefID() == VPRecipeBase::VPBranchOnMaskSC ||
+         Def->getVPDefID() ==
+             VPRecipeBase::VPPredicatedWidenMemoryInstructionSC ||
          Def->getVPDefID() == VPRecipeBase::VPWidenMemoryInstructionSC;
 }
 
@@ -775,6 +778,7 @@
     SLPLoad,
     SLPStore,
     ActiveLaneMask,
+    AllTrueMask,
   };
 
 private:
@@ -867,11 +871,17 @@
 /// ingredient. This recipe covers most of the traditional vectorization cases
 /// where each ingredient transforms into a vectorized version of itself.
 class VPWidenRecipe : public VPRecipeBase, public VPValue {
+protected:
+  template <typename IterT>
+  VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands,
+                const unsigned char RecipeSC, const unsigned char ValueSC)
+      : VPRecipeBase(RecipeSC, Operands), VPValue(ValueSC, &I, this) {}
+
 public:
   template <typename IterT>
   VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
-      : VPRecipeBase(VPRecipeBase::VPWidenSC, Operands),
-        VPValue(VPValue::VPVWidenSC, &I, this) {}
+      : VPWidenRecipe(I, Operands, VPRecipeBase::VPWidenSC,
+                      VPValue::VPVWidenSC) {}
 
   ~VPWidenRecipe() override = default;
 
@@ -893,6 +903,47 @@
 #endif
 };
 
+/// VPPredicatedWidenRecipe is a recipe for producing a vector copy of its
+/// ingredient using VP intrinsics. In addition to the ingredient's operands it
+/// carries a mask and an explicit vector length (EVL) as its last two operands,
+/// in that order.
+class VPPredicatedWidenRecipe : public VPWidenRecipe {
+public:
+  template <typename IterT>
+  VPPredicatedWidenRecipe(Instruction &I, iterator_range<IterT> Operands,
+                          VPValue *Mask, VPValue *EVL)
+      : VPWidenRecipe(I, Operands, VPRecipeBase::VPPredicatedWidenSC,
+                      VPValue::VPVPredicatedWidenSC) {
+    addOperand(Mask);
+    addOperand(EVL);
+  }
+
+  ~VPPredicatedWidenRecipe() override = default;
+
+  /// Method to support type inquiry through isa, cast, and dyn_cast.
+  static inline bool classof(const VPDef *D) {
+    return D->getVPDefID() == VPRecipeBase::VPPredicatedWidenSC;
+  }
+  static inline bool classof(const VPValue *V) {
+    return V->getVPValueID() == VPValue::VPVPredicatedWidenSC;
+  }
+
+  /// Return the mask used by this recipe (the second-to-last operand).
+  VPValue *getMask() const { return getOperand(getNumOperands() - 2); }
+
+  /// Return the explicit vector length used by this recipe (the last operand).
+  VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
+
+  /// Produce the widened copy of the ingredient using VP intrinsics.
+  void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  /// Print the recipe.
+  void print(raw_ostream &O, const Twine &Indent,
+             VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
 /// A recipe for widening Call instructions.
 class VPWidenCallRecipe : public VPRecipeBase, public VPValue {
 
@@ -1373,6 +1424,44 @@
   }
 };
 
+/// A recipe to generate the Explicit Vector Length (EVL) value to be used with
+/// VP intrinsics. Its operands are the induction variable and the trip count.
+class VPWidenEVLRecipe : public VPRecipeBase, public VPValue {
+
+public:
+  VPWidenEVLRecipe(VPValue *IV, VPValue *TC)
+      : VPRecipeBase(VPRecipeBase::VPWidenEVLSC, {IV, TC}),
+        VPValue(VPValue::VPVWidenEVLSC, nullptr, this) {}
+  ~VPWidenEVLRecipe() override = default;
+
+  /// Return the VPValue representing EVL (this recipe itself).
+  const VPValue *getEVL() const { return this; }
+  VPValue *getEVL() { return this; }
+
+  /// Return VPValue representing Induction Variable.
+  VPValue *getIV() const { return getOperand(0); }
+
+  /// Return VPValue representing trip count.
+  VPValue *getTripCount() const { return getOperand(1); }
+
+  /// Method to support type inquiry through isa, cast, and dyn_cast.
+  static inline bool classof(const VPDef *D) {
+    return D->getVPDefID() == VPRecipeBase::VPWidenEVLSC;
+  }
+  static inline bool classof(const VPValue *V) {
+    return V->getVPValueID() == VPValue::VPVWidenEVLSC;
+  }
+
+  /// Generate the instructions to compute EVL.
+  void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  /// Print the recipe.
+  void print(raw_ostream &O, const Twine &Indent,
+             VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
 /// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
 /// control converges back from a Branch-on-Mask. The phi nodes are needed in
 /// order to merge values that are set under such a branch and feed their uses.
@@ -1409,8 +1498,6 @@
 /// TODO: We currently execute only per-part unless a specific instance is
 /// provided.
 class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
-  Instruction &Ingredient;
-
   void setMask(VPValue *Mask) {
     if (!Mask)
       return;
@@ -1421,17 +1508,36 @@
     return isStore() ? getNumOperands() == 3 : getNumOperands() == 2;
   }
 
+protected:
+  Instruction &Ingredient;
+
+  /// Create a widened load recipe with the given recipe and value subclass
+  /// IDs. Only \p Addr is added as an operand; no mask is set here.
+  VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr,
+                                 const unsigned char RecipeSC,
+                                 const unsigned char ValueSC)
+      : VPRecipeBase(RecipeSC, {Addr}), Ingredient(Load) {
+    new VPValue(ValueSC, &Load, this);
+  }
+
+  /// Create a widened store recipe with the given recipe subclass ID. Only
+  /// \p Addr and \p StoredValue are added as operands; no mask is set here.
+  VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr,
+                                 VPValue *StoredValue,
+                                 const unsigned char RecipeSC)
+      : VPRecipeBase(RecipeSC, {Addr, StoredValue}), Ingredient(Store) {}
+
 public:
   VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask)
-      : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr}), Ingredient(Load) {
-    new VPValue(VPValue::VPVMemoryInstructionSC, &Load, this);
+      : VPWidenMemoryInstructionRecipe(Load, Addr, VPWidenMemoryInstructionSC,
+                                       VPValue::VPVMemoryInstructionSC) {
     setMask(Mask);
   }
 
   VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr,
                                  VPValue *StoredValue, VPValue *Mask)
-      : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr, StoredValue}),
-        Ingredient(Store) {
+      : VPWidenMemoryInstructionRecipe(Store, Addr, StoredValue,
+                                       VPWidenMemoryInstructionSC) {
     setMask(Mask);
   }
 
@@ -1471,6 +1577,60 @@
 #endif
 };
 
+/// A Recipe for widening load/store operations to VP intrinsics.
+/// The recipe uses the following VPValues as operands, in this order:
+/// - For load: Address, mask, EVL
+/// - For store: Address, stored value, mask, EVL
+class VPPredicatedWidenMemoryInstructionRecipe
+    : public VPWidenMemoryInstructionRecipe {
+
+public:
+  VPPredicatedWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr,
+                                           VPValue *Mask, VPValue *EVL)
+      : VPWidenMemoryInstructionRecipe(
+            Load, Addr, VPPredicatedWidenMemoryInstructionSC,
+            VPValue::VPVPredicatedMemoryInstructionSC) {
+    addOperand(Mask);
+    addOperand(EVL);
+  }
+
+  VPPredicatedWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr,
+                                           VPValue *StoredValue, VPValue *Mask,
+                                           VPValue *EVL)
+      : VPWidenMemoryInstructionRecipe(Store, Addr, StoredValue,
+                                       VPPredicatedWidenMemoryInstructionSC) {
+    addOperand(Mask);
+    addOperand(EVL);
+  }
+
+  /// Method to support type inquiry through isa, cast, and dyn_cast.
+  static inline bool classof(const VPDef *D) {
+    return D->getVPDefID() ==
+           VPRecipeBase::VPPredicatedWidenMemoryInstructionSC;
+  }
+
+  /// Return the mask used by this recipe.
+  VPValue *getMask() const {
+    // The mask is mandatory and is always the second-to-last operand.
+    return getOperand(getNumOperands() - 2);
+  }
+
+  /// Return the EVL used by this recipe.
+  VPValue *getEVL() const {
+    // The EVL is mandatory and is always the last operand.
+    return getOperand(getNumOperands() - 1);
+  }
+
+  /// Generate the wide load/store using VP intrinsics.
+  void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  /// Print the recipe.
+  void print(raw_ostream &O, const Twine &Indent,
+             VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
 /// A Recipe for widening the canonical induction variable of the vector loop.
 class VPWidenCanonicalIVRecipe : public VPRecipeBase {
 public:
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -580,6 +580,7 @@
   case VPWidenPHISC:
   case VPBlendSC:
   case VPWidenSC:
+  case VPPredicatedWidenSC:
   case VPWidenGEPSC:
   case VPReductionSC:
   case VPWidenSelectSC: {
@@ -687,6 +688,13 @@
     State.set(this, Call, Part);
     break;
   }
+  case VPInstruction::AllTrueMask: {
+    // An all-ones i1 vector with State.VF elements: no lane is masked off.
+    Value *AllTrueMask = Builder.getTrueVector(State.VF);
+    State.set(this, AllTrueMask, Part);
+    break;
+  }
+
   default:
     llvm_unreachable("Unsupported opcode for instruction");
   }
@@ -729,7 +737,9 @@
   case VPInstruction::ActiveLaneMask:
     O << "active lane mask";
     break;
-
+  case VPInstruction::AllTrueMask:
+    O << "all true mask";
+    break;
   default:
     O << Instruction::getOpcodeName(getOpcode());
   }
@@ -1069,6 +1079,15 @@
   printOperands(O, SlotTracker);
 }
 
+// Prints: PREDICATED-WIDEN <result> = <opcode> <operands>
+void VPPredicatedWidenRecipe::print(raw_ostream &O, const Twine &Indent,
+                                    VPSlotTracker &SlotTracker) const {
+  O << Indent << "PREDICATED-WIDEN ";
+  printAsOperand(O, SlotTracker);
+  O << " = " << getUnderlyingInstr()->getOpcodeName() << " ";
+  printOperands(O, SlotTracker);
+}
+
 void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                           VPSlotTracker &SlotTracker) const {
   O << Indent << "WIDEN-INDUCTION";
@@ -1187,6 +1206,20 @@
 }
+
+// Prints: PREDICATED-WIDEN [<res> = ]<opcode> <operands> (no <res> for stores).
+void VPPredicatedWidenMemoryInstructionRecipe::print(
+    raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const {
+  O << Indent << "PREDICATED-WIDEN ";
+
+  if (!isStore()) {
+    getVPValue()->printAsOperand(O, SlotTracker);
+    O << " = ";
+  }
+  O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";
+
+  printOperands(O, SlotTracker);
+}
 #endif
 
 void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
   Value *CanonicalIV = State.CanonicalIV;
   Type *STy = CanonicalIV->getType();
@@ -1221,6 +1254,14 @@
 }
+
+// Prints: EMIT <evl> = GENERATE-EXPLICIT-VECTOR-LENGTH
+void VPWidenEVLRecipe::print(raw_ostream &O, const Twine &Indent,
+                             VPSlotTracker &SlotTracker) const {
+  O << Indent << "EMIT ";
+  getEVL()->printAsOperand(O, SlotTracker);
+  O << " = GENERATE-EXPLICIT-VECTOR-LENGTH";
+}
 #endif
 
 template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
 
 void VPValue::replaceAllUsesWith(VPValue *New) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -51,6 +51,7 @@
   friend class VPSlotTracker;
   friend class VPRecipeBase;
   friend class VPWidenMemoryInstructionRecipe;
+  friend class VPPredicatedWidenMemoryInstructionRecipe;
 
   const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
 
@@ -94,10 +95,13 @@
     VPVInstructionSC,
     VPVMemoryInstructionSC,
     VPVPredInstPHI,
+    VPVPredicatedMemoryInstructionSC,
+    VPVPredicatedWidenSC,
     VPVReductionSC,
     VPVReplicateSC,
     VPVWidenSC,
     VPVWidenCallSC,
+    VPVWidenEVLSC,
     VPVWidenGEPSC,
     VPVWidenIntOrFpIndcutionSC,
     VPVWidenPHISC,
@@ -319,10 +323,13 @@
     VPInstructionSC,
     VPInterleaveSC,
     VPPredInstPHISC,
+    VPPredicatedWidenMemoryInstructionSC,
+    VPPredicatedWidenSC,
     VPReductionSC,
     VPReplicateSC,
     VPWidenCallSC,
     VPWidenCanonicalIVSC,
+    VPWidenEVLSC,
     VPWidenGEPSC,
     VPWidenIntOrFpInductionSC,
     VPWidenMemoryInstructionSC,