Index: include/llvm/Bitcode/LLVMBitCodes.h =================================================================== --- include/llvm/Bitcode/LLVMBitCodes.h +++ include/llvm/Bitcode/LLVMBitCodes.h @@ -602,6 +602,9 @@ ATTR_KIND_OPT_FOR_FUZZING = 57, ATTR_KIND_SHADOWCALLSTACK = 58, ATTR_KIND_SPECULATIVE_LOAD_HARDENING = 59, + ATTR_KIND_MASK = 60, + ATTR_KIND_VECTORLENGTH = 61, + ATTR_KIND_PASSTHRU = 62, }; enum ComdatSelectionKindCodes { Index: include/llvm/CodeGen/ISDOpcodes.h =================================================================== --- include/llvm/CodeGen/ISDOpcodes.h +++ include/llvm/CodeGen/ISDOpcodes.h @@ -199,6 +199,7 @@ /// Simple integer binary arithmetic operators. ADD, SUB, MUL, SDIV, UDIV, SREM, UREM, + EVL_ADD, EVL_SUB, EVL_MUL, EVL_SDIV, EVL_UDIV, EVL_SREM, EVL_UREM, /// SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing /// a signed/unsigned value of type i[2*N], and return the full value as @@ -281,6 +282,7 @@ /// Simple binary floating point operators. FADD, FSUB, FMUL, FDIV, FREM, + EVL_FADD, EVL_FSUB, EVL_FMUL, EVL_FDIV, EVL_FREM, /// Constrained versions of the binary floating point operators. /// These will be lowered to the simple operators before final selection. @@ -300,6 +302,7 @@ /// FMA - Perform a * b + c with no intermediate rounding step. FMA, + EVL_FMA, /// FMAD - Perform a * b + c, while getting the same result as the /// separately rounded operations. @@ -385,6 +388,7 @@ /// Bitwise operators - logical and, logical or, logical xor. AND, OR, XOR, + EVL_AND, EVL_OR, EVL_XOR, /// ABS - Determine the unsigned absolute value of a signed integer value of /// the same bitwidth. @@ -408,6 +412,7 @@ /// fshl(X,Y,Z): (X << (Z % BW)) | (Y >> (BW - (Z % BW))) /// fshr(X,Y,Z): (X << (BW - (Z % BW))) | (Y >> (Z % BW)) SHL, SRA, SRL, ROTL, ROTR, FSHL, FSHR, + EVL_SHL, EVL_SRA, EVL_SRL, /// Byte Swap and Counting operators. BSWAP, CTTZ, CTLZ, CTPOP, BITREVERSE, @@ -428,6 +433,13 @@ /// pattern. 
The condition follows the BooleanContent format of the target. VSELECT, + /// Select with an integer pivot (op #0) and two vector operands (ops #1 + /// and #2), returning a vector result. All vectors have the same length. + /// Similar to the vector select, a comparison of the results element index + /// with the integer pivot selects hether the corresponding result element + /// is taken from op #1 or op #2. + EVL_COMPOSE, + /// Select with condition operator - This selects between a true value and /// a false value (ops #2 and #3) based on the boolean result of comparing /// the lhs and rhs (ops #0 and #1) of a conditional expression with the @@ -584,6 +596,7 @@ FNEG, FABS, FSQRT, FCBRT, FSIN, FCOS, FPOWI, FPOW, FLOG, FLOG2, FLOG10, FEXP, FEXP2, FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR, + EVL_FNEG, /// FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two /// values. // @@ -827,6 +840,7 @@ // Val, OutChain = MLOAD(BasePtr, Mask, PassThru) // OutChain = MSTORE(Value, BasePtr, Mask) MLOAD, MSTORE, + EVL_LOAD, EVL_STORE, // Masked gather and scatter - load and store operations for a vector of // random addresses with additional mask operand that prevents memory @@ -838,6 +852,7 @@ // The Index operand can have more vector elements than the other operands // due to type legalization. The extra elements are ignored. MGATHER, MSCATTER, + EVL_GATHER, EVL_SCATTER, /// This corresponds to the llvm.lifetime.* intrinsics. The first operand /// is the chain and the second operand is the alloca pointer. @@ -869,8 +884,15 @@ VECREDUCE_ADD, VECREDUCE_MUL, VECREDUCE_AND, VECREDUCE_OR, VECREDUCE_XOR, VECREDUCE_SMAX, VECREDUCE_SMIN, VECREDUCE_UMAX, VECREDUCE_UMIN, + + EVL_REDUCE_FADD, EVL_REDUCE_FMUL, + EVL_REDUCE_ADD, EVL_REDUCE_MUL, + EVL_REDUCE_AND, EVL_REDUCE_OR, EVL_REDUCE_XOR, + EVL_REDUCE_SMAX, EVL_REDUCE_SMIN, EVL_REDUCE_UMAX, EVL_REDUCE_UMIN, + /// FMIN/FMAX nodes can have flags, for NaN/NoNaN variants. 
VECREDUCE_FMAX, VECREDUCE_FMIN, + EVL_REDUCE_FMAX, EVL_REDUCE_FMIN, /// BUILTIN_OP_END - This must be the last enum value in this list. /// The target-specific pre-isel opcode values start here. Index: include/llvm/IR/Attributes.td =================================================================== --- include/llvm/IR/Attributes.td +++ include/llvm/IR/Attributes.td @@ -130,6 +130,15 @@ /// Return value is always equal to this argument. def Returned : EnumAttr<"returned">; +/// Return value that is equal to this argument on enabled lanes (mask). +def Passthru : EnumAttr<"passthru">; + +/// Mask argument that applies to this function. +def Mask : EnumAttr<"mask">; + +/// Dynamic Vector Length argument of this function. +def VectorLength : EnumAttr<"vlen">; + /// Function can return twice. def ReturnsTwice : EnumAttr<"returns_twice">; Index: include/llvm/IR/EVLBuilder.h =================================================================== --- /dev/null +++ include/llvm/IR/EVLBuilder.h @@ -0,0 +1,85 @@ +#ifndef LLVM_IR_EVLBUILDER_H +#define LLVM_IR_EVLBUILDER_H + +#include +#include +#include + +namespace llvm { + +enum class EVLTypeToken : int8_t { + Scalar = 1, + Vector = 2 +}; + +using TypeTokenVec = SmallVector; +using ShortTypeVec = SmallVector; +using ShortValueVec = SmallVector; + +struct +EVLIntrinsicDesc { + Intrinsic::ID ID; // LLVM Intrinsic ID. + TypeTokenVec typeTokens; // Type Parmeters for the LLVM Intrinsic. + int MaskPos; // Parameter index of the Mask parameter. + int EVLPos; // Parameter index of the EVL parameter. 
+}; + +using ValArray = ArrayRef; + +class EVLBuilder { + IRBuilder<> & Builder; + // Explicit mask parameter + Value * Mask; + // Explicit vector length parameter + Value * ExplicitVectorLength; + // Compile-time vector length + int StaticVectorLength; + + // get a vlaid mask/evl argument for the current predication contet + Value& GetMaskForType(VectorType & VecTy); + Value& GetEVLForType(VectorType & VecTy); + +public: + EVLBuilder(IRBuilder<> & _builder) + : Builder(_builder) + , Mask(nullptr) + , ExplicitVectorLength(nullptr) + , StaticVectorLength(-1) + {} + + Module & getModule() const; + + // The cannonical vector type for this \p ElementTy + VectorType& getVectorType(Type &ElementTy); + + // Predication context tracker + EVLBuilder& setMask(Value * _Mask) { Mask = _Mask; return *this; } + EVLBuilder& setEVL(Value * _ExplicitVectorLength) { ExplicitVectorLength = _ExplicitVectorLength; return *this; } + EVLBuilder& setStaticVL(int VLen) { StaticVectorLength = VLen; return *this; } + + EVLIntrinsicDesc GetEVLIntrinsicDesc(unsigned OC); + + // Create a map-vectorized copy of the instruction \p Inst with the underlying IRBuilder instance. + // This operation may return nullptr if the instruction could not be vectorized. 
+ Value* CreateVectorCopy(Instruction & Inst, ValArray VecOpArray); + + Value& CreateGEP(ValArray VecOpArray); + + Value& CreateFAdd(ValArray VecOpArray); + Value& CreateFDiv(ValArray VecOpArray); + Value& CreateFMul(ValArray VecOpArray); + Value& CreateFSub(ValArray VecOpArray); + + Value& CreateContiguousStore(Value & Val, Value & Pointer); + + Value& CreateContiguousLoad(Value & Pointer, Value * Passthru = nullptr); + + Value& CreateScatter(Value & Val, Value & PointerVec); + + Value& CreateGather(Value & PointerVec, Value * Passthru = nullptr); +}; + + +} // namespace llvm + +#endif // LLVM_IR_EVLBUILDER_H Index: include/llvm/IR/IntrinsicInst.h =================================================================== --- include/llvm/IR/IntrinsicInst.h +++ include/llvm/IR/IntrinsicInst.h @@ -206,6 +206,73 @@ /// @} }; + class EVLIntrinsic : public IntrinsicInst { + public: + + bool isUnaryOp() const; + bool isBinaryOp() const; + bool isTernaryOp() const; + + Value* GetMask() const; + Value* GetVectorLength() const; + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + switch (I->getIntrinsicID()) { + case Intrinsic::evl_and: + case Intrinsic::evl_or: + case Intrinsic::evl_xor: + case Intrinsic::evl_ashr: + case Intrinsic::evl_lshr: + case Intrinsic::evl_shl: + + case Intrinsic::evl_select: + case Intrinsic::evl_compose: + case Intrinsic::evl_compress: + case Intrinsic::evl_expand: + + case Intrinsic::evl_load: + case Intrinsic::evl_store: + case Intrinsic::evl_compressstore: + case Intrinsic::evl_expandload: + + case Intrinsic::evl_fadd: + case Intrinsic::evl_fsub: + case Intrinsic::evl_fmul: + case Intrinsic::evl_fdiv: + case Intrinsic::evl_frem: + case Intrinsic::evl_fma: + + case Intrinsic::evl_add: + case Intrinsic::evl_sub: + case Intrinsic::evl_mul: + case Intrinsic::evl_udiv: + case Intrinsic::evl_sdiv: + case Intrinsic::evl_urem: + case Intrinsic::evl_srem: + + case 
Intrinsic::evl_reduce_and: + case Intrinsic::evl_reduce_or: + case Intrinsic::evl_reduce_xor: + case Intrinsic::evl_reduce_fadd: + case Intrinsic::evl_reduce_fmax: + case Intrinsic::evl_reduce_fmin: + case Intrinsic::evl_reduce_fmul: + case Intrinsic::evl_reduce_mul: + case Intrinsic::evl_reduce_umax: + case Intrinsic::evl_reduce_umin: + case Intrinsic::evl_reduce_smax: + case Intrinsic::evl_reduce_smin: + return true; + + default: return false; + } + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } + }; + /// This is the common base class for constrained floating point intrinsics. class ConstrainedFPIntrinsic : public IntrinsicInst { public: Index: include/llvm/IR/Intrinsics.td =================================================================== --- include/llvm/IR/Intrinsics.td +++ include/llvm/IR/Intrinsics.td @@ -88,6 +88,25 @@ int ArgNo = argNo; } +// VectorLength - The specified argument is the Dynamic Vector Length of the +// operation. +class VectorLength : IntrinsicProperty { + int ArgNo = argNo; +} + +// Mask - The specified argument contains the per-lane mask of this +// intrinsic. Inputs on masked-out lanes must not effect the result of this +// intrinsic (except for the Passthru argument). +class Mask : IntrinsicProperty { + int ArgNo = argNo; +} +// Passthru - The specified argument contains the per-lane return value +// for this vector intrinsic where the mask is false. +// (requires the Mask attribute in the same function) +class Passthru : IntrinsicProperty { + int ArgNo = argNo; +} + def IntrNoReturn : IntrinsicProperty; // IntrCold - Calls to this intrinsic are cold. @@ -996,6 +1015,261 @@ // Intrinsic to detect whether its argument is a constant. 
def int_is_constant : Intrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem], "llvm.is.constant">; +//===---------------- Masked/Explicit Vector Length Intrinsics --------------===// + +// Memory Intrinsics +def int_evl_store : Intrinsic<[], [llvm_anyvector_ty, + LLVMAnyPointerType>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [IntrArgMemOnly, Mask<2>, VectorLength<3>]>; + +def int_evl_load : Intrinsic<[llvm_anyvector_ty], + [LLVMAnyPointerType>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly, Mask<2>, VectorLength<3>]>; + +def int_evl_gather: Intrinsic<[llvm_anyvector_ty], + [LLVMVectorOfAnyPointersToElt<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [IntrReadMem, Mask<2>, VectorLength<3>]>; + +def int_evl_scatter: Intrinsic<[], + [llvm_anyvector_ty, + LLVMVectorOfAnyPointersToElt<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [Mask<2>, VectorLength<3>]>; + +def int_evl_expandload: Intrinsic<[llvm_anyvector_ty], + [LLVMPointerToElt<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [IntrReadMem, Mask<2>, VectorLength<3>]>; + +def int_evl_compressstore: Intrinsic<[], + [llvm_anyvector_ty, + LLVMPointerToElt<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [IntrArgMemOnly, Mask<2>, VectorLength<3>]>; + +// Reductions +def int_evl_reduce_fadd : Intrinsic<[llvm_anyfloat_ty], + [llvm_anyfloat_ty, + llvm_anyvector_ty, + LLVMVectorSameWidth<2, llvm_i1_ty>, + llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, Mask<2>, VectorLength<3>]>; +def int_evl_reduce_fmul : Intrinsic<[llvm_anyfloat_ty], + [llvm_anyfloat_ty, + llvm_anyvector_ty, + LLVMVectorSameWidth<2, llvm_i1_ty>, + llvm_i32_ty], + [IntrNoMem, Mask<2>, VectorLength<3>]>; +def int_evl_reduce_add : Intrinsic<[llvm_anyint_ty], + [llvm_anyvector_ty, + LLVMVectorSameWidth<1, llvm_i1_ty>, + llvm_i32_ty], + [IntrNoMem, Mask<1>, VectorLength<2>]>; +def int_evl_reduce_mul : 
Intrinsic<[llvm_anyint_ty], + [llvm_anyvector_ty, + LLVMVectorSameWidth<1, llvm_i1_ty>, + llvm_i32_ty], + [IntrNoMem, Mask<1>, VectorLength<2>]>; +def int_evl_reduce_and : Intrinsic<[llvm_anyint_ty], + [llvm_anyvector_ty, + LLVMVectorSameWidth<1, llvm_i1_ty>, + llvm_i32_ty], + [IntrNoMem, Mask<1>, VectorLength<2>]>; +def int_evl_reduce_or : Intrinsic<[llvm_anyint_ty], + [llvm_anyvector_ty, + LLVMVectorSameWidth<1, llvm_i1_ty>, + llvm_i32_ty], + [IntrNoMem, Mask<1>, VectorLength<2>]>; +def int_evl_reduce_xor : Intrinsic<[llvm_anyint_ty], + [llvm_anyvector_ty, + LLVMVectorSameWidth<1, llvm_i1_ty>, + llvm_i32_ty], + [IntrNoMem, Mask<1>, VectorLength<2>]>; +def int_evl_reduce_smax : Intrinsic<[llvm_anyint_ty], + [llvm_anyvector_ty, + LLVMVectorSameWidth<1, llvm_i1_ty>, + llvm_i32_ty], + [IntrNoMem, Mask<1>, VectorLength<2>]>; +def int_evl_reduce_smin : Intrinsic<[llvm_anyint_ty], + [llvm_anyvector_ty, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [IntrNoMem, Mask<1>, VectorLength<2>]>; +def int_evl_reduce_umax : Intrinsic<[llvm_anyint_ty], + [llvm_anyvector_ty, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [IntrNoMem, Mask<1>, VectorLength<2>]>; +def int_evl_reduce_umin : Intrinsic<[llvm_anyint_ty], + [llvm_anyvector_ty, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [IntrNoMem, Mask<1>, VectorLength<2>]>; +def int_evl_reduce_fmax : Intrinsic<[llvm_anyfloat_ty], + [llvm_anyvector_ty, + LLVMVectorSameWidth<1, llvm_i1_ty>, + llvm_i32_ty], + [IntrNoMem, Mask<1>, VectorLength<2>]>; +def int_evl_reduce_fmin : Intrinsic<[llvm_anyfloat_ty], + [llvm_anyvector_ty, + LLVMVectorSameWidth<1, llvm_i1_ty>, + llvm_i32_ty], + [IntrNoMem, Mask<1>, VectorLength<2>]>; + +// Binary operators +let IntrProperties = [IntrNoMem, Mask<2>, VectorLength<3>] in { + def int_evl_add : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_evl_sub : Intrinsic<[ llvm_anyvector_ty ], + [ 
LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_evl_mul : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_evl_sdiv : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_evl_udiv : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_evl_srem : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_evl_urem : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + + def int_evl_fneg : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_evl_fadd : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_evl_fsub : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_evl_fmul : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_evl_fdiv : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_evl_frem : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + +// Logical operators + def int_evl_ashr : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_evl_lshr : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + 
LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_evl_shl : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_evl_or : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_evl_and : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_evl_xor : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; +} + +def int_evl_fma : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [IntrNoMem, Mask<3>, VectorLength<4>]>; + +// Shuffle +def int_evl_expand: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [IntrNoMem, Mask<2>, VectorLength<3>]>; + +def int_evl_compress: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [IntrNoMem, Mask<2>, VectorLength<3>]>; + +// Select +def int_evl_select : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [IntrNoMem, Mask<2>, VectorLength<3>]>; + +// Compose +def int_evl_compose : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + LLVMMatchType<0>, + llvm_i32_ty, + llvm_i32_ty], + [IntrNoMem, VectorLength<2>]>; + + + //===-------------------------- Masked Intrinsics -------------------------===// // Index: include/llvm/Target/TargetSelectionDAG.td =================================================================== --- include/llvm/Target/TargetSelectionDAG.td +++ include/llvm/Target/TargetSelectionDAG.td @@ -129,6 +129,13 @@ SDTCisSameAs<0, 1>, 
SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3> ]>; +def SDTIntBinOpEVL : SDTypeProfile<1, 4, [ // evl_add, evl_and, etc. + SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<4>, SDTCisSameNumEltsAs<0, 3> +]>; +def SDTIntShiftOpEVL : SDTypeProfile<1, 4, [ // shl, sra, srl + SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<2>, SDTCisInt<4>, SDTCisSameNumEltsAs<0, 3> +]>; + def SDTFPBinOp : SDTypeProfile<1, 2, [ // fadd, fmul, etc. SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0> ]>; @@ -171,6 +178,16 @@ SDTCisOpSmallerThanOp<1, 0> ]>; +def SDTFPUnOpEVL : SDTypeProfile<1, 3, [ // evl_fneg, etc. + SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<3>, SDTCisSameNumEltsAs<0, 2> +]>; +def SDTFPBinOpEVL : SDTypeProfile<1, 4, [ // evl_fadd, etc. + SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>, SDTCisInt<4>, SDTCisSameNumEltsAs<0, 3> +]>; +def SDTFPTernaryOpEVL : SDTypeProfile<1, 5, [ // evl_fmadd, etc. + SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisFP<0>, SDTCisInt<5>, SDTCisSameNumEltsAs<0, 4> +]>; + def SDTSetCC : SDTypeProfile<1, 3, [ // setcc SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT> ]>; @@ -183,6 +200,10 @@ SDTCisVec<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>, SDTCisSameNumEltsAs<0, 1> ]>; +def SDTVSelectEVL : SDTypeProfile<1, 5, [ // evl_vselect + SDTCisVec<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>, SDTCisSameNumEltsAs<0, 1>, SDTCisInt<5>, SDTCisSameNumEltsAs<0, 4> +]>; + def SDTSelectCC : SDTypeProfile<1, 5, [ // select_cc SDTCisSameAs<1, 2>, SDTCisSameAs<3, 4>, SDTCisSameAs<0, 3>, SDTCisVT<5, OtherVT> @@ -226,11 +247,20 @@ SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisSameNumEltsAs<0, 2> ]>; +def SDTStoreEVL: SDTypeProfile<0, 4, [ // evl store + SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisSameNumEltsAs<0, 2>, SDTCisInt<3> +]>; + def SDTMaskedLoad: SDTypeProfile<1, 3, [ // masked load SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisSameAs<0, 3>, SDTCisSameNumEltsAs<0, 2> ]>; +def 
SDTLoadEVL : SDTypeProfile<1, 3, [ // evl load + SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisSameNumEltsAs<0, 2>, SDTCisInt<3>, + SDTCisSameNumEltsAs<0, 2> +]>; + def SDTVecShuffle : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> ]>; @@ -381,6 +411,26 @@ def umax : SDNode<"ISD::UMAX" , SDTIntBinOp, [SDNPCommutative, SDNPAssociative]>; +def evl_and : SDNode<"ISD::EVL_AND" , SDTIntBinOpEVL , + [SDNPCommutative, SDNPAssociative]>; +def evl_or : SDNode<"ISD::EVL_OR" , SDTIntBinOpEVL , + [SDNPCommutative, SDNPAssociative]>; +def evl_xor : SDNode<"ISD::EVL_XOR" , SDTIntBinOpEVL , + [SDNPCommutative, SDNPAssociative]>; +def evl_srl : SDNode<"ISD::EVL_SRL" , SDTIntShiftOpEVL>; +def evl_sra : SDNode<"ISD::EVL_SRA" , SDTIntShiftOpEVL>; +def evl_shl : SDNode<"ISD::EVL_SHL" , SDTIntShiftOpEVL>; + +def evl_add : SDNode<"ISD::EVL_ADD" , SDTIntBinOpEVL , + [SDNPCommutative, SDNPAssociative]>; +def evl_sub : SDNode<"ISD::EVL_SUB" , SDTIntBinOpEVL>; +def evl_mul : SDNode<"ISD::EVL_MUL" , SDTIntBinOpEVL, + [SDNPCommutative, SDNPAssociative]>; +def evl_sdiv : SDNode<"ISD::EVL_SDIV" , SDTIntBinOpEVL>; +def evl_udiv : SDNode<"ISD::EVL_UDIV" , SDTIntBinOpEVL>; +def evl_srem : SDNode<"ISD::EVL_SREM" , SDTIntBinOpEVL>; +def evl_urem : SDNode<"ISD::EVL_UREM" , SDTIntBinOpEVL>; + def saddsat : SDNode<"ISD::SADDSAT" , SDTIntBinOp, [SDNPCommutative]>; def uaddsat : SDNode<"ISD::UADDSAT" , SDTIntBinOp, [SDNPCommutative]>; def ssubsat : SDNode<"ISD::SSUBSAT" , SDTIntBinOp>; @@ -448,6 +498,14 @@ def fpextend : SDNode<"ISD::FP_EXTEND" , SDTFPExtendOp>; def fcopysign : SDNode<"ISD::FCOPYSIGN" , SDTFPSignOp>; +def evl_fneg : SDNode<"ISD::EVL_FNEG" , SDTFPUnOpEVL>; +def evl_fadd : SDNode<"ISD::EVL_FADD" , SDTFPBinOpEVL, [SDNPCommutative]>; +def evl_fsub : SDNode<"ISD::EVL_FSUB" , SDTFPBinOpEVL>; +def evl_fmul : SDNode<"ISD::EVL_FMUL" , SDTFPBinOpEVL, [SDNPCommutative]>; +def evl_fdiv : SDNode<"ISD::EVL_FDIV" , SDTFPBinOpEVL>; +def evl_frem : SDNode<"ISD::EVL_FREM" , SDTFPBinOpEVL>; +def 
evl_fma : SDNode<"ISD::EVL_FMA" , SDTFPTernaryOpEVL>; + def sint_to_fp : SDNode<"ISD::SINT_TO_FP" , SDTIntToFPOp>; def uint_to_fp : SDNode<"ISD::UINT_TO_FP" , SDTIntToFPOp>; def fp_to_sint : SDNode<"ISD::FP_TO_SINT" , SDTFPToIntOp>; @@ -455,10 +513,10 @@ def f16_to_fp : SDNode<"ISD::FP16_TO_FP" , SDTIntToFPOp>; def fp_to_f16 : SDNode<"ISD::FP_TO_FP16" , SDTFPToIntOp>; -def setcc : SDNode<"ISD::SETCC" , SDTSetCC>; -def select : SDNode<"ISD::SELECT" , SDTSelect>; -def vselect : SDNode<"ISD::VSELECT" , SDTVSelect>; -def selectcc : SDNode<"ISD::SELECT_CC" , SDTSelectCC>; +def setcc : SDNode<"ISD::SETCC" , SDTSetCC>; +def select : SDNode<"ISD::SELECT" , SDTSelect>; +def vselect : SDNode<"ISD::VSELECT" , SDTVSelect>; +def selectcc : SDNode<"ISD::SELECT_CC" , SDTSelectCC>; def brcc : SDNode<"ISD::BR_CC" , SDTBrCC, [SDNPHasChain]>; def brcond : SDNode<"ISD::BRCOND" , SDTBrcond, [SDNPHasChain]>; @@ -521,6 +579,11 @@ def masked_load : SDNode<"ISD::MLOAD", SDTMaskedLoad, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def evl_store : SDNode<"ISD::EVL_STORE", SDTMaskedStore, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def evl_load : SDNode<"ISD::EVL_LOAD", SDTMaskedLoad, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + // Do not use ld, st directly. Use load, extload, sextload, zextload, store, // and truncst (see below). 
def ld : SDNode<"ISD::LOAD" , SDTLoad, Index: lib/AsmParser/LLLexer.cpp =================================================================== --- lib/AsmParser/LLLexer.cpp +++ lib/AsmParser/LLLexer.cpp @@ -643,6 +643,7 @@ KEYWORD(inlinehint); KEYWORD(inreg); KEYWORD(jumptable); + KEYWORD(mask); KEYWORD(minsize); KEYWORD(naked); KEYWORD(nest); @@ -662,6 +663,7 @@ KEYWORD(optforfuzzing); KEYWORD(optnone); KEYWORD(optsize); + KEYWORD(passthru); KEYWORD(readnone); KEYWORD(readonly); KEYWORD(returned); @@ -683,6 +685,7 @@ KEYWORD(swifterror); KEYWORD(swiftself); KEYWORD(uwtable); + KEYWORD(vlen); KEYWORD(writeonly); KEYWORD(zeroext); Index: lib/AsmParser/LLParser.cpp =================================================================== --- lib/AsmParser/LLParser.cpp +++ lib/AsmParser/LLParser.cpp @@ -1295,14 +1295,17 @@ case lltok::kw_dereferenceable: case lltok::kw_dereferenceable_or_null: case lltok::kw_inalloca: + case lltok::kw_mask: case lltok::kw_nest: case lltok::kw_noalias: case lltok::kw_nocapture: case lltok::kw_nonnull: + case lltok::kw_passthru: case lltok::kw_returned: case lltok::kw_sret: case lltok::kw_swifterror: case lltok::kw_swiftself: + case lltok::kw_vlen: HaveError |= Error(Lex.getLoc(), "invalid use of parameter-only attribute on a function"); @@ -1583,10 +1586,12 @@ } case lltok::kw_inalloca: B.addAttribute(Attribute::InAlloca); break; case lltok::kw_inreg: B.addAttribute(Attribute::InReg); break; + case lltok::kw_mask: B.addAttribute(Attribute::Mask); break; case lltok::kw_nest: B.addAttribute(Attribute::Nest); break; case lltok::kw_noalias: B.addAttribute(Attribute::NoAlias); break; case lltok::kw_nocapture: B.addAttribute(Attribute::NoCapture); break; case lltok::kw_nonnull: B.addAttribute(Attribute::NonNull); break; + case lltok::kw_passthru: B.addAttribute(Attribute::Passthru); break; case lltok::kw_readnone: B.addAttribute(Attribute::ReadNone); break; case lltok::kw_readonly: B.addAttribute(Attribute::ReadOnly); break; case lltok::kw_returned: 
B.addAttribute(Attribute::Returned); break; @@ -1594,6 +1599,7 @@ case lltok::kw_sret: B.addAttribute(Attribute::StructRet); break; case lltok::kw_swifterror: B.addAttribute(Attribute::SwiftError); break; case lltok::kw_swiftself: B.addAttribute(Attribute::SwiftSelf); break; + case lltok::kw_vlen: B.addAttribute(Attribute::VectorLength); break; case lltok::kw_writeonly: B.addAttribute(Attribute::WriteOnly); break; case lltok::kw_zeroext: B.addAttribute(Attribute::ZExt); break; @@ -1684,12 +1690,15 @@ // Error handling. case lltok::kw_byval: case lltok::kw_inalloca: + case lltok::kw_mask: case lltok::kw_nest: case lltok::kw_nocapture: + case lltok::kw_passthru: case lltok::kw_returned: case lltok::kw_sret: case lltok::kw_swifterror: case lltok::kw_swiftself: + case lltok::kw_vlen: HaveError |= Error(Lex.getLoc(), "invalid use of parameter-only attribute"); break; @@ -3295,7 +3304,7 @@ ID.Kind = ValID::t_Constant; return false; } - + // Unary Operators. case lltok::kw_fneg: { unsigned Opc = Lex.getUIntVal(); @@ -3305,7 +3314,7 @@ ParseGlobalTypeAndValue(Val) || ParseToken(lltok::rparen, "expected ')' in unary constantexpr")) return true; - + // Check that the type is valid for the operator. 
switch (Opc) { case Instruction::FNeg: @@ -6170,11 +6179,11 @@ Valid = LHS->getType()->isIntOrIntVectorTy() || LHS->getType()->isFPOrFPVectorTy(); break; - case 1: - Valid = LHS->getType()->isIntOrIntVectorTy(); + case 1: + Valid = LHS->getType()->isIntOrIntVectorTy(); break; - case 2: - Valid = LHS->getType()->isFPOrFPVectorTy(); + case 2: + Valid = LHS->getType()->isFPOrFPVectorTy(); break; } Index: lib/AsmParser/LLToken.h =================================================================== --- lib/AsmParser/LLToken.h +++ lib/AsmParser/LLToken.h @@ -187,6 +187,7 @@ kw_inlinehint, kw_inreg, kw_jumptable, + kw_mask, kw_minsize, kw_naked, kw_nest, @@ -206,6 +207,7 @@ kw_optforfuzzing, kw_optnone, kw_optsize, + kw_passthru, kw_readnone, kw_readonly, kw_returned, @@ -225,6 +227,7 @@ kw_swifterror, kw_swiftself, kw_uwtable, + kw_vlen, kw_writeonly, kw_zeroext, Index: lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- lib/Bitcode/Reader/BitcodeReader.cpp +++ lib/Bitcode/Reader/BitcodeReader.cpp @@ -1331,6 +1331,8 @@ return Attribute::InReg; case bitc::ATTR_KIND_JUMP_TABLE: return Attribute::JumpTable; + case bitc::ATTR_KIND_MASK: + return Attribute::Mask; case bitc::ATTR_KIND_MIN_SIZE: return Attribute::MinSize; case bitc::ATTR_KIND_NAKED: @@ -1375,6 +1377,8 @@ return Attribute::OptimizeForSize; case bitc::ATTR_KIND_OPTIMIZE_NONE: return Attribute::OptimizeNone; + case bitc::ATTR_KIND_PASSTHRU: + return Attribute::Passthru; case bitc::ATTR_KIND_READ_NONE: return Attribute::ReadNone; case bitc::ATTR_KIND_READ_ONLY: @@ -1419,6 +1423,8 @@ return Attribute::SwiftSelf; case bitc::ATTR_KIND_UW_TABLE: return Attribute::UWTable; + case bitc::ATTR_KIND_VECTORLENGTH: + return Attribute::VectorLength; case bitc::ATTR_KIND_WRITEONLY: return Attribute::WriteOnly; case bitc::ATTR_KIND_Z_EXT: Index: lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- 
lib/Bitcode/Writer/BitcodeWriter.cpp
+++ lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -669,6 +669,12 @@
     return bitc::ATTR_KIND_READ_ONLY;
   case Attribute::Returned:
     return bitc::ATTR_KIND_RETURNED;
+  case Attribute::Mask:
+    return bitc::ATTR_KIND_MASK;
+  case Attribute::VectorLength:
+    return bitc::ATTR_KIND_VECTORLENGTH;
+  case Attribute::Passthru:
+    return bitc::ATTR_KIND_PASSTHRU;
   case Attribute::ReturnsTwice:
     return bitc::ATTR_KIND_RETURNS_TWICE;
   case Attribute::SExt:
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -935,6 +935,7 @@
   const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
   void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
   void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
+  void visitExplicitVectorLengthIntrinsic(const EVLIntrinsic &EVLI);
   void visitVAStart(const CallInst &I);
   void visitVAArg(const VAArgInst &I);
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5673,6 +5673,57 @@
   case Intrinsic::experimental_constrained_trunc:
     visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
     return nullptr;
+
+  case Intrinsic::evl_fneg:
+
+  case Intrinsic::evl_and:
+  case Intrinsic::evl_or:
+  case Intrinsic::evl_xor:
+  case Intrinsic::evl_ashr:
+  case Intrinsic::evl_lshr:
+  case Intrinsic::evl_shl:
+
+  case Intrinsic::evl_select:
+  case Intrinsic::evl_compose:
+  case Intrinsic::evl_compress:
+  case Intrinsic::evl_expand:
+
+  case Intrinsic::evl_load:
+  case Intrinsic::evl_store:
+  case Intrinsic::evl_compressstore:
+  case Intrinsic::evl_expandload:
+
+  case Intrinsic::evl_fadd:
+  case Intrinsic::evl_fsub:
+  case Intrinsic::evl_fmul:
+  case Intrinsic::evl_fdiv:
+  case Intrinsic::evl_frem:
+  case Intrinsic::evl_fma:
+
+  case Intrinsic::evl_add:
+  case Intrinsic::evl_sub:
+  case Intrinsic::evl_mul:
+  case Intrinsic::evl_udiv:
+  case Intrinsic::evl_sdiv:
+  case Intrinsic::evl_urem:
+  case Intrinsic::evl_srem:
+
+  case Intrinsic::evl_reduce_and:
+  case Intrinsic::evl_reduce_or:
+  case Intrinsic::evl_reduce_xor:
+  case Intrinsic::evl_reduce_fadd:
+  case Intrinsic::evl_reduce_fmax:
+  case Intrinsic::evl_reduce_fmin:
+  case Intrinsic::evl_reduce_fmul:
+  case Intrinsic::evl_reduce_mul:
+  case Intrinsic::evl_reduce_umax:
+  case Intrinsic::evl_reduce_umin:
+  case Intrinsic::evl_reduce_smax:
+  case Intrinsic::evl_reduce_smin: {
+    visitExplicitVectorLengthIntrinsic(cast<EVLIntrinsic>(I));
+    return nullptr;
+  }
+
   case Intrinsic::fmuladd: {
     EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
     if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
@@ -6486,6 +6535,80 @@
   setValue(&FPI, FPResult);
 }
 
+void SelectionDAGBuilder::visitExplicitVectorLengthIntrinsic(
+    const EVLIntrinsic & EVLInst) {
+  SDLoc sdl = getCurSDLoc();
+  unsigned Opcode;
+  switch (EVLInst.getIntrinsicID()) {
+  default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+  case Intrinsic::evl_add:     Opcode = ISD::EVL_ADD; break;
+  case Intrinsic::evl_sub:     Opcode = ISD::EVL_SUB; break;
+  case Intrinsic::evl_mul:     Opcode = ISD::EVL_MUL; break;
+  case Intrinsic::evl_udiv:    Opcode = ISD::EVL_UDIV; break;
+  case Intrinsic::evl_sdiv:    Opcode = ISD::EVL_SDIV; break;
+  case Intrinsic::evl_urem:    Opcode = ISD::EVL_UREM; break;
+  case Intrinsic::evl_srem:    Opcode = ISD::EVL_SREM; break;
+
+  case Intrinsic::evl_and:     Opcode = ISD::EVL_AND; break;
+  case Intrinsic::evl_or:      Opcode = ISD::EVL_OR; break;
+  case Intrinsic::evl_xor:     Opcode = ISD::EVL_XOR; break;
+  case Intrinsic::evl_shl:     Opcode = ISD::EVL_SHL; break;
+  case Intrinsic::evl_ashr:    Opcode = ISD::EVL_SRA; break;
+  case Intrinsic::evl_lshr:    Opcode = ISD::EVL_SRL; break;
+
+  case Intrinsic::evl_fneg:    Opcode = ISD::EVL_FNEG; break;
+  case Intrinsic::evl_fadd:    Opcode = ISD::EVL_FADD; break;
+  case Intrinsic::evl_fsub:    Opcode = ISD::EVL_FSUB; break;
+  case Intrinsic::evl_fmul:    Opcode = ISD::EVL_FMUL; break;
+  case Intrinsic::evl_fdiv:    Opcode = ISD::EVL_FDIV; break;
+  case Intrinsic::evl_frem:    Opcode = ISD::EVL_FREM; break;
+  case Intrinsic::evl_fma:     Opcode = ISD::EVL_FMA; break;
+  case Intrinsic::evl_compose: Opcode = ISD::EVL_COMPOSE; break;
+  }
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SDValue Chain = getRoot();
+  SmallVector<EVT, 4> ValueVTs;
+  ComputeValueVTs(TLI, DAG.getDataLayout(), EVLInst.getType(), ValueVTs);
+  ValueVTs.push_back(MVT::Other); // Out chain
+
+  SDVTList VTs = DAG.getVTList(ValueVTs);
+  SDValue Result;
+  switch (EVLInst.getNumArgOperands()) {
+  default:
+    llvm_unreachable("unexpected number of arguments to evl intrinsic");
+  case 3:
+    Result = DAG.getNode(Opcode, sdl, VTs,
+                         { Chain, getValue(EVLInst.getArgOperand(0)),
+                           getValue(EVLInst.getArgOperand(1)),
+                           getValue(EVLInst.getArgOperand(2)) });
+    break;
+
+  case 4:
+    Result = DAG.getNode(Opcode, sdl, VTs,
+                         { Chain, getValue(EVLInst.getArgOperand(0)),
+                           getValue(EVLInst.getArgOperand(1)),
+                           getValue(EVLInst.getArgOperand(2)),
+                           getValue(EVLInst.getArgOperand(3)) });
+    break;
+
+  case 5:
+    Result = DAG.getNode(Opcode, sdl, VTs,
+                         { Chain, getValue(EVLInst.getArgOperand(0)),
+                           getValue(EVLInst.getArgOperand(1)),
+                           getValue(EVLInst.getArgOperand(2)),
+                           getValue(EVLInst.getArgOperand(3)),
+                           getValue(EVLInst.getArgOperand(4)) });
+    break;
+  }
+
+  assert(Result.getNode()->getNumValues() == 2);
+  SDValue OutChain = Result.getValue(1);
+  DAG.setRoot(OutChain);
+  SDValue EVLResult = Result.getValue(0);
+  setValue(&EVLInst, EVLResult);
+}
+
 std::pair<SDValue, SDValue>
 SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
                                     const BasicBlock *EHPadBB) {
Index: lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -215,6 +215,9 @@
   case ISD::FLOG10:                     return "flog10";
   case ISD::STRICT_FLOG10:              return "strict_flog10";
 
+  // Explicit vector Length Unary operators
+  case ISD::EVL_FNEG:                   return "evl_fneg";
+
   // Binary operators
   case ISD::ADD:                        return "add";
   case ISD::SUB:                        return "sub";
@@ -262,6 +265,27 @@
   case ISD::UMIN:                       return "umin";
   case ISD::UMAX:                       return "umax";
 
+  // Explicit Vector Length Binary operators
+  case ISD::EVL_ADD:                    return "evl_add";
+  case ISD::EVL_SUB:                    return "evl_sub";
+  case ISD::EVL_MUL:                    return "evl_mul";
+  case ISD::EVL_SDIV:                   return "evl_sdiv";
+  case ISD::EVL_UDIV:                   return "evl_udiv";
+  case ISD::EVL_SREM:                   return "evl_srem";
+  case ISD::EVL_UREM:                   return "evl_urem";
+  case ISD::EVL_AND:                    return "evl_and";
+  case ISD::EVL_OR:                     return "evl_or";
+  case ISD::EVL_XOR:                    return "evl_xor";
+  case ISD::EVL_SHL:                    return "evl_shl";
+  case ISD::EVL_SRA:                    return "evl_sra";
+  case ISD::EVL_SRL:                    return "evl_srl";
+  case ISD::EVL_FADD:                   return "evl_fadd";
+  case ISD::EVL_FSUB:                   return "evl_fsub";
+  case ISD::EVL_FMUL:                   return "evl_fmul";
+  case ISD::EVL_FDIV:                   return "evl_fdiv";
+  case ISD::EVL_FMA:                    return "evl_fma";
+  case ISD::EVL_FREM:                   return "evl_frem";
+
   case ISD::FPOWI:                      return "fpowi";
   case ISD::STRICT_FPOWI:               return "strict_fpowi";
   case ISD::SETCC:                      return "setcc";
@@ -293,6 +317,8 @@
   case ISD::SRA_PARTS:                  return "sra_parts";
   case ISD::SRL_PARTS:                  return "srl_parts";
 
+  case ISD::EVL_COMPOSE:                return "evl_compose";
+
   case ISD::SADDSAT:                    return "saddsat";
   case ISD::UADDSAT:                    return "uaddsat";
   case ISD::SSUBSAT:                    return "ssubsat";
@@ -419,6 +445,18 @@
   case ISD::VECREDUCE_UMIN:             return "vecreduce_umin";
   case ISD::VECREDUCE_FMAX:             return "vecreduce_fmax";
   case ISD::VECREDUCE_FMIN:             return "vecreduce_fmin";
+
+  case ISD::EVL_REDUCE_FADD:            return "evl_reduce_fadd";
+  case ISD::EVL_REDUCE_FMUL:            return "evl_reduce_fmul";
+  case ISD::EVL_REDUCE_ADD:             return "evl_reduce_add";
+  case ISD::EVL_REDUCE_MUL:             return "evl_reduce_mul";
+  case ISD::EVL_REDUCE_AND:             return "evl_reduce_and";
+  case ISD::EVL_REDUCE_OR:              return "evl_reduce_or";
+  case ISD::EVL_REDUCE_XOR:             return "evl_reduce_xor";
+  case ISD::EVL_REDUCE_SMAX:            return "evl_reduce_smax";
+  case ISD::EVL_REDUCE_SMIN:            return "evl_reduce_smin";
+  case ISD::EVL_REDUCE_UMAX:            return "evl_reduce_umax";
+  case ISD::EVL_REDUCE_UMIN:            return "evl_reduce_umin";
   }
 }
Index: lib/IR/Attributes.cpp
===================================================================
--- lib/IR/Attributes.cpp
+++ lib/IR/Attributes.cpp
@@ -257,6 +257,8 @@
     return "byval";
   if (hasAttribute(Attribute::Convergent))
     return "convergent";
+  if (hasAttribute(Attribute::VectorLength))
+    return "vlen";
   if (hasAttribute(Attribute::SwiftError))
     return "swifterror";
   if (hasAttribute(Attribute::SwiftSelf))
@@ -273,6 +275,10 @@
     return "inreg";
   if (hasAttribute(Attribute::JumpTable))
     return "jumptable";
+  if (hasAttribute(Attribute::Mask))
+    return "mask";
+  if (hasAttribute(Attribute::Passthru))
+    return "passthru";
   if (hasAttribute(Attribute::MinSize))
     return "minsize";
   if (hasAttribute(Attribute::Naked))
Index: lib/IR/CMakeLists.txt
===================================================================
--- lib/IR/CMakeLists.txt
+++ lib/IR/CMakeLists.txt
@@ -22,6 +22,7
@@
   DiagnosticPrinter.cpp
   Dominators.cpp
   DomTreeUpdater.cpp
+  EVLBuilder.cpp
   Function.cpp
   GVMaterializer.cpp
   Globals.cpp
Index: lib/IR/EVLBuilder.cpp
===================================================================
--- /dev/null
+++ lib/IR/EVLBuilder.cpp
@@ -0,0 +1,162 @@
+#include "llvm/IR/EVLBuilder.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Intrinsics.h"
+
+#include "llvm/IR/Module.h"
+
+namespace llvm {
+
+Module &
+EVLBuilder::getModule() const {
+  return *Builder.GetInsertBlock()->getParent()->getParent();
+}
+
+EVLIntrinsicDesc
+EVLBuilder::GetEVLIntrinsicDesc(unsigned OC) {
+  switch (OC) {
+  // fp
+  case Instruction::FAdd: return EVLIntrinsicDesc{ Intrinsic::evl_fadd, TypeTokenVec{EVLTypeToken::Vector}, 2, 3}; break;
+  case Instruction::FSub: return EVLIntrinsicDesc{ Intrinsic::evl_fsub, TypeTokenVec{EVLTypeToken::Vector}, 2, 3}; break;
+  case Instruction::FMul: return EVLIntrinsicDesc{ Intrinsic::evl_fmul, TypeTokenVec{EVLTypeToken::Vector}, 2, 3}; break;
+  case Instruction::FDiv: return EVLIntrinsicDesc{ Intrinsic::evl_fdiv, TypeTokenVec{EVLTypeToken::Vector}, 2, 3}; break;
+  case Instruction::FRem: return EVLIntrinsicDesc{ Intrinsic::evl_frem, TypeTokenVec{EVLTypeToken::Vector}, 2, 3}; break;
+
+  // sign-oblivious
+  case Instruction::Add:  return EVLIntrinsicDesc{ Intrinsic::evl_add,  TypeTokenVec{EVLTypeToken::Vector}, 2, 3}; break;
+  case Instruction::Sub:  return EVLIntrinsicDesc{ Intrinsic::evl_sub,  TypeTokenVec{EVLTypeToken::Vector}, 2, 3}; break;
+  case Instruction::Mul:  return EVLIntrinsicDesc{ Intrinsic::evl_mul,  TypeTokenVec{EVLTypeToken::Vector}, 2, 3}; break;
+
+  // signed
+  case Instruction::SDiv: return EVLIntrinsicDesc{ Intrinsic::evl_sdiv, TypeTokenVec{EVLTypeToken::Vector}, 2, 3}; break;
+  case Instruction::UDiv: return EVLIntrinsicDesc{ Intrinsic::evl_udiv, TypeTokenVec{EVLTypeToken::Vector}, 2, 3}; break;
+  case Instruction::SRem: return EVLIntrinsicDesc{ Intrinsic::evl_srem, TypeTokenVec{EVLTypeToken::Vector}, 2, 3}; break;
+  case Instruction::URem: return EVLIntrinsicDesc{ Intrinsic::evl_urem, TypeTokenVec{EVLTypeToken::Vector}, 2, 3}; break;
+
+  default:
+    return EVLIntrinsicDesc{Intrinsic::not_intrinsic, TypeTokenVec(), -1, -1};
+  }
+}
+
+static
+ShortTypeVec
+EncodeTypeTokens(TypeTokenVec TTVec, Type & VectorTy, Type & ScalarTy) {
+  ShortTypeVec STV;
+
+  for (auto Token : TTVec) {
+    switch (Token) {
+    case EVLTypeToken::Scalar: STV.push_back(&ScalarTy); break;
+    case EVLTypeToken::Vector: STV.push_back(&VectorTy); break;
+    default: abort(); // unsupported EVLTypeToken
+    }
+  }
+
+  return STV;
+}
+
+Value&
+EVLBuilder::GetMaskForType(VectorType & VecTy) {
+  if (Mask) return *Mask;
+
+  auto * boolTy = Builder.getInt1Ty();
+  auto * maskTy = VectorType::get(boolTy, StaticVectorLength);
+  return *ConstantInt::getAllOnesValue(maskTy);
+}
+
+Value&
+EVLBuilder::GetEVLForType(VectorType & VecTy) {
+  if (ExplicitVectorLength) return *ExplicitVectorLength;
+
+  // TODO SVE
+  auto * intTy = Builder.getInt32Ty();
+  return *ConstantInt::get(intTy, StaticVectorLength);
+}
+
+Value*
+EVLBuilder::CreateVectorCopy(Instruction & Inst, ValArray VecOpArray) {
+  auto oc = Inst.getOpcode();
+
+  if ((oc <= Instruction::BinaryOpsEnd) &&
+      (oc >= Instruction::BinaryOpsBegin)) {
+    assert(VecOpArray.size() == 2);
+    Value & FirstOp = *VecOpArray[0];
+    Value & SndOp = *VecOpArray[1];
+
+    // Fetch the EVL intrinsic
+    auto & VecTy = cast<VectorType>(*FirstOp.getType());
+    auto & ScalarTy = *VecTy.getVectorElementType();
+    auto evlDesc = GetEVLIntrinsicDesc(oc);
+    if (evlDesc.ID == Intrinsic::not_intrinsic) {
+      return nullptr;
+    }
+
+    assert (evlDesc.ID != Intrinsic::not_intrinsic);
+    auto * Func = Intrinsic::getDeclaration(&getModule(), evlDesc.ID, EncodeTypeTokens(evlDesc.typeTokens, VecTy, ScalarTy));
+
+    assert((evlDesc.MaskPos == 2) && (evlDesc.EVLPos == 3));
+
+    // Materialize the Call
+    ShortValueVec Args{&FirstOp, &SndOp, &GetMaskForType(VecTy), &GetEVLForType(VecTy)};
+
+    auto & EVLCall = *Builder.CreateCall(Func, Args);
+
+    // transfer fast math flags
+    if (isa<FPMathOperator>(Inst)) {
+      cast<Instruction>(EVLCall).copyFastMathFlags(Inst.getFastMathFlags());
+    }
+
+    return &EVLCall;
+  }
+
+  return nullptr;
+}
+
+VectorType&
+EVLBuilder::getVectorType(Type &ElementTy) {
+  return *VectorType::get(&ElementTy, StaticVectorLength);
+}
+
+Value&
+EVLBuilder::CreateContiguousStore(Value & Val, Value & Pointer) {
+  auto & VecTy = cast<VectorType>(*Val.getType());
+  auto * StoreFunc = Intrinsic::getDeclaration(&getModule(), Intrinsic::evl_store, {Val.getType(), Pointer.getType()});
+  ShortValueVec Args{&Val, &Pointer, &GetMaskForType(VecTy), &GetEVLForType(VecTy)};
+  return *Builder.CreateCall(StoreFunc, Args);
+}
+
+Value&
+EVLBuilder::CreateContiguousLoad(Value & Pointer, Value * Passthru) {
+  auto & PointerTy = cast<PointerType>(*Pointer.getType());
+  auto & VecTy = getVectorType(*PointerTy.getPointerElementType());
+
+  auto * LoadFunc = Intrinsic::getDeclaration(&getModule(), Intrinsic::evl_load, {&VecTy, &PointerTy});
+  if (!Passthru) {
+    Passthru = UndefValue::get(&VecTy);
+  }
+
+  ShortValueVec Args{&Pointer, Passthru, &GetMaskForType(VecTy), &GetEVLForType(VecTy)};
+  return *Builder.CreateCall(LoadFunc, Args);
+}
+
+Value&
+EVLBuilder::CreateScatter(Value & Val, Value & PointerVec) {
+  auto & VecTy = cast<VectorType>(*Val.getType());
+  auto * ScatterFunc = Intrinsic::getDeclaration(&getModule(), Intrinsic::evl_scatter, {Val.getType(), PointerVec.getType()});
+  ShortValueVec Args{&Val, &PointerVec, &GetMaskForType(VecTy), &GetEVLForType(VecTy)};
+  return *Builder.CreateCall(ScatterFunc, Args);
+}
+
+Value&
+EVLBuilder::CreateGather(Value & PointerVec, Value * Passthru) {
+  auto & PointerVecTy = cast<VectorType>(*PointerVec.getType());
+  auto & ElemTy = *cast<PointerType>(*PointerVecTy.getVectorElementType()).getPointerElementType();
+  auto & VecTy = *VectorType::get(&ElemTy, PointerVecTy.getNumElements());
+  auto * GatherFunc = Intrinsic::getDeclaration(&getModule(), Intrinsic::evl_gather, {&VecTy, &PointerVecTy});
+
+  if (!Passthru) {
+    Passthru = UndefValue::get(&VecTy);
+  }
+  ShortValueVec Args{&PointerVec, Passthru,
&GetMaskForType(VecTy), &GetEVLForType(VecTy)}; + return *Builder.CreateCall(GatherFunc, Args); +} + +} // namespace llvm Index: lib/IR/IntrinsicInst.cpp =================================================================== --- lib/IR/IntrinsicInst.cpp +++ lib/IR/IntrinsicInst.cpp @@ -138,6 +138,70 @@ .Default(ebInvalid); } +bool EVLIntrinsic::isUnaryOp() const { + switch (getIntrinsicID()) { + default: + return false; + case Intrinsic::evl_fneg: + return true; + } +} + +Value* +EVLIntrinsic::GetMask() const { + if (isBinaryOp()) { return getArgOperand(2); } + else if (isTernaryOp()) { return getArgOperand(3); } + else if (isUnaryOp()) { return getArgOperand(1); } + else return nullptr; +} + +Value* +EVLIntrinsic::GetVectorLength() const { + if (isBinaryOp()) { return getArgOperand(3); } + else if (isTernaryOp()) { return getArgOperand(4); } + else if (isUnaryOp()) { return getArgOperand(2); } + else return nullptr; +} + +bool EVLIntrinsic::isBinaryOp() const { + switch (getIntrinsicID()) { + default: + return false; + + case Intrinsic::evl_and: + case Intrinsic::evl_or: + case Intrinsic::evl_xor: + case Intrinsic::evl_ashr: + case Intrinsic::evl_lshr: + case Intrinsic::evl_shl: + + case Intrinsic::evl_fadd: + case Intrinsic::evl_fsub: + case Intrinsic::evl_fmul: + case Intrinsic::evl_fdiv: + case Intrinsic::evl_frem: + + case Intrinsic::evl_add: + case Intrinsic::evl_sub: + case Intrinsic::evl_mul: + case Intrinsic::evl_udiv: + case Intrinsic::evl_sdiv: + case Intrinsic::evl_urem: + case Intrinsic::evl_srem: + return true; + } +} + +bool EVLIntrinsic::isTernaryOp() const { + switch (getIntrinsicID()) { + default: + return false; + case Intrinsic::evl_fma: + case Intrinsic::evl_select: + return true; + } +} + bool ConstrainedFPIntrinsic::isUnaryOp() const { switch (getIntrinsicID()) { default: Index: lib/IR/Verifier.cpp =================================================================== --- lib/IR/Verifier.cpp +++ lib/IR/Verifier.cpp @@ -1653,11 +1653,14 @@ if 
(Attrs.isEmpty()) return; + bool SawMask = false; bool SawNest = false; + bool SawPassthru = false; bool SawReturned = false; bool SawSRet = false; bool SawSwiftSelf = false; bool SawSwiftError = false; + bool SawVectorLength = false; // Verify return value attributes. AttributeSet RetAttrs = Attrs.getRetAttributes(); @@ -1720,12 +1723,33 @@ SawSwiftError = true; } + if (ArgAttrs.hasAttribute(Attribute::VectorLength)) { + Assert(!SawVectorLength, "Cannot have multiple 'vlen' parameters!", + V); + SawVectorLength = true; + } + + if (ArgAttrs.hasAttribute(Attribute::Passthru)) { + Assert(!SawPassthru, "Cannot have multiple 'passthru' parameters!", + V); + SawPassthru = true; + } + + if (ArgAttrs.hasAttribute(Attribute::Mask)) { + Assert(!SawMask, "Cannot have multiple 'mask' parameters!", + V); + SawMask = true; + } + if (ArgAttrs.hasAttribute(Attribute::InAlloca)) { Assert(i == FT->getNumParams() - 1, "inalloca isn't on the last parameter!", V); } } + Assert(!SawPassthru || SawMask, + "Cannot have 'passthru' parameter without 'mask' parameter!", V); + if (!Attrs.hasAttributes(AttributeList::FunctionIndex)) return; @@ -3042,7 +3066,7 @@ /// visitUnaryOperator - Check the argument to the unary operator. 
/// void Verifier::visitUnaryOperator(UnaryOperator &U) { - Assert(U.getType() == U.getOperand(0)->getType(), + Assert(U.getType() == U.getOperand(0)->getType(), "Unary operators must have same type for" "operands and result!", &U); @@ -4866,7 +4890,7 @@ bool runOnFunction(Function &F) override { if (!V->verify(F) && FatalErrors) { - errs() << "in function " << F.getName() << '\n'; + errs() << "in function " << F.getName() << '\n'; report_fatal_error("Broken function found, compilation aborted!"); } return false; Index: lib/Transforms/Utils/CodeExtractor.cpp =================================================================== --- lib/Transforms/Utils/CodeExtractor.cpp +++ lib/Transforms/Utils/CodeExtractor.cpp @@ -774,6 +774,7 @@ case Attribute::InaccessibleMemOnly: case Attribute::InaccessibleMemOrArgMemOnly: case Attribute::JumpTable: + case Attribute::Mask: case Attribute::Naked: case Attribute::Nest: case Attribute::NoAlias: @@ -782,6 +783,7 @@ case Attribute::NoReturn: case Attribute::None: case Attribute::NonNull: + case Attribute::Passthru: case Attribute::ReadNone: case Attribute::ReadOnly: case Attribute::Returned: @@ -792,6 +794,7 @@ case Attribute::StructRet: case Attribute::SwiftError: case Attribute::SwiftSelf: + case Attribute::VectorLength: case Attribute::WriteOnly: case Attribute::ZExt: case Attribute::EndAttrKinds: Index: test/Bitcode/attributes.ll =================================================================== --- test/Bitcode/attributes.ll +++ test/Bitcode/attributes.ll @@ -351,6 +351,11 @@ ret void } +; CHECK: define <8 x double> @f60(<8 x double> passthru, <8 x i1> mask, i32 vlen) { +define <8 x double> @f60(<8 x double> passthru, <8 x i1> mask, i32 vlen) { + ret <8 x double> undef +} + ; CHECK: attributes #0 = { noreturn } ; CHECK: attributes #1 = { nounwind } ; CHECK: attributes #2 = { readnone } Index: test/Verifier/evl_attribs.ll =================================================================== --- /dev/null +++ 
test/Verifier/evl_attribs.ll @@ -0,0 +1,13 @@ +; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s + +declare void @a(<16 x i1> mask %a, <16 x i1> mask %b) +; CHECK: Cannot have multiple 'mask' parameters! + +declare void @b(<16 x i1> mask %a, i32 vlen %x, i32 vlen %y) +; CHECK: Cannot have multiple 'vlen' parameters! + +declare <16 x double> @c(<16 x double> passthru %a) +; CHECK: Cannot have 'passthru' parameter without 'mask' parameter! + +declare <16 x double> @d(<16 x double> passthru %a, <16 x i1> mask %M, <16 x double> passthru %b) +; CHECK: Cannot have multiple 'passthru' parameters! Index: utils/TableGen/CodeGenIntrinsics.h =================================================================== --- utils/TableGen/CodeGenIntrinsics.h +++ utils/TableGen/CodeGenIntrinsics.h @@ -137,7 +137,7 @@ // True if the intrinsic is marked as speculatable. bool isSpeculatable; - enum ArgAttribute { NoCapture, Returned, ReadOnly, WriteOnly, ReadNone }; + enum ArgAttribute { Mask, NoCapture, Passthru, Returned, ReadOnly, WriteOnly, ReadNone, VectorLength }; std::vector> ArgumentAttributes; bool hasProperty(enum SDNP Prop) const { Index: utils/TableGen/CodeGenTarget.cpp =================================================================== --- utils/TableGen/CodeGenTarget.cpp +++ utils/TableGen/CodeGenTarget.cpp @@ -695,6 +695,15 @@ } else if (Property->isSubClassOf("Returned")) { unsigned ArgNo = Property->getValueAsInt("ArgNo"); ArgumentAttributes.push_back(std::make_pair(ArgNo, Returned)); + } else if (Property->isSubClassOf("VectorLength")) { + unsigned ArgNo = Property->getValueAsInt("ArgNo"); + ArgumentAttributes.push_back(std::make_pair(ArgNo, VectorLength)); + } else if (Property->isSubClassOf("Mask")) { + unsigned ArgNo = Property->getValueAsInt("ArgNo"); + ArgumentAttributes.push_back(std::make_pair(ArgNo, Mask)); + } else if (Property->isSubClassOf("Passthru")) { + unsigned ArgNo = Property->getValueAsInt("ArgNo"); + ArgumentAttributes.push_back(std::make_pair(ArgNo, 
Passthru)); } else if (Property->isSubClassOf("ReadOnly")) { unsigned ArgNo = Property->getValueAsInt("ArgNo"); ArgumentAttributes.push_back(std::make_pair(ArgNo, ReadOnly)); Index: utils/TableGen/IntrinsicEmitter.cpp =================================================================== --- utils/TableGen/IntrinsicEmitter.cpp +++ utils/TableGen/IntrinsicEmitter.cpp @@ -595,6 +595,24 @@ OS << "Attribute::Returned"; addComma = true; break; + case CodeGenIntrinsic::VectorLength: + if (addComma) + OS << ","; + OS << "Attribute::VectorLength"; + addComma = true; + break; + case CodeGenIntrinsic::Mask: + if (addComma) + OS << ","; + OS << "Attribute::Mask"; + addComma = true; + break; + case CodeGenIntrinsic::Passthru: + if (addComma) + OS << ","; + OS << "Attribute::Passthru"; + addComma = true; + break; case CodeGenIntrinsic::ReadOnly: if (addComma) OS << ",";