Index: include/llvm/Bitcode/LLVMBitCodes.h =================================================================== --- include/llvm/Bitcode/LLVMBitCodes.h +++ include/llvm/Bitcode/LLVMBitCodes.h @@ -603,6 +603,9 @@ ATTR_KIND_OPT_FOR_FUZZING = 57, ATTR_KIND_SHADOWCALLSTACK = 58, ATTR_KIND_SPECULATIVE_LOAD_HARDENING = 59, + ATTR_KIND_MASK = 60, + ATTR_KIND_VECTORLENGTH = 61, + ATTR_KIND_PASSTHRU = 62, }; enum ComdatSelectionKindCodes { Index: include/llvm/CodeGen/ISDOpcodes.h =================================================================== --- include/llvm/CodeGen/ISDOpcodes.h +++ include/llvm/CodeGen/ISDOpcodes.h @@ -198,6 +198,7 @@ /// Simple integer binary arithmetic operators. ADD, SUB, MUL, SDIV, UDIV, SREM, UREM, + EVL_ADD, EVL_SUB, EVL_MUL, EVL_SDIV, EVL_UDIV, EVL_SREM, EVL_UREM, /// SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing /// a signed/unsigned value of type i[2*N], and return the full value as @@ -280,6 +281,7 @@ /// Simple binary floating point operators. FADD, FSUB, FMUL, FDIV, FREM, + EVL_FADD, EVL_FSUB, EVL_FMUL, EVL_FDIV, EVL_FREM, /// Constrained versions of the binary floating point operators. /// These will be lowered to the simple operators before final selection. @@ -299,6 +301,7 @@ /// FMA - Perform a * b + c with no intermediate rounding step. FMA, + EVL_FMA, /// FMAD - Perform a * b + c, while getting the same result as the /// separately rounded operations. @@ -365,6 +368,19 @@ /// in terms of the element size of VEC1/VEC2, not in terms of bytes. VECTOR_SHUFFLE, + /// EVL_VSHIFT(VEC1, AMOUNT, MASK, VLEN) - Returns a vector, of the same type as + /// VEC1. AMOUNT is an integer value. The returned vector is equivalent + /// to VEC1 shifted by AMOUNT (RETURNED_VEC[idx] = VEC1[idx + AMOUNT]). + EVL_VSHIFT, + + /// EVL_COMPRESS(VEC1, MASK, VLEN) - Returns a vector, of the same type as + /// VEC1. + EVL_COMPRESS, + + /// EVL_EXPAND(VEC1, MASK, VLEN) - Returns a vector, of the same type as + /// VEC1. + EVL_EXPAND, + /// SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a /// scalar value into element 0 of the resultant vector type. The top /// elements 1 to N-1 of the N-element vector are undefined. The type @@ -384,6 +400,7 @@ /// Bitwise operators - logical and, logical or, logical xor. AND, OR, XOR, + EVL_AND, EVL_OR, EVL_XOR, /// ABS - Determine the unsigned absolute value of a signed integer value of /// the same bitwidth. @@ -407,6 +424,7 @@ /// fshl(X,Y,Z): (X << (Z % BW)) | (Y >> (BW - (Z % BW))) /// fshr(X,Y,Z): (X << (BW - (Z % BW))) | (Y >> (Z % BW)) SHL, SRA, SRL, ROTL, ROTR, FSHL, FSHR, + EVL_SHL, EVL_SRA, EVL_SRL, /// Byte Swap and Counting operators. BSWAP, CTTZ, CTLZ, CTPOP, BITREVERSE, @@ -426,6 +444,14 @@ /// change the condition type in order to match the VSELECT node using a /// pattern. The condition follows the BooleanContent format of the target. VSELECT, + EVL_SELECT, + + /// Select with an integer pivot (op #0) and two vector operands (ops #1 + /// and #2), returning a vector result. All vectors have the same length. + /// Similar to the vector select, a comparison of the results element index + /// with the integer pivot selects hether the corresponding result element + /// is taken from op #1 or op #2. + EVL_COMPOSE, /// Select with condition operator - This selects between a true value and /// a false value (ops #2 and #3) based on the boolean result of comparing @@ -440,6 +466,7 @@ /// them with (op #2) as a CondCodeSDNode. If the operands are vector types /// then the result type must also be a vector type. 
SETCC, + EVL_SETCC, /// Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but /// op #2 is a boolean indicating if there is an incoming carry. This @@ -583,6 +610,7 @@ FNEG, FABS, FSQRT, FCBRT, FSIN, FCOS, FPOWI, FPOW, FLOG, FLOG2, FLOG10, FEXP, FEXP2, FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR, + EVL_FNEG, /// FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two /// values. // @@ -828,6 +856,7 @@ // Val, OutChain = MLOAD(BasePtr, Mask, PassThru) // OutChain = MSTORE(Value, BasePtr, Mask) MLOAD, MSTORE, + EVL_LOAD, EVL_STORE, // Masked gather and scatter - load and store operations for a vector of // random addresses with additional mask operand that prevents memory @@ -839,6 +868,7 @@ // The Index operand can have more vector elements than the other operands // due to type legalization. The extra elements are ignored. MGATHER, MSCATTER, + EVL_GATHER, EVL_SCATTER, /// This corresponds to the llvm.lifetime.* intrinsics. The first operand /// is the chain and the second operand is the alloca pointer. @@ -870,8 +900,15 @@ VECREDUCE_ADD, VECREDUCE_MUL, VECREDUCE_AND, VECREDUCE_OR, VECREDUCE_XOR, VECREDUCE_SMAX, VECREDUCE_SMIN, VECREDUCE_UMAX, VECREDUCE_UMIN, + + EVL_REDUCE_FADD, EVL_REDUCE_FMUL, + EVL_REDUCE_ADD, EVL_REDUCE_MUL, + EVL_REDUCE_AND, EVL_REDUCE_OR, EVL_REDUCE_XOR, + EVL_REDUCE_SMAX, EVL_REDUCE_SMIN, EVL_REDUCE_UMAX, EVL_REDUCE_UMIN, + /// FMIN/FMAX nodes can have flags, for NaN/NoNaN variants. VECREDUCE_FMAX, VECREDUCE_FMIN, + EVL_REDUCE_FMAX, EVL_REDUCE_FMIN, /// BUILTIN_OP_END - This must be the last enum value in this list. /// The target-specific pre-isel opcode values start here. @@ -1032,6 +1069,19 @@ /// SETCC_INVALID if it is not possible to represent the resultant comparison. CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, bool isInteger); + /// Return the mask operand of this EVL SDNode. + /// Otw, return -1. + int GetMaskPosEVL(unsigned OpCode); + + /// Return the vector length operand of this EVL SDNode. + /// Otw, return -1. + int GetVectorLengthPosEVL(unsigned OpCode); + + /// Translate this EVL OpCode to a native instruction OpCode. + unsigned GetFunctionOpCodeForEVL(unsigned EVLOpCode); + + unsigned GetEVLForFunctionOpCode(unsigned OpCode); + } // end llvm::ISD namespace } // end llvm namespace Index: include/llvm/CodeGen/SelectionDAG.h =================================================================== --- include/llvm/CodeGen/SelectionDAG.h +++ include/llvm/CodeGen/SelectionDAG.h @@ -1083,6 +1083,20 @@ SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM); + /// Returns sum of the base pointer and offset. + SDValue getLoadEVL(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, + SDValue Mask, SDValue VLen, EVT MemVT, + MachineMemOperand *MMO, ISD::LoadExtType); + + SDValue getStoreEVL(SDValue Chain, const SDLoc &dl, SDValue Val, + SDValue Ptr, SDValue Mask, SDValue VLen, + EVT MemVT, MachineMemOperand *MMO, + bool IsTruncating = false); + SDValue getGatherEVL(SDVTList VTs, EVT VT, const SDLoc &dl, + ArrayRef Ops, MachineMemOperand *MMO); + SDValue getScatterEVL(SDVTList VTs, EVT VT, const SDLoc &dl, + ArrayRef Ops, MachineMemOperand *MMO); + /// Returns sum of the base pointer and offset. 
SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, const SDLoc &DL); Index: include/llvm/CodeGen/SelectionDAGNodes.h =================================================================== --- include/llvm/CodeGen/SelectionDAGNodes.h +++ include/llvm/CodeGen/SelectionDAGNodes.h @@ -533,6 +533,7 @@ class LoadSDNodeBitfields { friend class LoadSDNode; friend class MaskedLoadSDNode; + friend class EVLLoadSDNode; uint16_t : NumLSBaseSDNodeBits; @@ -543,6 +544,7 @@ class StoreSDNodeBitfields { friend class StoreSDNode; friend class MaskedStoreSDNode; + friend class EVLStoreSDNode; uint16_t : NumLSBaseSDNodeBits; @@ -680,6 +682,66 @@ } } + /// Test whether this is an Explicit Vector Length node. + bool isEVL() const { + switch (NodeType) { + default: + return false; + case ISD::EVL_LOAD: + case ISD::EVL_STORE: + case ISD::EVL_GATHER: + case ISD::EVL_SCATTER: + + case ISD::EVL_FNEG: + + case ISD::EVL_FADD: + case ISD::EVL_FMUL: + case ISD::EVL_FSUB: + case ISD::EVL_FDIV: + case ISD::EVL_FREM: + + case ISD::EVL_FMA: + + case ISD::EVL_ADD: + case ISD::EVL_MUL: + case ISD::EVL_SUB: + case ISD::EVL_SRA: + case ISD::EVL_SRL: + case ISD::EVL_SHL: + case ISD::EVL_UDIV: + case ISD::EVL_SDIV: + case ISD::EVL_UREM: + case ISD::EVL_SREM: + + case ISD::EVL_EXPAND: + case ISD::EVL_COMPRESS: + case ISD::EVL_VSHIFT: + case ISD::EVL_SETCC: + case ISD::EVL_COMPOSE: + + case ISD::EVL_AND: + case ISD::EVL_XOR: + case ISD::EVL_OR: + + case ISD::EVL_REDUCE_ADD: + case ISD::EVL_REDUCE_SMIN: + case ISD::EVL_REDUCE_SMAX: + case ISD::EVL_REDUCE_UMIN: + case ISD::EVL_REDUCE_UMAX: + + case ISD::EVL_REDUCE_MUL: + case ISD::EVL_REDUCE_AND: + case ISD::EVL_REDUCE_OR: + case ISD::EVL_REDUCE_FADD: + case ISD::EVL_REDUCE_FMUL: + case ISD::EVL_REDUCE_FMIN: + case ISD::EVL_REDUCE_FMAX: + + return true; + } + } + + /// Test if this node has a post-isel opcode, directly /// corresponding to a MachineInstr opcode. bool isMachineOpcode() const { return NodeType < 0; } @@ -1367,6 +1429,10 @@ N->getOpcode() == ISD::MSTORE || N->getOpcode() == ISD::MGATHER || N->getOpcode() == ISD::MSCATTER || + N->getOpcode() == ISD::EVL_LOAD || + N->getOpcode() == ISD::EVL_STORE || + N->getOpcode() == ISD::EVL_GATHER || + N->getOpcode() == ISD::EVL_SCATTER || N->isMemIntrinsic() || N->isTargetMemoryOpcode(); } @@ -2139,6 +2205,96 @@ } }; +/// This base class is used to represent MLOAD and MSTORE nodes +class EVLLoadStoreSDNode : public MemSDNode { +public: + friend class SelectionDAG; + + EVLLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order, + const DebugLoc &dl, SDVTList VTs, EVT MemVT, + MachineMemOperand *MMO) + : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {} + + // EVLLoadSDNode (Chain, ptr, mask, VLen) + // EVLStoreSDNode (Chain, data, ptr, mask, VLen) + // Mask is a vector of i1 elements, Vlen is i32 + const SDValue &getBasePtr() const { + return getOperand(getOpcode() == ISD::EVL_LOAD ? 1 : 2); + } + const SDValue &getMask() const { + return getOperand(getOpcode() == ISD::EVL_LOAD ? 2 : 3); + } + const SDValue &getVectorLength() const { + return getOperand(getOpcode() == ISD::EVL_LOAD ? 
3 : 4); + } + + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::EVL_LOAD || + N->getOpcode() == ISD::EVL_STORE; + } +}; + +/// This class is used to represent an MLOAD node +class EVLLoadSDNode : public EVLLoadStoreSDNode { +public: + friend class SelectionDAG; + + EVLLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, + ISD::LoadExtType ETy, EVT MemVT, + MachineMemOperand *MMO) + : EVLLoadStoreSDNode(ISD::EVL_LOAD, Order, dl, VTs, MemVT, MMO) { + LoadSDNodeBits.ExtTy = ETy; + LoadSDNodeBits.IsExpanding = false; + } + + ISD::LoadExtType getExtensionType() const { + return static_cast(LoadSDNodeBits.ExtTy); + } + + const SDValue &getBasePtr() const { return getOperand(1); } + const SDValue &getMask() const { return getOperand(2); } + const SDValue &getVectorLength() const { return getOperand(3); } + + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::EVL_LOAD; + } + bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; } +}; + +/// This class is used to represent an MSTORE node +class EVLStoreSDNode : public EVLLoadStoreSDNode { +public: + friend class SelectionDAG; + + EVLStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, + bool isTrunc, EVT MemVT, + MachineMemOperand *MMO) + : EVLLoadStoreSDNode(ISD::EVL_STORE, Order, dl, VTs, MemVT, MMO) { + StoreSDNodeBits.IsTruncating = isTrunc; + StoreSDNodeBits.IsCompressing = false; + } + + /// Return true if the op does a truncation before store. + /// For integers this is the same as doing a TRUNCATE and storing the result. + /// For floats, it is the same as doing an FP_ROUND and storing the result. + bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; } + + /// Returns true if the op does a compression to the vector before storing. + /// The node contiguously stores the active elements (integers or floats) + /// in src (those with their respective bit set in writemask k) to unaligned + /// memory at base_addr. + bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; } + + const SDValue &getValue() const { return getOperand(1); } + const SDValue &getBasePtr() const { return getOperand(2); } + const SDValue &getMask() const { return getOperand(3); } + const SDValue &getVectorLength() const { return getOperand(4); } + + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::EVL_STORE; + } +}; + /// This base class is used to represent MLOAD and MSTORE nodes class MaskedLoadStoreSDNode : public MemSDNode { public: @@ -2226,6 +2382,67 @@ } }; +/// This is a base class used to represent +/// EVL_GATHER and EVL_SCATTER nodes +/// +class EVLGatherScatterSDNode : public MemSDNode { +public: + friend class SelectionDAG; + + EVLGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order, + const DebugLoc &dl, SDVTList VTs, EVT MemVT, + MachineMemOperand *MMO) + : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {} + + // In the both nodes address is Op1, mask is Op2: + // EVLGatherSDNode (Chain, base, index, scale, mask, vlen) + // EVLScatterSDNode (Chain, value, base, index, sckae, mask, vlen) + // Mask is a vector of i1 elements + const SDValue &getBasePtr() const { return getOperand((getOpcode() == ISD::EVL_GATHER) ? 1 : 2); } + const SDValue &getIndex() const { return getOperand((getOpcode() == ISD::EVL_GATHER) ? 2 : 3); } + const SDValue &getScale() const { return getOperand((getOpcode() == ISD::EVL_GATHER) ? 3 : 4); } + const SDValue &getMask() const { return getOperand((getOpcode() == ISD::EVL_GATHER) ? 
4 : 5); } + const SDValue &getVectorLength() const { return getOperand((getOpcode() == ISD::EVL_GATHER) ? 5 : 6); } + + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::EVL_GATHER || + N->getOpcode() == ISD::EVL_SCATTER; + } +}; + +/// This class is used to represent an EVL_GATHER node +/// +class EVLGatherSDNode : public EVLGatherScatterSDNode { +public: + friend class SelectionDAG; + + EVLGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, + EVT MemVT, MachineMemOperand *MMO) + : EVLGatherScatterSDNode(ISD::EVL_GATHER, Order, dl, VTs, MemVT, MMO) {} + + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::EVL_GATHER; + } +}; + +/// This class is used to represent an EVL_SCATTER node +/// +class EVLScatterSDNode : public EVLGatherScatterSDNode { +public: + friend class SelectionDAG; + + EVLScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, + EVT MemVT, MachineMemOperand *MMO) + : EVLGatherScatterSDNode(ISD::EVL_SCATTER, Order, dl, VTs, MemVT, MMO) {} + + const SDValue &getValue() const { return getOperand(1); } + + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::EVL_SCATTER; + } +}; + + /// This is a base class used to represent /// MGATHER and MSCATTER nodes /// Index: include/llvm/IR/Attributes.td =================================================================== --- include/llvm/IR/Attributes.td +++ include/llvm/IR/Attributes.td @@ -130,6 +130,15 @@ /// Return value is always equal to this argument. def Returned : EnumAttr<"returned">; +/// Return value that is equal to this argument on enabled lanes (mask). +def Passthru : EnumAttr<"passthru">; + +/// Mask argument that applies to this function. +def Mask : EnumAttr<"mask">; + +/// Dynamic Vector Length argument of this function. +def VectorLength : EnumAttr<"vlen">; + /// Function can return twice. def ReturnsTwice : EnumAttr<"returns_twice">; Index: include/llvm/IR/EVLBuilder.h =================================================================== --- /dev/null +++ include/llvm/IR/EVLBuilder.h @@ -0,0 +1,84 @@ +#ifndef LLVM_IR_EVLBUILDER_H +#define LLVM_IR_EVLBUILDER_H + +#include +#include +#include + +namespace llvm { + +enum class EVLTypeToken : int8_t { + Scalar = 1, // scalar operand type + Vector = 2, // vectorized operand type + Mask = 3 // vector mask type +}; + +using TypeTokenVec = SmallVector; +using ShortTypeVec = SmallVector; +using ShortValueVec = SmallVector; + +struct +EVLIntrinsicDesc { + Intrinsic::ID ID; // LLVM Intrinsic ID. + TypeTokenVec typeTokens; // Type Parmeters for the LLVM Intrinsic. + int MaskPos; // Parameter index of the Mask parameter. + int EVLPos; // Parameter index of the EVL parameter. 
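+  //
+  // Illustrative sketch only (editorial; the concrete values are an
+  // assumption, not asserted by this patch): for llvm.evl.fadd, declared
+  // below in Intrinsics.td as int_evl_fadd(a, b, mask, vlen) with Mask<2>
+  // and VectorLength<3>, GetEVLIntrinsicDesc(Instruction::FAdd) would be
+  // expected to yield something like
+  //   { Intrinsic::evl_fadd, /*typeTokens=*/{EVLTypeToken::Vector},
+  //     /*MaskPos=*/2, /*EVLPos=*/3 };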
+}; + +using ValArray = ArrayRef; + +class EVLBuilder { + IRBuilder<> & Builder; + // Explicit mask parameter + Value * Mask; + // Explicit vector length parameter + Value * ExplicitVectorLength; + // Compile-time vector length + int StaticVectorLength; + + // get a vlaid mask/evl argument for the current predication contet + Value& GetMaskForType(VectorType & VecTy); + Value& GetEVLForType(VectorType & VecTy); + +public: + EVLBuilder(IRBuilder<> & _builder) + : Builder(_builder) + , Mask(nullptr) + , ExplicitVectorLength(nullptr) + , StaticVectorLength(-1) + {} + + Module & getModule() const; + + // The cannonical vector type for this \p ElementTy + VectorType& getVectorType(Type &ElementTy); + + // Predication context tracker + EVLBuilder& setMask(Value * _Mask) { Mask = _Mask; return *this; } + EVLBuilder& setEVL(Value * _ExplicitVectorLength) { ExplicitVectorLength = _ExplicitVectorLength; return *this; } + EVLBuilder& setStaticVL(int VLen) { StaticVectorLength = VLen; return *this; } + + EVLIntrinsicDesc GetEVLIntrinsicDesc(unsigned OC); + + // Create a map-vectorized copy of the instruction \p Inst with the underlying IRBuilder instance. + // This operation may return nullptr if the instruction could not be vectorized. + Value* CreateVectorCopy(Instruction & Inst, ValArray VecOpArray); + + Value& CreateGEP(ValArray VecOpArray); + + Value& CreateFAdd(ValArray VecOpArray); + Value& CreateFDiv(ValArray VecOpArray); + Value& CreateFMul(ValArray VecOpArray); + Value& CreateFSub(ValArray VecOpArray); + + // Memory + Value& CreateContiguousStore(Value & Val, Value & Pointer); + Value& CreateContiguousLoad(Value & Pointer); + Value& CreateScatter(Value & Val, Value & PointerVec); + Value& CreateGather(Value & PointerVec); +}; + + +} // namespace llvm + +#endif // LLVM_IR_EVLBUILDER_H Index: include/llvm/IR/IntrinsicInst.h =================================================================== --- include/llvm/IR/IntrinsicInst.h +++ include/llvm/IR/IntrinsicInst.h @@ -205,6 +205,85 @@ /// @} }; + class EVLIntrinsic : public IntrinsicInst { + public: + + bool isUnaryOp() const; + bool isBinaryOp() const; + bool isTernaryOp() const; + + CmpInst::Predicate getCmpPredicate() const; + + Value* GetMask() const; + Value* GetVectorLength() const; + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + switch (I->getIntrinsicID()) { + + case Intrinsic::evl_cmp: + + case Intrinsic::evl_and: + case Intrinsic::evl_or: + case Intrinsic::evl_xor: + case Intrinsic::evl_ashr: + case Intrinsic::evl_lshr: + case Intrinsic::evl_shl: + + case Intrinsic::evl_select: + case Intrinsic::evl_compose: + case Intrinsic::evl_compress: + case Intrinsic::evl_expand: + case Intrinsic::evl_vshift: + + case Intrinsic::evl_load: + case Intrinsic::evl_store: + case Intrinsic::evl_gather: + case Intrinsic::evl_scatter: + + case Intrinsic::evl_fneg: + + case Intrinsic::evl_fadd: + case Intrinsic::evl_fsub: + case Intrinsic::evl_fmul: + case Intrinsic::evl_fdiv: + case Intrinsic::evl_frem: + + case Intrinsic::evl_fma: + + case Intrinsic::evl_add: + case Intrinsic::evl_sub: + case Intrinsic::evl_mul: + case Intrinsic::evl_udiv: + case Intrinsic::evl_sdiv: + case Intrinsic::evl_urem: + case Intrinsic::evl_srem: + + case Intrinsic::evl_reduce_add: + case Intrinsic::evl_reduce_mul: + case Intrinsic::evl_reduce_umin: + case Intrinsic::evl_reduce_umax: + case Intrinsic::evl_reduce_smin: + case Intrinsic::evl_reduce_smax: + + case Intrinsic::evl_reduce_and: + case 
Intrinsic::evl_reduce_or: + case Intrinsic::evl_reduce_xor: + + case Intrinsic::evl_reduce_fadd: + case Intrinsic::evl_reduce_fmul: + case Intrinsic::evl_reduce_fmin: + case Intrinsic::evl_reduce_fmax: + return true; + + default: return false; + } + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } + }; + /// This is the common base class for constrained floating point intrinsics. class ConstrainedFPIntrinsic : public IntrinsicInst { public: Index: include/llvm/IR/Intrinsics.td =================================================================== --- include/llvm/IR/Intrinsics.td +++ include/llvm/IR/Intrinsics.td @@ -87,6 +87,25 @@ int ArgNo = argNo; } +// VectorLength - The specified argument is the Dynamic Vector Length of the +// operation. +class VectorLength : IntrinsicProperty { + int ArgNo = argNo; +} + +// Mask - The specified argument contains the per-lane mask of this +// intrinsic. Inputs on masked-out lanes must not affect the result of this +// intrinsic (except for the Passthru argument). +class Mask : IntrinsicProperty { + int ArgNo = argNo; +} +// Passthru - The specified argument contains the per-lane return value +// for this vector intrinsic where the mask is false. +// (requires the Mask attribute in the same function) +class Passthru : IntrinsicProperty { + int ArgNo = argNo; +} + def IntrNoReturn : IntrinsicProperty; // IntrCold - Calls to this intrinsic are cold. @@ -995,6 +1014,267 @@ // Intrinsic to detect whether its argument is a constant. def int_is_constant : Intrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem], "llvm.is.constant">; +//===---------------- Masked/Explicit Vector Length Intrinsics --------------===// + +// Memory Intrinsics +def int_evl_store : Intrinsic<[], + [ llvm_anyvector_ty, + LLVMAnyPointerType>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [ IntrArgMemOnly, Mask<2>, VectorLength<3> ]>; + +def int_evl_load : Intrinsic<[ llvm_anyvector_ty], + [ LLVMAnyPointerType>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [ IntrReadMem, IntrArgMemOnly, Mask<1>, VectorLength<2> ]>; + +def int_evl_gather: Intrinsic<[ llvm_anyvector_ty], + [ LLVMVectorOfAnyPointersToElt<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [ IntrReadMem, IntrReadMem, Mask<1>, VectorLength<2> ]>; + +def int_evl_scatter: Intrinsic<[], + [ llvm_anyvector_ty, + LLVMVectorOfAnyPointersToElt<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [ IntrArgMemOnly, Mask<2>, VectorLength<3> ]>; + +// Reductions +let IntrProperties = [IntrNoMem, Mask<2>, VectorLength<3>] in { +def int_evl_reduce_add : Intrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>, + llvm_anyvector_ty, + LLVMVectorSameWidth<1, llvm_i1_ty>, + llvm_i32_ty]>; +def int_evl_reduce_mul : Intrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>, + llvm_anyvector_ty, + LLVMVectorSameWidth<1, llvm_i1_ty>, + llvm_i32_ty]>; +def int_evl_reduce_and : Intrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>, + llvm_anyvector_ty, + LLVMVectorSameWidth<1, llvm_i1_ty>, + llvm_i32_ty]>; +def int_evl_reduce_or : Intrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>, + llvm_anyvector_ty, + LLVMVectorSameWidth<1, llvm_i1_ty>, + llvm_i32_ty]>; +def int_evl_reduce_xor : Intrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>, + llvm_anyvector_ty, + LLVMVectorSameWidth<1, llvm_i1_ty>, + llvm_i32_ty]>; +def int_evl_reduce_smax : Intrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>, + llvm_anyvector_ty, + LLVMVectorSameWidth<1, llvm_i1_ty>, + llvm_i32_ty]>; +def int_evl_reduce_smin : Intrinsic<[llvm_anyint_ty], + 
[LLVMMatchType<0>, + llvm_anyvector_ty, + LLVMVectorSameWidth<1, llvm_i1_ty>, + llvm_i32_ty]>; +def int_evl_reduce_umax : Intrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>, + llvm_anyvector_ty, + LLVMVectorSameWidth<1, llvm_i1_ty>, + llvm_i32_ty]>; +def int_evl_reduce_umin : Intrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>, + llvm_anyvector_ty, + LLVMVectorSameWidth<1, llvm_i1_ty>, + llvm_i32_ty]>; + +def int_evl_reduce_fadd : Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>, + llvm_anyvector_ty, + LLVMVectorSameWidth<1, llvm_i1_ty>, + llvm_i32_ty]>; +def int_evl_reduce_fmul : Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>, + llvm_anyvector_ty, + LLVMVectorSameWidth<1, llvm_i1_ty>, + llvm_i32_ty]>; +def int_evl_reduce_fmax : Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>, + llvm_anyvector_ty, + LLVMVectorSameWidth<1, llvm_i1_ty>, + llvm_i32_ty]>; +def int_evl_reduce_fmin : Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>, + llvm_anyvector_ty, + LLVMVectorSameWidth<1, llvm_i1_ty>, + llvm_i32_ty]>; +} + +// Binary operators +let IntrProperties = [IntrNoMem, Mask<2>, VectorLength<3>] in { + def int_evl_add : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_evl_sub : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_evl_mul : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_evl_sdiv : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_evl_udiv : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_evl_srem : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_evl_urem : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + + def int_evl_fadd : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_evl_fsub : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_evl_fmul : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_evl_fdiv : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_evl_frem : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + +// Logical operators + def int_evl_ashr : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_evl_lshr : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_evl_shl : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_evl_or : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def 
int_evl_and : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_evl_xor : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + +// Comparison +// The last argument is the comparison predicate + def int_evl_cmp : Intrinsic<[ llvm_anyvector_ty ], + [ llvm_anyvector_ty, + LLVMMatchType<1>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty, + llvm_i8_ty]>; +} + + + +def int_evl_fneg : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [ IntrNoMem, Mask<1>, VectorLength<2> ]>; + +def int_evl_fma : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [ IntrNoMem, Mask<3>, VectorLength<4> ]>; + +// Shuffle +def int_evl_vshift: Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + llvm_i32_ty, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [ IntrNoMem, Mask<2>, VectorLength<3> ]>; + +def int_evl_expand: Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [ IntrNoMem, Mask<2>, VectorLength<3> ]>; + +def int_evl_compress: Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [ IntrNoMem, Mask<2>, VectorLength<3> ]>; + +// Select +def int_evl_select : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMVectorSameWidth<0, llvm_i1_ty>, + llvm_i32_ty], + [ IntrNoMem, Passthru<1>, Mask<2>, VectorLength<3> ]>; + +// Compose +def int_evl_compose : Intrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + llvm_i32_ty, + llvm_i32_ty], + [ IntrNoMem, VectorLength<3> ]>; + + + //===-------------------------- Masked Intrinsics -------------------------===// // Index: include/llvm/Target/TargetSelectionDAG.td =================================================================== --- include/llvm/Target/TargetSelectionDAG.td +++ include/llvm/Target/TargetSelectionDAG.td @@ -128,6 +128,13 @@ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3> ]>; +def SDTIntBinOpEVL : SDTypeProfile<1, 4, [ // evl_add, evl_and, etc. + SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<4>, SDTCisSameNumEltsAs<0, 3> +]>; +def SDTIntShiftOpEVL : SDTypeProfile<1, 4, [ // shl, sra, srl + SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<2>, SDTCisInt<4>, SDTCisSameNumEltsAs<0, 3> +]>; + def SDTFPBinOp : SDTypeProfile<1, 2, [ // fadd, fmul, etc. SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0> ]>; @@ -170,6 +177,16 @@ SDTCisOpSmallerThanOp<1, 0> ]>; +def SDTFPUnOpEVL : SDTypeProfile<1, 3, [ // evl_fneg, etc. + SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<3>, SDTCisSameNumEltsAs<0, 2> +]>; +def SDTFPBinOpEVL : SDTypeProfile<1, 4, [ // evl_fadd, etc. + SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>, SDTCisInt<4>, SDTCisSameNumEltsAs<0, 3> +]>; +def SDTFPTernaryOpEVL : SDTypeProfile<1, 5, [ // evl_fmadd, etc. 
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisFP<0>, SDTCisInt<5>, SDTCisSameNumEltsAs<0, 4> +]>; + def SDTSetCC : SDTypeProfile<1, 3, [ // setcc SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT> ]>; @@ -182,6 +199,10 @@ SDTCisVec<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>, SDTCisSameNumEltsAs<0, 1> ]>; +def SDTVSelectEVL : SDTypeProfile<1, 5, [ // evl_vselect + SDTCisVec<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>, SDTCisSameNumEltsAs<0, 1>, SDTCisInt<5>, SDTCisSameNumEltsAs<0, 4> +]>; + def SDTSelectCC : SDTypeProfile<1, 5, [ // select_cc SDTCisSameAs<1, 2>, SDTCisSameAs<3, 4>, SDTCisSameAs<0, 3>, SDTCisVT<5, OtherVT> @@ -225,11 +246,20 @@ SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisSameNumEltsAs<0, 2> ]>; +def SDTStoreEVL: SDTypeProfile<0, 4, [ // evl store + SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisSameNumEltsAs<0, 2>, SDTCisInt<3> +]>; + def SDTMaskedLoad: SDTypeProfile<1, 3, [ // masked load SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisSameAs<0, 3>, SDTCisSameNumEltsAs<0, 2> ]>; +def SDTLoadEVL : SDTypeProfile<1, 3, [ // evl load + SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisSameNumEltsAs<0, 2>, SDTCisInt<3>, + SDTCisSameNumEltsAs<0, 2> +]>; + def SDTVecShuffle : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> ]>; @@ -385,6 +415,26 @@ def umax : SDNode<"ISD::UMAX" , SDTIntBinOp, [SDNPCommutative, SDNPAssociative]>; +def evl_and : SDNode<"ISD::EVL_AND" , SDTIntBinOpEVL , + [SDNPCommutative, SDNPAssociative]>; +def evl_or : SDNode<"ISD::EVL_OR" , SDTIntBinOpEVL , + [SDNPCommutative, SDNPAssociative]>; +def evl_xor : SDNode<"ISD::EVL_XOR" , SDTIntBinOpEVL , + [SDNPCommutative, SDNPAssociative]>; +def evl_srl : SDNode<"ISD::EVL_SRL" , SDTIntShiftOpEVL>; +def evl_sra : SDNode<"ISD::EVL_SRA" , SDTIntShiftOpEVL>; +def evl_shl : SDNode<"ISD::EVL_SHL" , SDTIntShiftOpEVL>; + +def evl_add : SDNode<"ISD::EVL_ADD" , SDTIntBinOpEVL , + [SDNPCommutative, SDNPAssociative]>; +def evl_sub : SDNode<"ISD::EVL_SUB" , SDTIntBinOpEVL>; +def evl_mul : SDNode<"ISD::EVL_MUL" , SDTIntBinOpEVL, + [SDNPCommutative, SDNPAssociative]>; +def evl_sdiv : SDNode<"ISD::EVL_SDIV" , SDTIntBinOpEVL>; +def evl_udiv : SDNode<"ISD::EVL_UDIV" , SDTIntBinOpEVL>; +def evl_srem : SDNode<"ISD::EVL_SREM" , SDTIntBinOpEVL>; +def evl_urem : SDNode<"ISD::EVL_UREM" , SDTIntBinOpEVL>; + def saddsat : SDNode<"ISD::SADDSAT" , SDTIntBinOp, [SDNPCommutative]>; def uaddsat : SDNode<"ISD::UADDSAT" , SDTIntBinOp, [SDNPCommutative]>; def ssubsat : SDNode<"ISD::SSUBSAT" , SDTIntBinOp>; @@ -452,6 +502,14 @@ def fpextend : SDNode<"ISD::FP_EXTEND" , SDTFPExtendOp>; def fcopysign : SDNode<"ISD::FCOPYSIGN" , SDTFPSignOp>; +def evl_fneg : SDNode<"ISD::EVL_FNEG" , SDTFPUnOpEVL>; +def evl_fadd : SDNode<"ISD::EVL_FADD" , SDTFPBinOpEVL, [SDNPCommutative]>; +def evl_fsub : SDNode<"ISD::EVL_FSUB" , SDTFPBinOpEVL>; +def evl_fmul : SDNode<"ISD::EVL_FMUL" , SDTFPBinOpEVL, [SDNPCommutative]>; +def evl_fdiv : SDNode<"ISD::EVL_FDIV" , SDTFPBinOpEVL>; +def evl_frem : SDNode<"ISD::EVL_FREM" , SDTFPBinOpEVL>; +def evl_fma : SDNode<"ISD::EVL_FMA" , SDTFPTernaryOpEVL>; + def sint_to_fp : SDNode<"ISD::SINT_TO_FP" , SDTIntToFPOp>; def uint_to_fp : SDNode<"ISD::UINT_TO_FP" , SDTIntToFPOp>; def fp_to_sint : SDNode<"ISD::FP_TO_SINT" , SDTFPToIntOp>; @@ -459,10 +517,10 @@ def f16_to_fp : SDNode<"ISD::FP16_TO_FP" , SDTIntToFPOp>; def fp_to_f16 : SDNode<"ISD::FP_TO_FP16" , SDTFPToIntOp>; -def setcc : SDNode<"ISD::SETCC" , SDTSetCC>; -def select : SDNode<"ISD::SELECT" , SDTSelect>; -def vselect : 
SDNode<"ISD::VSELECT" , SDTVSelect>; -def selectcc : SDNode<"ISD::SELECT_CC" , SDTSelectCC>; +def setcc : SDNode<"ISD::SETCC" , SDTSetCC>; +def select : SDNode<"ISD::SELECT" , SDTSelect>; +def vselect : SDNode<"ISD::VSELECT" , SDTVSelect>; +def selectcc : SDNode<"ISD::SELECT_CC" , SDTSelectCC>; def brcc : SDNode<"ISD::BR_CC" , SDTBrCC, [SDNPHasChain]>; def brcond : SDNode<"ISD::BRCOND" , SDTBrcond, [SDNPHasChain]>; @@ -530,6 +588,11 @@ def masked_load : SDNode<"ISD::MLOAD", SDTMaskedLoad, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def evl_store : SDNode<"ISD::EVL_STORE", SDTMaskedStore, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def evl_load : SDNode<"ISD::EVL_LOAD", SDTMaskedLoad, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + // Do not use ld, st directly. Use load, extload, sextload, zextload, store, // and truncst (see below). def ld : SDNode<"ISD::LOAD" , SDTLoad, Index: lib/AsmParser/LLLexer.cpp =================================================================== --- lib/AsmParser/LLLexer.cpp +++ lib/AsmParser/LLLexer.cpp @@ -642,6 +642,7 @@ KEYWORD(inlinehint); KEYWORD(inreg); KEYWORD(jumptable); + KEYWORD(mask); KEYWORD(minsize); KEYWORD(naked); KEYWORD(nest); @@ -661,6 +662,7 @@ KEYWORD(optforfuzzing); KEYWORD(optnone); KEYWORD(optsize); + KEYWORD(passthru); KEYWORD(readnone); KEYWORD(readonly); KEYWORD(returned); @@ -682,6 +684,7 @@ KEYWORD(swifterror); KEYWORD(swiftself); KEYWORD(uwtable); + KEYWORD(vlen); KEYWORD(writeonly); KEYWORD(zeroext); Index: lib/AsmParser/LLParser.cpp =================================================================== --- lib/AsmParser/LLParser.cpp +++ lib/AsmParser/LLParser.cpp @@ -1294,14 +1294,17 @@ case lltok::kw_dereferenceable: case lltok::kw_dereferenceable_or_null: case lltok::kw_inalloca: + case lltok::kw_mask: case lltok::kw_nest: case lltok::kw_noalias: case lltok::kw_nocapture: case lltok::kw_nonnull: + case lltok::kw_passthru: case lltok::kw_returned: case lltok::kw_sret: case lltok::kw_swifterror: case lltok::kw_swiftself: + case lltok::kw_vlen: HaveError |= Error(Lex.getLoc(), "invalid use of parameter-only attribute on a function"); @@ -1582,10 +1585,12 @@ } case lltok::kw_inalloca: B.addAttribute(Attribute::InAlloca); break; case lltok::kw_inreg: B.addAttribute(Attribute::InReg); break; + case lltok::kw_mask: B.addAttribute(Attribute::Mask); break; case lltok::kw_nest: B.addAttribute(Attribute::Nest); break; case lltok::kw_noalias: B.addAttribute(Attribute::NoAlias); break; case lltok::kw_nocapture: B.addAttribute(Attribute::NoCapture); break; case lltok::kw_nonnull: B.addAttribute(Attribute::NonNull); break; + case lltok::kw_passthru: B.addAttribute(Attribute::Passthru); break; case lltok::kw_readnone: B.addAttribute(Attribute::ReadNone); break; case lltok::kw_readonly: B.addAttribute(Attribute::ReadOnly); break; case lltok::kw_returned: B.addAttribute(Attribute::Returned); break; @@ -1593,6 +1598,7 @@ case lltok::kw_sret: B.addAttribute(Attribute::StructRet); break; case lltok::kw_swifterror: B.addAttribute(Attribute::SwiftError); break; case lltok::kw_swiftself: B.addAttribute(Attribute::SwiftSelf); break; + case lltok::kw_vlen: B.addAttribute(Attribute::VectorLength); break; case lltok::kw_writeonly: B.addAttribute(Attribute::WriteOnly); break; case lltok::kw_zeroext: B.addAttribute(Attribute::ZExt); break; @@ -1683,12 +1689,15 @@ // Error handling. 
case lltok::kw_byval: case lltok::kw_inalloca: + case lltok::kw_mask: case lltok::kw_nest: case lltok::kw_nocapture: + case lltok::kw_passthru: case lltok::kw_returned: case lltok::kw_sret: case lltok::kw_swifterror: case lltok::kw_swiftself: + case lltok::kw_vlen: HaveError |= Error(Lex.getLoc(), "invalid use of parameter-only attribute"); break; @@ -3294,7 +3303,7 @@ ID.Kind = ValID::t_Constant; return false; } - + // Unary Operators. case lltok::kw_fneg: { unsigned Opc = Lex.getUIntVal(); @@ -3304,7 +3313,7 @@ ParseGlobalTypeAndValue(Val) || ParseToken(lltok::rparen, "expected ')' in unary constantexpr")) return true; - + // Check that the type is valid for the operator. switch (Opc) { case Instruction::FNeg: @@ -6169,11 +6178,11 @@ Valid = LHS->getType()->isIntOrIntVectorTy() || LHS->getType()->isFPOrFPVectorTy(); break; - case 1: - Valid = LHS->getType()->isIntOrIntVectorTy(); + case 1: + Valid = LHS->getType()->isIntOrIntVectorTy(); break; - case 2: - Valid = LHS->getType()->isFPOrFPVectorTy(); + case 2: + Valid = LHS->getType()->isFPOrFPVectorTy(); break; } Index: lib/AsmParser/LLToken.h =================================================================== --- lib/AsmParser/LLToken.h +++ lib/AsmParser/LLToken.h @@ -186,6 +186,7 @@ kw_inlinehint, kw_inreg, kw_jumptable, + kw_mask, kw_minsize, kw_naked, kw_nest, @@ -205,6 +206,7 @@ kw_optforfuzzing, kw_optnone, kw_optsize, + kw_passthru, kw_readnone, kw_readonly, kw_returned, @@ -224,6 +226,7 @@ kw_swifterror, kw_swiftself, kw_uwtable, + kw_vlen, kw_writeonly, kw_zeroext, Index: lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- lib/Bitcode/Reader/BitcodeReader.cpp +++ lib/Bitcode/Reader/BitcodeReader.cpp @@ -1332,6 +1332,8 @@ return Attribute::InReg; case bitc::ATTR_KIND_JUMP_TABLE: return Attribute::JumpTable; + case bitc::ATTR_KIND_MASK: + return Attribute::Mask; case bitc::ATTR_KIND_MIN_SIZE: return Attribute::MinSize; case bitc::ATTR_KIND_NAKED: @@ -1376,6 +1378,8 @@ return Attribute::OptimizeForSize; case bitc::ATTR_KIND_OPTIMIZE_NONE: return Attribute::OptimizeNone; + case bitc::ATTR_KIND_PASSTHRU: + return Attribute::Passthru; case bitc::ATTR_KIND_READ_NONE: return Attribute::ReadNone; case bitc::ATTR_KIND_READ_ONLY: @@ -1420,6 +1424,8 @@ return Attribute::SwiftSelf; case bitc::ATTR_KIND_UW_TABLE: return Attribute::UWTable; + case bitc::ATTR_KIND_VECTORLENGTH: + return Attribute::VectorLength; case bitc::ATTR_KIND_WRITEONLY: return Attribute::WriteOnly; case bitc::ATTR_KIND_Z_EXT: Index: lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- lib/Bitcode/Writer/BitcodeWriter.cpp +++ lib/Bitcode/Writer/BitcodeWriter.cpp @@ -670,6 +670,12 @@ return bitc::ATTR_KIND_READ_ONLY; case Attribute::Returned: return bitc::ATTR_KIND_RETURNED; + case Attribute::Mask: + return bitc::ATTR_KIND_MASK; + case Attribute::VectorLength: + return bitc::ATTR_KIND_VECTORLENGTH; + case Attribute::Passthru: + return bitc::ATTR_KIND_PASSTHRU; case Attribute::ReturnsTwice: return bitc::ATTR_KIND_RETURNS_TWICE; case Attribute::SExt: Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -353,6 +353,7 @@ SDValue visitBITCAST(SDNode *N); SDValue visitBUILD_PAIR(SDNode *N); SDValue visitFADD(SDNode *N); + SDValue visitFADD_EVL(SDNode *N); SDValue visitFSUB(SDNode *N); SDValue visitFMUL(SDNode *N); 
SDValue visitFMA(SDNode *N); @@ -400,6 +401,7 @@ SDValue visitFP_TO_FP16(SDNode *N); SDValue visitFP16_TO_FP(SDNode *N); + template SDValue visitFADDForFMACombine(SDNode *N); SDValue visitFSUBForFMACombine(SDNode *N); SDValue visitFMULForFMADistributiveCombine(SDNode *N); @@ -641,6 +643,138 @@ } }; +// TODO port this to EVL nodes +struct EmptyMatchContext { + SelectionDAG & DAG; + + EmptyMatchContext(SelectionDAG & DAG, SDNode * Root) + : DAG(DAG) + {} + + bool match(SDValue OpN, unsigned OpCode) const { return OpCode == OpN->getOpcode(); } + + unsigned getFunctionOpCode(SDValue N) const { + return N->getOpcode(); + } + + bool isCompatible(SDValue OpVal) const { return true; } + + // Specialize based on number of operands. + SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { return DAG.getNode(Opcode, DL, VT); } + SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand, + const SDNodeFlags Flags = SDNodeFlags()) { + return DAG.getNode(Opcode, DL, VT, Operand, Flags); + } + SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, + SDValue N2, const SDNodeFlags Flags = SDNodeFlags()) { + return DAG.getNode(Opcode, DL, VT, N1, N2, Flags); + } + SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, + SDValue N2, SDValue N3, + const SDNodeFlags Flags = SDNodeFlags()) { + return DAG.getNode(Opcode, DL, VT, N1, N2, N3); + } + + SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, + SDValue N2, SDValue N3, SDValue N4) { + return DAG.getNode(Opcode, DL, VT, N1, N2, N3, N4); + } + + SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, + SDValue N2, SDValue N3, SDValue N4, SDValue N5) { + return DAG.getNode(Opcode, DL, VT, N1, N2, N3, N4, N5); + } +}; + +struct +EVLMatchContext { + SelectionDAG & DAG; + SDNode * Root; + SDValue RootMaskOp; + SDValue RootVectorLenOp; + + EVLMatchContext(SelectionDAG & DAG, SDNode * Root) + : DAG(DAG) + , Root(Root) + , RootMaskOp() + , RootVectorLenOp() + { + if (Root->isEVL()) { + int RootMaskPos = ISD::GetMaskPosEVL(Root->getOpcode()); + if (RootMaskPos != -1) { + RootMaskOp = Root->getOperand(RootMaskPos); + } + + int RootVLenPos = ISD::GetVectorLengthPosEVL(Root->getOpcode()); + if (RootVLenPos != -1) { + RootVectorLenOp = Root->getOperand(RootVLenPos); + } + } + } + + unsigned getFunctionOpCode(SDValue N) const { + unsigned EVLOpCode = N->getOpcode(); + return ISD::GetFunctionOpCodeForEVL(EVLOpCode); + } + + bool isCompatible(SDValue OpVal) const { + if (!OpVal->isEVL()) { + return !Root->isEVL(); + + } else { + unsigned EVLOpCode = OpVal->getOpcode(); + int MaskPos = ISD::GetMaskPosEVL(EVLOpCode); + if (MaskPos != -1 && RootMaskOp != OpVal.getOperand(MaskPos)) { + return false; + } + + int VLenPos = ISD::GetVectorLengthPosEVL(EVLOpCode); + if (VLenPos != -1 && RootVectorLenOp != OpVal.getOperand(VLenPos)) { + return false; + } + + return true; + } + } + + /// whether \p OpN is a node that is functionally compatible with the NodeType \p OpNodeTy + bool match(SDValue OpVal, unsigned OpNT) const { + return isCompatible(OpVal) && getFunctionOpCode(OpVal) == OpNT; + } + + // Specialize based on number of operands. 
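+  //
+  // Worked example (editorial, illustrative): with
+  //   Root = EVL_FADD(EVL_FMUL(x, y, M, L), z, M, L)
+  // match(N0, ISD::FMUL) succeeds because N0's mask/vector-length operands
+  // equal the root's, and getNode(ISD::FMA, DL, VT, x, y, z) below emits
+  //   EVL_FMA(x, y, z, M, L)
+  // i.e. the fused node inherits the root's predicate M and vector length L.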
+ // TODO emit EVL intrinsics where MaskOp/VectorLenOp != null + // SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { return DAG.getNode(Opcode, DL, VT); } + SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand, + const SDNodeFlags Flags = SDNodeFlags()) { + unsigned EVLOpcode = ISD::GetEVLForFunctionOpCode(Opcode); + int MaskPos = ISD::GetMaskPosEVL(EVLOpcode); + int VLenPos = ISD::GetVectorLengthPosEVL(EVLOpcode); + assert(MaskPos == 1 && VLenPos == 2); + + return DAG.getNode(EVLOpcode, DL, VT, {Operand, RootMaskOp, RootVectorLenOp}, Flags); + } + SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, + SDValue N2, const SDNodeFlags Flags = SDNodeFlags()) { + unsigned EVLOpcode = ISD::GetEVLForFunctionOpCode(Opcode); + int MaskPos = ISD::GetMaskPosEVL(EVLOpcode); + int VLenPos = ISD::GetVectorLengthPosEVL(EVLOpcode); + assert(MaskPos == 2 && VLenPos == 3); + + return DAG.getNode(EVLOpcode, DL, VT, {N1, N2, RootMaskOp, RootVectorLenOp}, Flags); + } + SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, + SDValue N2, SDValue N3, + const SDNodeFlags Flags = SDNodeFlags()) { + unsigned EVLOpcode = ISD::GetEVLForFunctionOpCode(Opcode); + int MaskPos = ISD::GetMaskPosEVL(EVLOpcode); + int VLenPos = ISD::GetVectorLengthPosEVL(EVLOpcode); + assert(MaskPos == 3 && VLenPos == 4); + + return DAG.getNode(EVLOpcode, DL, VT, {N1, N2, N3, RootMaskOp, RootVectorLenOp}, Flags); + } +}; + } // end anonymous namespace //===----------------------------------------------------------------------===// @@ -1549,6 +1683,7 @@ case ISD::BITCAST: return visitBITCAST(N); case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); case ISD::FADD: return visitFADD(N); + case ISD::EVL_FADD: return visitFADD_EVL(N); case ISD::FSUB: return visitFSUB(N); case ISD::FMUL: return visitFMUL(N); case ISD::FMA: return visitFMA(N); @@ -10440,13 +10575,18 @@ return F.hasAllowContract() || F.hasAllowReassociation(); } + /// Try to perform FMA combining on a given FADD node. +template SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); SDLoc SL(N); + MatchContextClass matcher(DAG, N); + if (!matcher.isCompatible(N0) || !matcher.isCompatible(N1)) return SDValue(); + const TargetOptions &Options = DAG.getTarget().Options; // Floating-point multiply-add with intermediate rounding. @@ -10479,8 +10619,8 @@ // Is the node an FMUL and contractable either due to global flags or // SDNodeFlags. - auto isContractableFMUL = [AllowFusionGlobally](SDValue N) { - if (N.getOpcode() != ISD::FMUL) + auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) { + if (!matcher.match(N, ISD::FMUL)) return false; return AllowFusionGlobally || isContractable(N.getNode()); }; @@ -10493,42 +10633,42 @@ // fold (fadd (fmul x, y), z) -> (fma x, y, z) if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) { - return DAG.getNode(PreferredFusedOpcode, SL, VT, + return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1), N1, Flags); } // fold (fadd x, (fmul y, z)) -> (fma y, z, x) // Note: Commutes FADD operands. if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) { - return DAG.getNode(PreferredFusedOpcode, SL, VT, + return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0), N1.getOperand(1), N0, Flags); } // Look through FP_EXTEND nodes to do more combining. 
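+  // (Editorial note: in the EVL path the extended operand must itself be
+  //  compatible with the root's mask and vector length, which is what the
+  //  added matcher.isCompatible(N0.getOperand(0)) check below enforces.)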
// fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) - if (N0.getOpcode() == ISD::FP_EXTEND) { + if ((N0.getOpcode() == ISD::FP_EXTEND) && matcher.isCompatible(N0.getOperand(0))) { SDValue N00 = N0.getOperand(0); if (isContractableFMUL(N00) && TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { - return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, + return matcher.getNode(PreferredFusedOpcode, SL, VT, + matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, + matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1, Flags); } } // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x) // Note: Commutes FADD operands. - if (N1.getOpcode() == ISD::FP_EXTEND) { + if (matcher.match(N1, ISD::FP_EXTEND)) { SDValue N10 = N1.getOperand(0); if (isContractableFMUL(N10) && TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) { - return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, + return matcher.getNode(PreferredFusedOpcode, SL, VT, + matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, + matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0, Flags); } } @@ -10537,12 +10677,12 @@ if (Aggressive) { // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) if (CanFuse && - N0.getOpcode() == PreferredFusedOpcode && - N0.getOperand(2).getOpcode() == ISD::FMUL && + matcher.match(N0, PreferredFusedOpcode) && + matcher.match(N0.getOperand(2), ISD::FMUL) && N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) { - return DAG.getNode(PreferredFusedOpcode, SL, VT, + return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1), - DAG.getNode(PreferredFusedOpcode, SL, VT, + matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(2).getOperand(0), N0.getOperand(2).getOperand(1), N1, Flags), Flags); @@ -10550,12 +10690,12 @@ // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) if (CanFuse && - N1->getOpcode() == PreferredFusedOpcode && - N1.getOperand(2).getOpcode() == ISD::FMUL && + matcher.match(N1, PreferredFusedOpcode) && + matcher.match(N1.getOperand(2), ISD::FMUL) && N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) { - return DAG.getNode(PreferredFusedOpcode, SL, VT, + return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0), N1.getOperand(1), - DAG.getNode(PreferredFusedOpcode, SL, VT, + matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(2).getOperand(0), N1.getOperand(2).getOperand(1), N0, Flags), Flags); @@ -10567,15 +10707,15 @@ auto FoldFAddFMAFPExtFMul = [&] ( SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z, SDNodeFlags Flags) { - return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y, - DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, U), - DAG.getNode(ISD::FP_EXTEND, SL, VT, V), + return matcher.getNode(PreferredFusedOpcode, SL, VT, X, Y, + matcher.getNode(PreferredFusedOpcode, SL, VT, + matcher.getNode(ISD::FP_EXTEND, SL, VT, U), + matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z, Flags), Flags); }; - if (N0.getOpcode() == PreferredFusedOpcode) { + if (matcher.match(N0, PreferredFusedOpcode)) { SDValue N02 = N0.getOperand(2); - if (N02.getOpcode() == ISD::FP_EXTEND) { + if (matcher.match(N02, ISD::FP_EXTEND)) { SDValue N020 = N02.getOperand(0); if (isContractableFMUL(N020) && TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) { @@ -10594,12 
+10734,12 @@ auto FoldFAddFPExtFMAFMul = [&] ( SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z, SDNodeFlags Flags) { - return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, X), - DAG.getNode(ISD::FP_EXTEND, SL, VT, Y), - DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, U), - DAG.getNode(ISD::FP_EXTEND, SL, VT, V), + return matcher.getNode(PreferredFusedOpcode, SL, VT, + matcher.getNode(ISD::FP_EXTEND, SL, VT, X), + matcher.getNode(ISD::FP_EXTEND, SL, VT, Y), + matcher.getNode(PreferredFusedOpcode, SL, VT, + matcher.getNode(ISD::FP_EXTEND, SL, VT, U), + matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z, Flags), Flags); }; if (N0.getOpcode() == ISD::FP_EXTEND) { @@ -11036,6 +11176,15 @@ return SDValue(); } +SDValue DAGCombiner::visitFADD_EVL(SDNode *N) { + // FADD -> FMA combines: + if (SDValue Fused = visitFADDForFMACombine(N)) { + AddToWorklist(Fused.getNode()); + return Fused; + } + return SDValue(); +} + SDValue DAGCombiner::visitFADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -11206,7 +11355,7 @@ } // enable-unsafe-fp-math // FADD -> FMA combines: - if (SDValue Fused = visitFADDForFMACombine(N)) { + if (SDValue Fused = visitFADDForFMACombine(N)) { AddToWorklist(Fused.getNode()); return Fused; } @@ -17715,7 +17864,7 @@ NewMask.push_back(M < 0 ? -1 : Scale * M + s); return NewMask; }; - + SDValue BC0 = peekThroughOneUseBitcasts(N0); if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) { EVT SVT = VT.getScalarType(); Index: lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -885,7 +885,7 @@ } // Handle promotion for the ADDE/SUBE/ADDCARRY/SUBCARRY nodes. Notice that -// the third operand of ADDE/SUBE nodes is carry flag, which differs from +// the third operand of ADDE/SUBE nodes is carry flag, which differs from // the ADDCARRY/SUBCARRY nodes in that the third operand is carry Boolean. SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) { if (ResNo == 1) @@ -1031,6 +1031,9 @@ return false; } + if (N->isEVL()) { + Res = PromoteIntOp_EVL(N, OpNo); + } else { switch (N->getOpcode()) { default: #ifndef NDEBUG @@ -1092,6 +1095,7 @@ case ISD::SMULFIX: Res = PromoteIntOp_SMULFIX(N); break; } + } // If the result is null, the sub-method took care of registering results etc. 
if (!Res.getNode()) return false; @@ -1365,6 +1369,25 @@ TruncateStore, N->isCompressingStore()); } +SDValue DAGTypeLegalizer::PromoteIntOp_EVL(SDNode *N, unsigned OpNo) { + EVT DataVT; + switch (N->getOpcode()) { + default: + DataVT = N->getValueType(0); + break; + + case ISD::EVL_STORE: + case ISD::EVL_SCATTER: + llvm_unreachable("TODO implement EVL memory nodes"); + } + + // TODO assert that \p OpNo is the mask + SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); + SmallVector NewOps(N->op_begin(), N->op_end()); + NewOps[OpNo] = Mask; + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo) { assert(OpNo == 2 && "Only know how to promote the mask!"); Index: lib/CodeGen/SelectionDAG/LegalizeTypes.h =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -347,6 +347,7 @@ SDValue PromoteIntRes_SMULFIX(SDNode *N); SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N); + // Integer Operand Promotion. bool PromoteIntegerOperand(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_ANY_EXTEND(SDNode *N); @@ -379,6 +380,7 @@ SDValue PromoteIntOp_FRAMERETURNADDR(SDNode *N); SDValue PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SMULFIX(SDNode *N); + SDValue PromoteIntOp_EVL(SDNode *N, unsigned OpNo); void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -432,6 +432,183 @@ return Result; } +//===----------------------------------------------------------------------===// +// SDNode EVL Support +//===----------------------------------------------------------------------===// + +int +ISD::GetMaskPosEVL(unsigned OpCode) { + switch (OpCode) { + default: return -1; + + case ISD::EVL_FNEG: + return 1; + + case ISD::EVL_ADD: + case ISD::EVL_SUB: + case ISD::EVL_MUL: + case ISD::EVL_SDIV: + case ISD::EVL_SREM: + case ISD::EVL_UDIV: + case ISD::EVL_UREM: + + case ISD::EVL_AND: + case ISD::EVL_OR: + case ISD::EVL_XOR: + case ISD::EVL_SHL: + case ISD::EVL_SRA: + case ISD::EVL_SRL: + case ISD::EVL_FDIV: + case ISD::EVL_FREM: + + case ISD::EVL_FADD: + case ISD::EVL_FMUL: + return 2; + + case ISD::EVL_FMA: + case ISD::EVL_SELECT: + return 3; + + case EVL_REDUCE_FADD: + case EVL_REDUCE_FMUL: + case EVL_REDUCE_ADD: + case EVL_REDUCE_MUL: + case EVL_REDUCE_AND: + case EVL_REDUCE_OR: + case EVL_REDUCE_XOR: + case EVL_REDUCE_SMAX: + case EVL_REDUCE_SMIN: + case EVL_REDUCE_UMAX: + case EVL_REDUCE_UMIN: + case VECREDUCE_FMAX: + case VECREDUCE_FMIN: + case EVL_REDUCE_FMAX: + case EVL_REDUCE_FMIN: + return 1; + + /// FMIN/FMAX nodes can have flags, for NaN/NoNaN variants. 
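+  //
+  // Examples (derived from the cases above): EVL_FADD(lhs, rhs, mask, vlen)
+  // gives a mask position of 2, EVL_FMA(a, b, c, mask, vlen) gives 3, and the
+  // EVL_REDUCE_* nodes give 1, which implies a (vector, mask, vlen) operand
+  // layout for the reductions (an inference, not stated elsewhere in the patch).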
+ // (implicit) case ISD::EVL_COMPOSE: return -1 + } +} + +int +ISD::GetVectorLengthPosEVL(unsigned OpCode) { + switch (OpCode) { + default: return -1; + + case ISD::EVL_SELECT: + return 0; + + case ISD::EVL_FNEG: + return 2; + + case ISD::EVL_ADD: + case ISD::EVL_SUB: + case ISD::EVL_MUL: + case ISD::EVL_SDIV: + case ISD::EVL_SREM: + case ISD::EVL_UDIV: + case ISD::EVL_UREM: + + case ISD::EVL_AND: + case ISD::EVL_OR: + case ISD::EVL_XOR: + case ISD::EVL_SHL: + case ISD::EVL_SRA: + case ISD::EVL_SRL: + + case ISD::EVL_FADD: + case ISD::EVL_FMUL: + case ISD::EVL_FDIV: + case ISD::EVL_FREM: + return 3; + + case ISD::EVL_FMA: + return 4; + + case ISD::EVL_COMPOSE: + return 3; + + case EVL_REDUCE_FADD: + case EVL_REDUCE_FMUL: + case EVL_REDUCE_ADD: + case EVL_REDUCE_MUL: + case EVL_REDUCE_AND: + case EVL_REDUCE_OR: + case EVL_REDUCE_XOR: + case EVL_REDUCE_SMAX: + case EVL_REDUCE_SMIN: + case EVL_REDUCE_UMAX: + case EVL_REDUCE_UMIN: + case EVL_REDUCE_FMAX: + case EVL_REDUCE_FMIN: + return 2; + } +} + +unsigned +ISD::GetFunctionOpCodeForEVL(unsigned OpCode) { + switch (OpCode) { + default: return OpCode; + + case ISD::EVL_SELECT: return ISD::VSELECT; + case ISD::EVL_FNEG: return ISD::FNEG; + case ISD::EVL_ADD: return ISD::ADD; + case ISD::EVL_SUB: return ISD::SUB; + case ISD::EVL_MUL: return ISD::MUL; + case ISD::EVL_SDIV: return ISD::SDIV; + case ISD::EVL_SREM: return ISD::SREM; + case ISD::EVL_UDIV: return ISD::UDIV; + case ISD::EVL_UREM: return ISD::UREM; + + case ISD::EVL_AND: return ISD::AND; + case ISD::EVL_OR: return ISD::OR; + case ISD::EVL_XOR: return ISD::XOR; + case ISD::EVL_SHL: return ISD::SHL; + case ISD::EVL_SRA: return ISD::SRA; + case ISD::EVL_SRL: return ISD::SRL; + case ISD::EVL_FDIV: return ISD::FDIV; + case ISD::EVL_FREM: return ISD::FREM; + + case ISD::EVL_FADD: return ISD::FADD; + case ISD::EVL_FMUL: return ISD::FMUL; + + case ISD::EVL_FMA: return ISD::FMA; + } +} + +unsigned +ISD::GetEVLForFunctionOpCode(unsigned OpCode) { + switch (OpCode) { + default: llvm_unreachable("can not translate this Opcode to EVL"); + + case ISD::VSELECT:return ISD::EVL_SELECT; + case ISD::FNEG: return ISD::EVL_FNEG; + case ISD::ADD: return ISD::EVL_ADD; + case ISD::SUB: return ISD::EVL_SUB; + case ISD::MUL: return ISD::EVL_MUL; + case ISD::SDIV: return ISD::EVL_SDIV; + case ISD::SREM: return ISD::EVL_SREM; + case ISD::UDIV: return ISD::EVL_UDIV; + case ISD::UREM: return ISD::EVL_UREM; + + case ISD::AND: return ISD::EVL_AND; + case ISD::OR: return ISD::EVL_OR; + case ISD::XOR: return ISD::EVL_XOR; + case ISD::SHL: return ISD::EVL_SHL; + case ISD::SRA: return ISD::EVL_SRA; + case ISD::SRL: return ISD::EVL_SRL; + case ISD::FDIV: return ISD::EVL_FDIV; + case ISD::FREM: return ISD::EVL_FREM; + + case ISD::FADD: return ISD::EVL_FADD; + case ISD::FMUL: return ISD::EVL_FMUL; + + case ISD::FMA: return ISD::EVL_FMA; + } +} + + //===----------------------------------------------------------------------===// // SDNode Profile Support //===----------------------------------------------------------------------===// @@ -555,6 +732,34 @@ ID.AddInteger(ST->getPointerInfo().getAddrSpace()); break; } + case ISD::EVL_LOAD: { + const EVLLoadSDNode *ELD = cast(N); + ID.AddInteger(ELD->getMemoryVT().getRawBits()); + ID.AddInteger(ELD->getRawSubclassData()); + ID.AddInteger(ELD->getPointerInfo().getAddrSpace()); + break; + } + case ISD::EVL_STORE: { + const EVLStoreSDNode *EST = cast(N); + ID.AddInteger(EST->getMemoryVT().getRawBits()); + ID.AddInteger(EST->getRawSubclassData()); + 
ID.AddInteger(EST->getPointerInfo().getAddrSpace()); + break; + } + case ISD::EVL_GATHER: { + const EVLGatherSDNode *EG = cast(N); + ID.AddInteger(EG->getMemoryVT().getRawBits()); + ID.AddInteger(EG->getRawSubclassData()); + ID.AddInteger(EG->getPointerInfo().getAddrSpace()); + break; + } + case ISD::EVL_SCATTER: { + const EVLScatterSDNode *ES = cast(N); + ID.AddInteger(ES->getMemoryVT().getRawBits()); + ID.AddInteger(ES->getRawSubclassData()); + ID.AddInteger(ES->getPointerInfo().getAddrSpace()); + break; + } case ISD::MLOAD: { const MaskedLoadSDNode *MLD = cast(N); ID.AddInteger(MLD->getMemoryVT().getRawBits()); @@ -6868,6 +7073,34 @@ return V; } +SDValue SelectionDAG::getLoadEVL(EVT VT, const SDLoc &dl, SDValue Chain, + SDValue Ptr, SDValue Mask, SDValue VLen, + EVT MemVT, MachineMemOperand *MMO, + ISD::LoadExtType ExtTy) { + SDVTList VTs = getVTList(VT, MVT::Other); + SDValue Ops[] = { Chain, Ptr, Mask, VLen }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::EVL_LOAD, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(getSyntheticNodeSubclassData( + dl.getIROrder(), VTs, ExtTy, MemVT, MMO)); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { + cast(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + auto *N = newSDNode(dl.getIROrder(), dl.getDebugLoc(), VTs, + ExtTy, MemVT, MMO); + createOperands(N, Ops); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Mask, SDValue PassThru, EVT MemVT, MachineMemOperand *MMO, @@ -6896,6 +7129,111 @@ return V; } +SDValue SelectionDAG::getStoreEVL(SDValue Chain, const SDLoc &dl, + SDValue Val, SDValue Ptr, SDValue Mask, + SDValue VLen, EVT MemVT, MachineMemOperand *MMO, + bool IsTruncating) { + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); + EVT VT = Val.getValueType(); + SDVTList VTs = getVTList(MVT::Other); + SDValue Ops[] = { Chain, Val, Ptr, Mask, VLen }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::EVL_STORE, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(getSyntheticNodeSubclassData( + dl.getIROrder(), VTs, IsTruncating, MemVT, MMO)); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { + cast(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + auto *N = newSDNode(dl.getIROrder(), dl.getDebugLoc(), VTs, + IsTruncating, MemVT, MMO); + createOperands(N, Ops); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + +SDValue SelectionDAG::getGatherEVL(SDVTList VTs, EVT VT, const SDLoc &dl, + ArrayRef Ops, + MachineMemOperand *MMO) { + assert(Ops.size() == 6 && "Incompatible number of operands"); + + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::EVL_GATHER, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(getSyntheticNodeSubclassData( + dl.getIROrder(), VTs, VT, MMO)); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { + cast(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + + auto *N = newSDNode(dl.getIROrder(), dl.getDebugLoc(), + VTs, VT, MMO); + createOperands(N, Ops); + + assert(N->getMask().getValueType().getVectorNumElements() == + N->getValueType(0).getVectorNumElements() && + 
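// Usage sketch (assumed caller context) for the EVL load builder above; Chain,
// Ptr, Mask and VLen are SDValues and MMO a MachineMemOperand the caller
// already owns:
//
//   SDValue Load = DAG.getLoadEVL(VT, DL, Chain, Ptr, Mask, VLen,
//                                 /*MemVT=*/VT, MMO, ISD::NON_EXTLOAD);
//   SDValue Value    = Load.getValue(0); // loaded data
//   SDValue OutChain = Load.getValue(1); // new chain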
"Vector width mismatch between mask and data"); + assert(N->getIndex().getValueType().getVectorNumElements() >= + N->getValueType(0).getVectorNumElements() && + "Vector width mismatch between index and data"); + assert(isa(N->getScale()) && + cast(N->getScale())->getAPIntValue().isPowerOf2() && + "Scale should be a constant power of 2"); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + +SDValue SelectionDAG::getScatterEVL(SDVTList VTs, EVT VT, const SDLoc &dl, + ArrayRef Ops, + MachineMemOperand *MMO) { + assert(Ops.size() == 7 && "Incompatible number of operands"); + + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::EVL_SCATTER, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(getSyntheticNodeSubclassData( + dl.getIROrder(), VTs, VT, MMO)); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { + cast(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + auto *N = newSDNode(dl.getIROrder(), dl.getDebugLoc(), + VTs, VT, MMO); + createOperands(N, Ops); + + assert(N->getMask().getValueType().getVectorNumElements() == + N->getValue().getValueType().getVectorNumElements() && + "Vector width mismatch between mask and data"); + assert(N->getIndex().getValueType().getVectorNumElements() >= + N->getValue().getValueType().getVectorNumElements() && + "Vector width mismatch between index and data"); + assert(isa(N->getScale()) && + cast(N->getScale())->getAPIntValue().isPowerOf2() && + "Scale should be a constant power of 2"); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -934,6 +934,12 @@ const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic); void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI); + void visitExplicitVectorLengthIntrinsic(const EVLIntrinsic &EVLI); + void visitCmpEVL(const EVLIntrinsic &I); + void visitLoadEVL(const CallInst &I); + void visitStoreEVL(const CallInst &I); + void visitGatherEVL(const CallInst &I); + void visitScatterEVL(const CallInst &I); void visitVAStart(const CallInst &I); void visitVAArg(const VAArgInst &I); Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3948,6 +3948,46 @@ setValue(&I, StoreNode); } +void SelectionDAGBuilder::visitStoreEVL(const CallInst &I) { + SDLoc sdl = getCurSDLoc(); + + auto getEVLStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0, + Value * &VLen) { + // llvm.masked.store.*(Src0, Ptr, Mask, VLen) + Src0 = I.getArgOperand(0); + Ptr = I.getArgOperand(1); + Mask = I.getArgOperand(2); + VLen = I.getArgOperand(3); + }; + + Value *PtrOperand, *MaskOperand, *Src0Operand, *VLenOperand; + getEVLStoreOps(PtrOperand, MaskOperand, Src0Operand, VLenOperand); + + unsigned Alignment = 0; // TODO infer alignment + + SDValue Ptr = getValue(PtrOperand); + 
SDValue Src0 = getValue(Src0Operand); + SDValue Mask = getValue(MaskOperand); + SDValue VLen = getValue(VLenOperand); + + EVT VT = Src0.getValueType(); + if (!Alignment) + Alignment = DAG.getEVTAlignment(VT); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + + MachineMemOperand *MMO = + DAG.getMachineFunction(). + getMachineMemOperand(MachinePointerInfo(PtrOperand), + MachineMemOperand::MOStore, VT.getStoreSize(), + Alignment, AAInfo); + SDValue StoreNode = DAG.getStoreEVL(getRoot(), sdl, Src0, Ptr, Mask, VLen, VT, + MMO, false /* Truncating */); + DAG.setRoot(StoreNode); + setValue(&I, StoreNode); +} + // Get a uniform base for the Gather/Scatter intrinsic. // The first argument of the Gather/Scatter intrinsic is a vector of pointers. // We try to represent it as a base pointer + vector of indices. @@ -4166,6 +4206,158 @@ setValue(&I, Gather); } +void SelectionDAGBuilder::visitGatherEVL(const CallInst &I) { + SDLoc sdl = getCurSDLoc(); + + // @llvm.evl.gather.*(Ptrs, Mask, VLen) + const Value *Ptr = I.getArgOperand(0); + SDValue Mask = getValue(I.getArgOperand(1)); + SDValue VLen = getValue(I.getArgOperand(2)); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + unsigned Alignment = 0; // TODO infer alignment //(cast(I.getArgOperand(1)))->getZExtValue(); + if (!Alignment) + Alignment = DAG.getEVTAlignment(VT); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); + + SDValue Root = DAG.getRoot(); + SDValue Base; + SDValue Index; + SDValue Scale; + const Value *BasePtr = Ptr; + bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this); + bool ConstantMemory = false; + if (UniformBase && AA && + AA->pointsToConstantMemory( + MemoryLocation(BasePtr, + LocationSize::precise( + DAG.getDataLayout().getTypeStoreSize(I.getType())), + AAInfo))) { + // Do not serialize (non-volatile) loads of constant memory with anything. + Root = DAG.getEntryNode(); + ConstantMemory = true; + } + + MachineMemOperand *MMO = + DAG.getMachineFunction(). + getMachineMemOperand(MachinePointerInfo(UniformBase ? 
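// visitStoreEVL above assumes llvm.evl.store carries (Src0, Ptr, Mask, VLen)
// and rebuilds that as the EVL_STORE operand list { Chain, Val, Ptr, Mask,
// VLen } via getStoreEVL:
//
//   SDValue Store = DAG.getStoreEVL(Chain, DL, Src0, Ptr, Mask, VLen,
//                                   /*MemVT=*/Src0.getValueType(), MMO,
//                                   /*IsTruncating=*/false);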
BasePtr : nullptr), + MachineMemOperand::MOLoad, VT.getStoreSize(), + Alignment, AAInfo, Ranges); + + if (!UniformBase) { + Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); + Index = getValue(Ptr); + Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); + } + SDValue Ops[] = { Root, Base, Index, Scale, Mask, VLen }; + SDValue Gather = DAG.getGatherEVL(DAG.getVTList(VT, MVT::Other), VT, sdl, Ops, MMO); + + SDValue OutChain = Gather.getValue(1); + if (!ConstantMemory) + PendingLoads.push_back(OutChain); + setValue(&I, Gather); +} + +void SelectionDAGBuilder::visitScatterEVL(const CallInst &I) { + SDLoc sdl = getCurSDLoc(); + + // llvm.evl.scatter.*(Src0, Ptrs, Mask, VLen) + const Value *Ptr = I.getArgOperand(1); + SDValue Src0 = getValue(I.getArgOperand(0)); + SDValue Mask = getValue(I.getArgOperand(2)); + SDValue VLen = getValue(I.getArgOperand(3)); + EVT VT = Src0.getValueType(); + unsigned Alignment = 0; // TODO infer alignmen t(cast(I.getArgOperand(2)))->getZExtValue(); + if (!Alignment) + Alignment = DAG.getEVTAlignment(VT); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + + SDValue Base; + SDValue Index; + SDValue Scale; + const Value *BasePtr = Ptr; + bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this); + + const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr; + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MachinePointerInfo(MemOpBasePtr), + MachineMemOperand::MOStore, VT.getStoreSize(), + Alignment, AAInfo); + if (!UniformBase) { + Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); + Index = getValue(Ptr); + Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); + } + SDValue Ops[] = { getRoot(), Src0, Base, Index, Scale, Mask, VLen }; + SDValue Scatter = DAG.getScatterEVL(DAG.getVTList(MVT::Other), VT, sdl, + Ops, MMO); + DAG.setRoot(Scatter); + setValue(&I, Scatter); +} + +void SelectionDAGBuilder::visitLoadEVL(const CallInst &I) { + SDLoc sdl = getCurSDLoc(); + + auto getMaskedLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &VLen, + unsigned& Alignment) { + // @llvm.evl.load.*(Ptr, Mask, Vlen) + Ptr = I.getArgOperand(0); + Alignment = 0; // TODO infer alignment //Alignment = cast(I.getArgOperand(1))->getZExtValue(); + Mask = I.getArgOperand(1); + VLen = I.getArgOperand(2); + }; + + Value *PtrOperand, *MaskOperand, *VLenOperand; + unsigned Alignment; + getMaskedLoadOps(PtrOperand, MaskOperand, VLenOperand, Alignment); + + SDValue Ptr = getValue(PtrOperand); + SDValue VLen = getValue(VLenOperand); + SDValue Mask = getValue(MaskOperand); + + // infer the return type + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SmallVector ValValueVTs; + ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValValueVTs); + EVT VT = ValValueVTs[0]; + assert((ValValueVTs.size() == 1) && "splitting not implemented"); + + if (!Alignment) + Alignment = DAG.getEVTAlignment(VT); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); + + // Do not serialize masked loads of constant memory with anything. + bool AddToChain = + !AA || !AA->pointsToConstantMemory(MemoryLocation( + PtrOperand, + LocationSize::precise( + DAG.getDataLayout().getTypeStoreSize(I.getType())), + AAInfo)); + SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); + + MachineMemOperand *MMO = + DAG.getMachineFunction(). 
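// When getUniformBase fails, visitGatherEVL and visitScatterEVL encode the
// access as address = Base + Index * Scale with a zero base, the full pointer
// vector as index and a scale of one, so no addressing information is lost:
//
//   Base  = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));
//   Index = getValue(Ptr); // the vector of pointers
//   Scale = DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));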
+ getMachineMemOperand(MachinePointerInfo(PtrOperand), + MachineMemOperand::MOLoad, VT.getStoreSize(), + Alignment, AAInfo, Ranges); + + SDValue Load = DAG.getLoadEVL(VT, sdl, InChain, Ptr, Mask, VLen, VT, MMO, + ISD::NON_EXTLOAD); + if (AddToChain) + PendingLoads.push_back(Load.getValue(1)); + setValue(&I, Load); +} + void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering SuccessOrder = I.getSuccessOrdering(); @@ -5662,6 +5854,63 @@ case Intrinsic::experimental_constrained_trunc: visitConstrainedFPIntrinsic(cast(I)); return nullptr; + + case Intrinsic::evl_and: + case Intrinsic::evl_or: + case Intrinsic::evl_xor: + case Intrinsic::evl_ashr: + case Intrinsic::evl_lshr: + case Intrinsic::evl_shl: + + case Intrinsic::evl_select: + case Intrinsic::evl_compose: + case Intrinsic::evl_compress: + case Intrinsic::evl_expand: + case Intrinsic::evl_vshift: + + case Intrinsic::evl_load: + case Intrinsic::evl_store: + case Intrinsic::evl_gather: + case Intrinsic::evl_scatter: + + case Intrinsic::evl_fneg: + + case Intrinsic::evl_fadd: + case Intrinsic::evl_fsub: + case Intrinsic::evl_fmul: + case Intrinsic::evl_fdiv: + case Intrinsic::evl_frem: + + case Intrinsic::evl_fma: + + case Intrinsic::evl_add: + case Intrinsic::evl_sub: + case Intrinsic::evl_mul: + case Intrinsic::evl_udiv: + case Intrinsic::evl_sdiv: + case Intrinsic::evl_urem: + case Intrinsic::evl_srem: + + case Intrinsic::evl_cmp: + + case Intrinsic::evl_reduce_and: + case Intrinsic::evl_reduce_or: + case Intrinsic::evl_reduce_xor: + + case Intrinsic::evl_reduce_fadd: + case Intrinsic::evl_reduce_fmax: + case Intrinsic::evl_reduce_fmin: + case Intrinsic::evl_reduce_fmul: + + case Intrinsic::evl_reduce_add: + case Intrinsic::evl_reduce_mul: + case Intrinsic::evl_reduce_umax: + case Intrinsic::evl_reduce_umin: + case Intrinsic::evl_reduce_smax: + case Intrinsic::evl_reduce_smin: + visitExplicitVectorLengthIntrinsic(cast(I)); + return nullptr; + case Intrinsic::fmuladd: { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && @@ -6475,6 +6724,138 @@ setValue(&FPI, FPResult); } +void SelectionDAGBuilder::visitCmpEVL(const EVLIntrinsic &I) { + ISD::CondCode Condition; + CmpInst::Predicate predicate = I.getCmpPredicate(); + bool IsFP = I.getOperand(0)->getType()->isFPOrFPVectorTy(); + if (IsFP) { + Condition = getFCmpCondCode(predicate); + auto *FPMO = dyn_cast(&I); + if ((FPMO && FPMO->hasNoNaNs()) || TM.Options.NoNaNsFPMath) + Condition = getFCmpCodeWithoutNaN(Condition); + + } else { + Condition = getICmpCondCode(predicate); + } + + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); + setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition)); +} + +void SelectionDAGBuilder::visitExplicitVectorLengthIntrinsic( + const EVLIntrinsic & EVLInst) { + SDLoc sdl = getCurSDLoc(); + unsigned Opcode; + switch (EVLInst.getIntrinsicID()) { + default: + llvm_unreachable("Unforeseen intrinsic"); // Can't reach here. 
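// The comparison predicate of llvm.evl.cmp travels as a trailing constant i8
// (encoded in EVLBuilder::CreateVectorCopy, decoded by
// EVLIntrinsic::getCmpPredicate and consumed by visitCmpEVL above). Sketch of
// the two ends:
//
//   // encode (IR side)
//   auto *PredArg = ConstantInt::get(Builder.getInt8Ty(), CmpInst::FCMP_OLT);
//   // decode (SelectionDAG side)
//   auto Pred = static_cast<CmpInst::Predicate>(
//       cast<ConstantInt>(I.getArgOperand(4))->getZExtValue());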
+ + case Intrinsic::evl_load: visitLoadEVL(EVLInst); return; + case Intrinsic::evl_store: visitStoreEVL(EVLInst); return; + case Intrinsic::evl_gather: visitGatherEVL(EVLInst); return; + case Intrinsic::evl_scatter: visitScatterEVL(EVLInst); return; + + case Intrinsic::evl_cmp: visitCmpEVL(EVLInst); return; + + case Intrinsic::evl_add: Opcode = ISD::EVL_ADD; break; + case Intrinsic::evl_sub: Opcode = ISD::EVL_SUB; break; + case Intrinsic::evl_mul: Opcode = ISD::EVL_MUL; break; + case Intrinsic::evl_udiv: Opcode = ISD::EVL_UDIV; break; + case Intrinsic::evl_sdiv: Opcode = ISD::EVL_SDIV; break; + case Intrinsic::evl_urem: Opcode = ISD::EVL_UREM; break; + case Intrinsic::evl_srem: Opcode = ISD::EVL_SREM; break; + + case Intrinsic::evl_and: Opcode = ISD::EVL_AND; break; + case Intrinsic::evl_or: Opcode = ISD::EVL_OR; break; + case Intrinsic::evl_xor: Opcode = ISD::EVL_XOR; break; + case Intrinsic::evl_ashr: Opcode = ISD::EVL_SRA; break; + case Intrinsic::evl_lshr: Opcode = ISD::EVL_SRL; break; + case Intrinsic::evl_shl: Opcode = ISD::EVL_SHL; break; + + case Intrinsic::evl_fneg: Opcode = ISD::EVL_FNEG; break; + case Intrinsic::evl_fadd: Opcode = ISD::EVL_FADD; break; + case Intrinsic::evl_fsub: Opcode = ISD::EVL_FSUB; break; + case Intrinsic::evl_fmul: Opcode = ISD::EVL_FMUL; break; + case Intrinsic::evl_fdiv: Opcode = ISD::EVL_FDIV; break; + case Intrinsic::evl_frem: Opcode = ISD::EVL_FREM; break; + + case Intrinsic::evl_fma: Opcode = ISD::EVL_FMA; break; + + case Intrinsic::evl_select: Opcode = ISD::EVL_SELECT; break; + case Intrinsic::evl_compose: Opcode = ISD::EVL_COMPOSE; break; + case Intrinsic::evl_compress: Opcode = ISD::EVL_COMPRESS; break; + case Intrinsic::evl_expand: Opcode = ISD::EVL_EXPAND; break; + case Intrinsic::evl_vshift: Opcode = ISD::EVL_VSHIFT; break; + + case Intrinsic::evl_reduce_and: Opcode = ISD::EVL_REDUCE_AND; break; + case Intrinsic::evl_reduce_or: Opcode = ISD::EVL_REDUCE_OR; break; + case Intrinsic::evl_reduce_xor: Opcode = ISD::EVL_REDUCE_XOR; break; + case Intrinsic::evl_reduce_add: Opcode = ISD::EVL_REDUCE_ADD; break; + case Intrinsic::evl_reduce_mul: Opcode = ISD::EVL_REDUCE_MUL; break; + case Intrinsic::evl_reduce_fadd: Opcode = ISD::EVL_REDUCE_FADD; break; + case Intrinsic::evl_reduce_fmul: Opcode = ISD::EVL_REDUCE_FMUL; break; + case Intrinsic::evl_reduce_smax: Opcode = ISD::EVL_REDUCE_SMAX; break; + case Intrinsic::evl_reduce_smin: Opcode = ISD::EVL_REDUCE_SMIN; break; + case Intrinsic::evl_reduce_umax: Opcode = ISD::EVL_REDUCE_UMAX; break; + case Intrinsic::evl_reduce_umin: Opcode = ISD::EVL_REDUCE_UMIN; break; + } + + // TODO memory evl: SDValue Chain = getRoot(); + + SmallVector ValueVTs; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + ComputeValueVTs(TLI, DAG.getDataLayout(), EVLInst.getType(), ValueVTs); + SDVTList VTs = DAG.getVTList(ValueVTs); + + // ValueVTs.push_back(MVT::Other); // Out chain + + + SDValue Result; + + switch (EVLInst.getNumArgOperands()) { + default: + llvm_unreachable("unexpected number of arguments to evl intrinsic"); + case 3: + Result = DAG.getNode(Opcode, sdl, VTs, + { getValue(EVLInst.getArgOperand(0)), + getValue(EVLInst.getArgOperand(1)), + getValue(EVLInst.getArgOperand(2)) }); + break; + + case 4: + Result = DAG.getNode(Opcode, sdl, VTs, + { getValue(EVLInst.getArgOperand(0)), + getValue(EVLInst.getArgOperand(1)), + getValue(EVLInst.getArgOperand(2)), + getValue(EVLInst.getArgOperand(3)) }); + break; + + case 5: + Result = DAG.getNode(Opcode, sdl, VTs, + { getValue(EVLInst.getArgOperand(0)), + 
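// The arity-driven construction in this switch relies on mask and vector
// length always trailing the data operands:
//
//   3 arguments: unary ops,   e.g. evl.fneg(x, mask, vlen)
//   4 arguments: binary ops,  e.g. evl.add(a, b, mask, vlen)
//   5 arguments: ternary ops, e.g. evl.fma(a, b, c, mask, vlen)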
getValue(EVLInst.getArgOperand(1)), + getValue(EVLInst.getArgOperand(2)), + getValue(EVLInst.getArgOperand(3)), + getValue(EVLInst.getArgOperand(4)) }); + break; + } + + if (Result.getNode()->getNumValues() == 2) { + // this evl node has a chain + SDValue OutChain = Result.getValue(1); + DAG.setRoot(OutChain); + SDValue EVLResult = Result.getValue(0); + setValue(&EVLInst, EVLResult); + } else { + // this is a pure node + setValue(&EVLInst, Result); + } +} + std::pair SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, const BasicBlock *EHPadBB) { Index: lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -421,6 +421,65 @@ case ISD::VECREDUCE_UMIN: return "vecreduce_umin"; case ISD::VECREDUCE_FMAX: return "vecreduce_fmax"; case ISD::VECREDUCE_FMIN: return "vecreduce_fmin"; + + // Explicit Vector Length erxtension + // EVL Memory + case ISD::EVL_LOAD: return "evl_load"; + case ISD::EVL_STORE: return "evl_store"; + case ISD::EVL_GATHER: return "evl_gather"; + case ISD::EVL_SCATTER: return "evl_scatter"; + + // EVL Unary operators + case ISD::EVL_FNEG: return "evl_fneg"; + + // EVL Binary operators + case ISD::EVL_ADD: return "evl_add"; + case ISD::EVL_SUB: return "evl_sub"; + case ISD::EVL_MUL: return "evl_mul"; + case ISD::EVL_SDIV: return "evl_sdiv"; + case ISD::EVL_UDIV: return "evl_udiv"; + case ISD::EVL_SREM: return "evl_srem"; + case ISD::EVL_UREM: return "evl_urem"; + case ISD::EVL_AND: return "evl_and"; + case ISD::EVL_OR: return "evl_or"; + case ISD::EVL_XOR: return "evl_xor"; + case ISD::EVL_SHL: return "evl_shl"; + case ISD::EVL_SRA: return "evl_sra"; + case ISD::EVL_SRL: return "evl_srl"; + case ISD::EVL_FADD: return "evl_fadd"; + case ISD::EVL_FSUB: return "evl_fsub"; + case ISD::EVL_FMUL: return "evl_fmul"; + case ISD::EVL_FDIV: return "evl_fdiv"; + case ISD::EVL_FREM: return "evl_frem"; + + // EVL comparison + case ISD::EVL_SETCC: return "evl_setcc"; + + // EVL ternary operators + case ISD::EVL_FMA: return "evl_fma"; + + // EVL shuffle + case ISD::EVL_VSHIFT: return "evl_vshift"; + case ISD::EVL_COMPRESS: return "evl_compress"; + case ISD::EVL_EXPAND: return "evl_expand"; + + case ISD::EVL_COMPOSE: return "evl_compose"; + case ISD::EVL_SELECT: return "evl_select"; + + // EVL reduction operators + case ISD::EVL_REDUCE_FADD: return "evl_reduce_fadd"; + case ISD::EVL_REDUCE_FMUL: return "evl_reduce_fmul"; + case ISD::EVL_REDUCE_ADD: return "evl_reduce_add"; + case ISD::EVL_REDUCE_MUL: return "evl_reduce_mul"; + case ISD::EVL_REDUCE_AND: return "evl_reduce_and"; + case ISD::EVL_REDUCE_OR: return "evl_reduce_or"; + case ISD::EVL_REDUCE_XOR: return "evl_reduce_xor"; + case ISD::EVL_REDUCE_SMAX: return "evl_reduce_smax"; + case ISD::EVL_REDUCE_SMIN: return "evl_reduce_smin"; + case ISD::EVL_REDUCE_UMAX: return "evl_reduce_umax"; + case ISD::EVL_REDUCE_UMIN: return "evl_reduce_umin"; + case ISD::EVL_REDUCE_FMAX: return "evl_reduce_fmax"; + case ISD::EVL_REDUCE_FMIN: return "evl_reduce_fmin"; } } Index: lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -753,6 +753,10 @@ CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel); } + if (getenv("SDEBUG")) { + CurDAG->dump(); + } + #ifndef NDEBUG if (TTI.hasBranchDivergence()) 
CurDAG->VerifyDAGDiverence(); Index: lib/IR/Attributes.cpp =================================================================== --- lib/IR/Attributes.cpp +++ lib/IR/Attributes.cpp @@ -256,6 +256,8 @@ return "byval"; if (hasAttribute(Attribute::Convergent)) return "convergent"; + if (hasAttribute(Attribute::VectorLength)) + return "vlen"; if (hasAttribute(Attribute::SwiftError)) return "swifterror"; if (hasAttribute(Attribute::SwiftSelf)) @@ -272,6 +274,10 @@ return "inreg"; if (hasAttribute(Attribute::JumpTable)) return "jumptable"; + if (hasAttribute(Attribute::Mask)) + return "mask"; + if (hasAttribute(Attribute::Passthru)) + return "passthru"; if (hasAttribute(Attribute::MinSize)) return "minsize"; if (hasAttribute(Attribute::Naked)) Index: lib/IR/CMakeLists.txt =================================================================== --- lib/IR/CMakeLists.txt +++ lib/IR/CMakeLists.txt @@ -23,6 +23,7 @@ DiagnosticPrinter.cpp Dominators.cpp DomTreeUpdater.cpp + EVLBuilder.cpp Function.cpp GVMaterializer.cpp Globals.cpp Index: lib/IR/EVLBuilder.cpp =================================================================== --- /dev/null +++ lib/IR/EVLBuilder.cpp @@ -0,0 +1,251 @@ +#include +#include +#include + +#include + +namespace llvm { + +Module & +EVLBuilder::getModule() const { + return *Builder.GetInsertBlock()->getParent()->getParent(); +} + +EVLIntrinsicDesc +EVLBuilder::GetEVLIntrinsicDesc(unsigned OC) { + switch (OC) { + // fp unary + case Instruction::FNeg: return EVLIntrinsicDesc{ Intrinsic::evl_fneg, TypeTokenVec{EVLTypeToken::Vector}, 1, 2}; break; + + // fp binary + case Instruction::FAdd: return EVLIntrinsicDesc{ Intrinsic::evl_fadd, TypeTokenVec{EVLTypeToken::Vector}, 2, 3}; break; + case Instruction::FSub: return EVLIntrinsicDesc{ Intrinsic::evl_fsub, TypeTokenVec{EVLTypeToken::Vector}, 2, 3}; break; + case Instruction::FMul: return EVLIntrinsicDesc{ Intrinsic::evl_fmul, TypeTokenVec{EVLTypeToken::Vector}, 2, 3}; break; + case Instruction::FDiv: return EVLIntrinsicDesc{ Intrinsic::evl_fdiv, TypeTokenVec{EVLTypeToken::Vector}, 2, 3}; break; + case Instruction::FRem: return EVLIntrinsicDesc{ Intrinsic::evl_frem, TypeTokenVec{EVLTypeToken::Vector}, 2, 3}; break; + + // sign-oblivious int + case Instruction::Add: return EVLIntrinsicDesc{ Intrinsic::evl_add, TypeTokenVec{EVLTypeToken::Vector}, 2, 3}; break; + case Instruction::Sub: return EVLIntrinsicDesc{ Intrinsic::evl_sub, TypeTokenVec{EVLTypeToken::Vector}, 2, 3}; break; + case Instruction::Mul: return EVLIntrinsicDesc{ Intrinsic::evl_mul, TypeTokenVec{EVLTypeToken::Vector}, 2, 3}; break; + + // signed/unsigned int + case Instruction::SDiv: return EVLIntrinsicDesc{ Intrinsic::evl_sdiv, TypeTokenVec{EVLTypeToken::Vector}, 2, 3}; break; + case Instruction::UDiv: return EVLIntrinsicDesc{ Intrinsic::evl_udiv, TypeTokenVec{EVLTypeToken::Vector}, 2, 3}; break; + case Instruction::SRem: return EVLIntrinsicDesc{ Intrinsic::evl_srem, TypeTokenVec{EVLTypeToken::Vector}, 2, 3}; break; + case Instruction::URem: return EVLIntrinsicDesc{ Intrinsic::evl_urem, TypeTokenVec{EVLTypeToken::Vector}, 2, 3}; break; + + // logical + case Instruction::Or: return EVLIntrinsicDesc{ Intrinsic::evl_or, TypeTokenVec{EVLTypeToken::Vector}, 2, 3}; break; + case Instruction::And: return EVLIntrinsicDesc{ Intrinsic::evl_and, TypeTokenVec{EVLTypeToken::Vector}, 2, 3}; break; + case Instruction::Xor: return EVLIntrinsicDesc{ Intrinsic::evl_xor, TypeTokenVec{EVLTypeToken::Vector}, 2, 3}; break; + + case Instruction::LShr: return EVLIntrinsicDesc{ Intrinsic::evl_lshr, 
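// The initializers in GetEVLIntrinsicDesc follow the field order of
// EVLIntrinsicDesc, which is declared in the EVLBuilder header and not part of
// this hunk; judging from the uses in CreateVectorCopy it is approximately:
//
//   struct EVLIntrinsicDesc {
//     Intrinsic::ID ID;        // EVL intrinsic to emit
//     TypeTokenVec typeTokens; // types passed to Intrinsic::getDeclaration
//     int MaskPos;             // argument index of the mask (-1 if none)
//     int EVLPos;              // argument index of the vector length (-1 if none)
//   };
//
// e.g. {Intrinsic::evl_fadd, {Vector}, 2, 3} puts the mask at argument 2 and
// the vector length at argument 3.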
TypeTokenVec{EVLTypeToken::Vector}, 2, 3}; break; + case Instruction::AShr: return EVLIntrinsicDesc{ Intrinsic::evl_ashr, TypeTokenVec{EVLTypeToken::Vector}, 2, 3}; break; + case Instruction::Shl: return EVLIntrinsicDesc{ Intrinsic::evl_shl, TypeTokenVec{EVLTypeToken::Vector}, 2, 3}; break; + + // comparison + case Instruction::ICmp: + case Instruction::FCmp: + return EVLIntrinsicDesc{ Intrinsic::evl_cmp, TypeTokenVec{EVLTypeToken::Mask, EVLTypeToken::Vector}, 2, 3}; break; + + default: + return EVLIntrinsicDesc{Intrinsic::not_intrinsic, TypeTokenVec(), -1, -1}; + } +} + +static +ShortTypeVec +EncodeTypeTokens(TypeTokenVec TTVec, Type & VectorTy, Type & ScalarTy) { + ShortTypeVec STV; + + for (auto Token : TTVec) { + switch (Token) { + default: + llvm_unreachable("unsupported token"); // unsupported EVLTypeToken + + case EVLTypeToken::Scalar: STV.push_back(&ScalarTy); break; + case EVLTypeToken::Vector: STV.push_back(&VectorTy); break; + case EVLTypeToken::Mask: + auto NumElems = VectorTy.getVectorNumElements(); + auto MaskTy = VectorType::get(Type::getInt1Ty(VectorTy.getContext()), NumElems); + STV.push_back(MaskTy); break; + + } + } + + return STV; +} + +Value& +EVLBuilder::GetMaskForType(VectorType & VecTy) { + if (Mask) return *Mask; + + auto * boolTy = Builder.getInt1Ty(); + auto * maskTy = VectorType::get(boolTy, StaticVectorLength); + return *ConstantInt::getAllOnesValue(maskTy); +} + +Value& +EVLBuilder::GetEVLForType(VectorType & VecTy) { + if (ExplicitVectorLength) return *ExplicitVectorLength; + + // TODO SVE + auto * intTy = Builder.getInt32Ty(); + return *ConstantInt::get(intTy, StaticVectorLength); +} + +Value* +EVLBuilder::CreateVectorCopy(Instruction & Inst, ValArray VecOpArray) { + auto oc = Inst.getOpcode(); + + auto evlDesc = GetEVLIntrinsicDesc(oc); + if (evlDesc.ID == Intrinsic::not_intrinsic) { + return nullptr; + } + + if ((oc <= Instruction::BinaryOpsEnd) && + (oc >= Instruction::BinaryOpsBegin)) { + + assert(VecOpArray.size() == 2); + Value & FirstOp = *VecOpArray[0]; + Value & SndOp = *VecOpArray[1]; + + // Fetch the EVL intrinsic + auto & VecTy = cast(*FirstOp.getType()); + auto & ScalarTy = *VecTy.getVectorElementType(); + auto * Func = Intrinsic::getDeclaration(&getModule(), evlDesc.ID, EncodeTypeTokens(evlDesc.typeTokens, VecTy, ScalarTy)); + + assert((evlDesc.MaskPos == 2) && (evlDesc.EVLPos == 3)); + + // Materialize the Call + ShortValueVec Args{&FirstOp, &SndOp, &GetMaskForType(VecTy), &GetEVLForType(VecTy)}; + + auto & EVLCall = *Builder.CreateCall(Func, Args); + + // transfer fast math flags + if (isa(Inst)) { + cast(EVLCall).copyFastMathFlags(Inst.getFastMathFlags()); + } + + return &EVLCall; + } + + if ((oc <= Instruction::UnaryOpsBegin) && + (oc >= Instruction::UnaryOpsEnd)) { + assert(VecOpArray.size() == 1); + Value & FirstOp = *VecOpArray[0]; + + // Fetch the EVL intrinsic + auto & VecTy = cast(*FirstOp.getType()); + auto & ScalarTy = *VecTy.getVectorElementType(); + auto * Func = Intrinsic::getDeclaration(&getModule(), evlDesc.ID, EncodeTypeTokens(evlDesc.typeTokens, VecTy, ScalarTy)); + + assert((evlDesc.MaskPos == 1) && (evlDesc.EVLPos == 2)); + + // Materialize the Call + ShortValueVec Args{&FirstOp, &GetMaskForType(VecTy), &GetEVLForType(VecTy)}; + + auto & EVLCall = *Builder.CreateCall(Func, Args); + + // transfer fast math flags + if (isa(Inst)) { + cast(EVLCall).copyFastMathFlags(Inst.getFastMathFlags()); + } + + return &EVLCall; + } + + switch (oc) { + default: + return nullptr; + + case Instruction::FCmp: + case Instruction::ICmp: { + 
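// Usage sketch (assumed caller; EVLBuilder construction and the exact ValArray
// type are not shown in this patch): a vectorizer hands CreateVectorCopy the
// scalar instruction plus its already widened operands and receives the
// corresponding EVL intrinsic call, or nullptr if there is no EVL mapping.
//
//   SmallVector<Value *, 2> WideOps = {WideA, WideB};
//   if (Value *EVLCall = EVLB.CreateVectorCopy(*ScalarAdd, WideOps))
//     VectorMap[ScalarAdd] = EVLCall;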
assert(VecOpArray.size() == 2); + Value & FirstOp = *VecOpArray[0]; + Value & SndOp = *VecOpArray[1]; + + // Fetch the EVL intrinsic + auto & VecTy = cast(*FirstOp.getType()); + auto & ScalarTy = *VecTy.getVectorElementType(); + auto * Func = Intrinsic::getDeclaration(&getModule(), evlDesc.ID, EncodeTypeTokens(evlDesc.typeTokens, VecTy, ScalarTy)); + + assert((evlDesc.MaskPos == 2) && (evlDesc.EVLPos == 3)); + + // encode comparison predicate as MD + uint8_t RawPred = cast(Inst).getPredicate(); + auto Int8Ty = Builder.getInt8Ty(); + auto PredArg = ConstantInt::get(Int8Ty, RawPred, false); + + // Materialize the Call + ShortValueVec Args{&FirstOp, &SndOp, &GetMaskForType(VecTy), &GetEVLForType(VecTy), PredArg}; + + return Builder.CreateCall(Func, Args); + } + + case Instruction::Select: { + assert(VecOpArray.size() == 2); + Value & MaskOp = *VecOpArray[0]; + Value & OnTrueOp = *VecOpArray[1]; + Value & OnFalseOp = *VecOpArray[2]; + + // Fetch the EVL intrinsic + auto & VecTy = cast(*OnTrueOp.getType()); + auto & ScalarTy = *VecTy.getVectorElementType(); + + auto * Func = Intrinsic::getDeclaration(&getModule(), evlDesc.ID, EncodeTypeTokens(evlDesc.typeTokens, VecTy, ScalarTy)); + + assert((evlDesc.MaskPos == 2) && (evlDesc.EVLPos == 3)); + + // Materialize the Call + ShortValueVec Args{&OnTrueOp, &OnFalseOp, &MaskOp, &GetEVLForType(VecTy)}; + + return Builder.CreateCall(Func, Args); + } + } +} + +VectorType& +EVLBuilder::getVectorType(Type &ElementTy) { + return *VectorType::get(&ElementTy, StaticVectorLength); +} + +Value& +EVLBuilder::CreateContiguousStore(Value & Val, Value & Pointer) { + auto & VecTy = cast(*Val.getType()); + auto * StoreFunc = Intrinsic::getDeclaration(&getModule(), Intrinsic::evl_store, {Val.getType(), Pointer.getType()}); + ShortValueVec Args{&Val, &Pointer, &GetMaskForType(VecTy), &GetEVLForType(VecTy)}; + return *Builder.CreateCall(StoreFunc, Args); +} + +Value& +EVLBuilder::CreateContiguousLoad(Value & Pointer) { + auto & PointerTy = cast(*Pointer.getType()); + auto & VecTy = getVectorType(*PointerTy.getPointerElementType()); + + auto * LoadFunc = Intrinsic::getDeclaration(&getModule(), Intrinsic::evl_load, {&VecTy, &PointerTy}); + ShortValueVec Args{&Pointer, &GetMaskForType(VecTy), &GetEVLForType(VecTy)}; + return *Builder.CreateCall(LoadFunc, Args); +} + +Value& +EVLBuilder::CreateScatter(Value & Val, Value & PointerVec) { + auto & VecTy = cast(*Val.getType()); + auto * ScatterFunc = Intrinsic::getDeclaration(&getModule(), Intrinsic::evl_scatter, {Val.getType(), PointerVec.getType()}); + ShortValueVec Args{&Val, &PointerVec, &GetMaskForType(VecTy), &GetEVLForType(VecTy)}; + return *Builder.CreateCall(ScatterFunc, Args); +} + +Value& +EVLBuilder::CreateGather(Value & PointerVec) { + auto & PointerVecTy = cast(*PointerVec.getType()); + auto & ElemTy = *cast(*PointerVecTy.getVectorElementType()).getPointerElementType(); + auto & VecTy = *VectorType::get(&ElemTy, PointerVecTy.getNumElements()); + auto * GatherFunc = Intrinsic::getDeclaration(&getModule(), Intrinsic::evl_gather, {&VecTy, &PointerVecTy}); + + ShortValueVec Args{&PointerVec, &GetMaskForType(VecTy), &GetEVLForType(VecTy)}; + return *Builder.CreateCall(GatherFunc, Args); +} + +} // namespace llvm Index: lib/IR/IntrinsicInst.cpp =================================================================== --- lib/IR/IntrinsicInst.cpp +++ lib/IR/IntrinsicInst.cpp @@ -137,6 +137,89 @@ .Default(ebInvalid); } +CmpInst::Predicate +EVLIntrinsic::getCmpPredicate() const { + return 
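// Usage sketch for the EVLBuilder memory helpers above; the mask and vector
// length operands are supplied from the builder's current state
// (GetMaskForType / GetEVLForType):
//
//   Value &Loaded = EVLB.CreateContiguousLoad(*Ptr);   // llvm.evl.load
//   EVLB.CreateContiguousStore(Loaded, *Ptr);          // llvm.evl.store
//   Value &Gathered = EVLB.CreateGather(*PtrVec);      // llvm.evl.gather
//   EVLB.CreateScatter(Gathered, *PtrVec);              // llvm.evl.scatter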
static_cast(cast(getArgOperand(4))->getZExtValue()); +} + +bool EVLIntrinsic::isUnaryOp() const { + switch (getIntrinsicID()) { + default: + return false; + case Intrinsic::evl_fneg: + return true; + } +} + +Value* +EVLIntrinsic::GetMask() const { + if (isBinaryOp()) { return getArgOperand(2); } + else if (isTernaryOp()) { return getArgOperand(3); } + else if (isUnaryOp()) { return getArgOperand(1); } + else return nullptr; +} + +Value* +EVLIntrinsic::GetVectorLength() const { + if (isBinaryOp()) { return getArgOperand(3); } + else if (isTernaryOp()) { return getArgOperand(4); } + else if (isUnaryOp()) { return getArgOperand(2); } + else return nullptr; +} + +bool EVLIntrinsic::isBinaryOp() const { + switch (getIntrinsicID()) { + default: + return false; + + case Intrinsic::evl_and: + case Intrinsic::evl_or: + case Intrinsic::evl_xor: + case Intrinsic::evl_ashr: + case Intrinsic::evl_lshr: + case Intrinsic::evl_shl: + + case Intrinsic::evl_fadd: + case Intrinsic::evl_fsub: + case Intrinsic::evl_fmul: + case Intrinsic::evl_fdiv: + case Intrinsic::evl_frem: + + case Intrinsic::evl_reduce_or: + case Intrinsic::evl_reduce_xor: + case Intrinsic::evl_reduce_add: + case Intrinsic::evl_reduce_mul: + case Intrinsic::evl_reduce_smax: + case Intrinsic::evl_reduce_smin: + case Intrinsic::evl_reduce_umax: + case Intrinsic::evl_reduce_umin: + + case Intrinsic::evl_reduce_fadd: + case Intrinsic::evl_reduce_fmul: + case Intrinsic::evl_reduce_fmax: + case Intrinsic::evl_reduce_fmin: + + case Intrinsic::evl_add: + case Intrinsic::evl_sub: + case Intrinsic::evl_mul: + case Intrinsic::evl_udiv: + case Intrinsic::evl_sdiv: + case Intrinsic::evl_urem: + case Intrinsic::evl_srem: + return true; + } +} + +bool EVLIntrinsic::isTernaryOp() const { + switch (getIntrinsicID()) { + default: + return false; + case Intrinsic::evl_fma: + case Intrinsic::evl_select: + return true; + } +} + bool ConstrainedFPIntrinsic::isUnaryOp() const { switch (getIntrinsicID()) { default: Index: lib/IR/Verifier.cpp =================================================================== --- lib/IR/Verifier.cpp +++ lib/IR/Verifier.cpp @@ -1652,11 +1652,14 @@ if (Attrs.isEmpty()) return; + bool SawMask = false; bool SawNest = false; + bool SawPassthru = false; bool SawReturned = false; bool SawSRet = false; bool SawSwiftSelf = false; bool SawSwiftError = false; + bool SawVectorLength = false; // Verify return value attributes. AttributeSet RetAttrs = Attrs.getRetAttributes(); @@ -1719,12 +1722,33 @@ SawSwiftError = true; } + if (ArgAttrs.hasAttribute(Attribute::VectorLength)) { + Assert(!SawVectorLength, "Cannot have multiple 'vlen' parameters!", + V); + SawVectorLength = true; + } + + if (ArgAttrs.hasAttribute(Attribute::Passthru)) { + Assert(!SawPassthru, "Cannot have multiple 'passthru' parameters!", + V); + SawPassthru = true; + } + + if (ArgAttrs.hasAttribute(Attribute::Mask)) { + Assert(!SawMask, "Cannot have multiple 'mask' parameters!", + V); + SawMask = true; + } + if (ArgAttrs.hasAttribute(Attribute::InAlloca)) { Assert(i == FT->getNumParams() - 1, "inalloca isn't on the last parameter!", V); } } + Assert(!SawPassthru || SawMask, + "Cannot have 'passthru' parameter without 'mask' parameter!", V); + if (!Attrs.hasAttributes(AttributeList::FunctionIndex)) return; @@ -3041,7 +3065,7 @@ /// visitUnaryOperator - Check the argument to the unary operator. 
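// Sketch: generic consumers can recover the predication operands of an EVL
// call without knowing its exact arity (assuming the usual dyn_cast support
// for EVLIntrinsic):
//
//   if (auto *EVLI = dyn_cast<EVLIntrinsic>(&Call)) {
//     Value *Mask = EVLI->GetMask();          // nullptr if the arity is not classified above
//     Value *VLen = EVLI->GetVectorLength();  // nullptr likewise
//   }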
/// void Verifier::visitUnaryOperator(UnaryOperator &U) { - Assert(U.getType() == U.getOperand(0)->getType(), + Assert(U.getType() == U.getOperand(0)->getType(), "Unary operators must have same type for" "operands and result!", &U); @@ -4870,7 +4894,7 @@ bool runOnFunction(Function &F) override { if (!V->verify(F) && FatalErrors) { - errs() << "in function " << F.getName() << '\n'; + errs() << "in function " << F.getName() << '\n'; report_fatal_error("Broken function found, compilation aborted!"); } return false; Index: lib/Transforms/Utils/CodeExtractor.cpp =================================================================== --- lib/Transforms/Utils/CodeExtractor.cpp +++ lib/Transforms/Utils/CodeExtractor.cpp @@ -773,6 +773,7 @@ case Attribute::InaccessibleMemOnly: case Attribute::InaccessibleMemOrArgMemOnly: case Attribute::JumpTable: + case Attribute::Mask: case Attribute::Naked: case Attribute::Nest: case Attribute::NoAlias: @@ -781,6 +782,7 @@ case Attribute::NoReturn: case Attribute::None: case Attribute::NonNull: + case Attribute::Passthru: case Attribute::ReadNone: case Attribute::ReadOnly: case Attribute::Returned: @@ -791,6 +793,7 @@ case Attribute::StructRet: case Attribute::SwiftError: case Attribute::SwiftSelf: + case Attribute::VectorLength: case Attribute::WriteOnly: case Attribute::ZExt: case Attribute::EndAttrKinds: Index: test/Bitcode/attributes.ll =================================================================== --- test/Bitcode/attributes.ll +++ test/Bitcode/attributes.ll @@ -351,6 +351,11 @@ ret void } +; CHECK: define <8 x double> @f60(<8 x double> passthru, <8 x i1> mask, i32 vlen) { +define <8 x double> @f60(<8 x double> passthru, <8 x i1> mask, i32 vlen) { + ret <8 x double> undef +} + ; CHECK: attributes #0 = { noreturn } ; CHECK: attributes #1 = { nounwind } ; CHECK: attributes #2 = { readnone } Index: test/Verifier/evl_attribs.ll =================================================================== --- /dev/null +++ test/Verifier/evl_attribs.ll @@ -0,0 +1,13 @@ +; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s + +declare void @a(<16 x i1> mask %a, <16 x i1> mask %b) +; CHECK: Cannot have multiple 'mask' parameters! + +declare void @b(<16 x i1> mask %a, i32 vlen %x, i32 vlen %y) +; CHECK: Cannot have multiple 'vlen' parameters! + +declare <16 x double> @c(<16 x double> passthru %a) +; CHECK: Cannot have 'passthru' parameter without 'mask' parameter! + +declare <16 x double> @d(<16 x double> passthru %a, <16 x i1> mask %M, <16 x double> passthru %b) +; CHECK: Cannot have multiple 'passthru' parameters! Index: utils/TableGen/CodeGenIntrinsics.h =================================================================== --- utils/TableGen/CodeGenIntrinsics.h +++ utils/TableGen/CodeGenIntrinsics.h @@ -136,7 +136,7 @@ // True if the intrinsic is marked as speculatable. 
bool isSpeculatable; - enum ArgAttribute { NoCapture, Returned, ReadOnly, WriteOnly, ReadNone }; + enum ArgAttribute { Mask, NoCapture, Passthru, Returned, ReadOnly, WriteOnly, ReadNone, VectorLength }; std::vector> ArgumentAttributes; bool hasProperty(enum SDNP Prop) const { Index: utils/TableGen/CodeGenTarget.cpp =================================================================== --- utils/TableGen/CodeGenTarget.cpp +++ utils/TableGen/CodeGenTarget.cpp @@ -599,10 +599,10 @@ "Expected iAny or vAny type"); } else { VT = getValueType(TyEl->getValueAsDef("VT")); - } - if (MVT(VT).isOverloaded()) { - OverloadedVTs.push_back(VT); - isOverloaded = true; + if (MVT(VT).isOverloaded()) { + OverloadedVTs.push_back(VT); + isOverloaded = true; + } } // Reject invalid types. @@ -636,14 +636,15 @@ !TyEl->isSubClassOf("LLVMVectorSameWidth")) || VT == MVT::iAny || VT == MVT::vAny) && "Expected iAny or vAny type"); - } else + } else { VT = getValueType(TyEl->getValueAsDef("VT")); - - if (MVT(VT).isOverloaded()) { - OverloadedVTs.push_back(VT); - isOverloaded = true; + if (MVT(VT).isOverloaded()) { + OverloadedVTs.push_back(VT); + isOverloaded = true; + } } + // Reject invalid types. if (VT == MVT::isVoid && i != e-1 /*void at end means varargs*/) PrintFatalError("Intrinsic '" + DefName + " has void in result type list!"); @@ -694,6 +695,15 @@ } else if (Property->isSubClassOf("Returned")) { unsigned ArgNo = Property->getValueAsInt("ArgNo"); ArgumentAttributes.push_back(std::make_pair(ArgNo, Returned)); + } else if (Property->isSubClassOf("VectorLength")) { + unsigned ArgNo = Property->getValueAsInt("ArgNo"); + ArgumentAttributes.push_back(std::make_pair(ArgNo, VectorLength)); + } else if (Property->isSubClassOf("Mask")) { + unsigned ArgNo = Property->getValueAsInt("ArgNo"); + ArgumentAttributes.push_back(std::make_pair(ArgNo, Mask)); + } else if (Property->isSubClassOf("Passthru")) { + unsigned ArgNo = Property->getValueAsInt("ArgNo"); + ArgumentAttributes.push_back(std::make_pair(ArgNo, Passthru)); } else if (Property->isSubClassOf("ReadOnly")) { unsigned ArgNo = Property->getValueAsInt("ArgNo"); ArgumentAttributes.push_back(std::make_pair(ArgNo, ReadOnly)); Index: utils/TableGen/IntrinsicEmitter.cpp =================================================================== --- utils/TableGen/IntrinsicEmitter.cpp +++ utils/TableGen/IntrinsicEmitter.cpp @@ -594,6 +594,24 @@ OS << "Attribute::Returned"; addComma = true; break; + case CodeGenIntrinsic::VectorLength: + if (addComma) + OS << ","; + OS << "Attribute::VectorLength"; + addComma = true; + break; + case CodeGenIntrinsic::Mask: + if (addComma) + OS << ","; + OS << "Attribute::Mask"; + addComma = true; + break; + case CodeGenIntrinsic::Passthru: + if (addComma) + OS << ","; + OS << "Attribute::Passthru"; + addComma = true; + break; case CodeGenIntrinsic::ReadOnly: if (addComma) OS << ",";