Index: ../include/llvm/CodeGen/SelectionDAG.h
===================================================================
--- ../include/llvm/CodeGen/SelectionDAG.h
+++ ../include/llvm/CodeGen/SelectionDAG.h
@@ -964,6 +964,14 @@
                            ArrayRef<SDValue> Ops, MachineMemOperand *MMO);
   SDValue getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
                            ArrayRef<SDValue> Ops, MachineMemOperand *MMO);
+
+  /// Return (create a new or find existing) a target-specific node.
+  /// TargetMemSDNode should be a class derived from MemSDNode.
+  template <typename TargetMemSDNode>
+  SDValue getTargetMemSDNode(SDVTList VTs, ArrayRef<SDValue> Ops,
+                             const SDLoc &dl, EVT MemVT,
+                             MachineMemOperand *MMO);
+
   /// Construct a node to track a Value* through the backend.
   SDValue getSrcValue(const Value *v);
@@ -1418,6 +1426,42 @@
   }
 };
 
+template <typename TargetMemSDNode>
+SDValue SelectionDAG::getTargetMemSDNode(SDVTList VTs,
+                                         ArrayRef<SDValue> Ops,
+                                         const SDLoc &dl, EVT MemVT,
+                                         MachineMemOperand *MMO) {
+
+  /// Compose node ID and try to find an existing node.
+  FoldingSetNodeID ID;
+  unsigned Opcode =
+    TargetMemSDNode(dl.getIROrder(), DebugLoc(), VTs, MemVT, MMO).getOpcode();
+  ID.AddInteger(Opcode);
+  ID.AddPointer(VTs.VTs);
+  for (auto& Op : Ops) {
+    ID.AddPointer(Op.getNode());
+    ID.AddInteger(Op.getResNo());
+  }
+  ID.AddInteger(MemVT.getRawBits());
+  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+  ID.AddInteger(getSyntheticNodeSubclassData<TargetMemSDNode>(
+    dl.getIROrder(), VTs, MemVT, MMO));
+
+  void *IP = nullptr;
+  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+    cast<TargetMemSDNode>(E)->refineAlignment(MMO);
+    return SDValue(E, 0);
+  }
+
+  /// Existing node was not found. Create a new one.
+  auto *N = newSDNode<TargetMemSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+                                       MemVT, MMO);
+  createOperands(N, Ops);
+  CSEMap.InsertNode(N, IP);
+  InsertNode(N);
+  return SDValue(N, 0);
+}
+
 } // end namespace llvm
 
 #endif
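The helper above is the target-independent piece of this patch: any backend can route its own MemSDNode subclasses through the DAG's CSE map with it. A minimal usage sketch, assuming a hypothetical MyTruncStoreSDNode whose constructor has the (Order, DebugLoc, VTs, MemVT, MMO) shape the template requires, and with Chain/Val/Ptr/DL/MemVT/MMO in scope as they would be in a custom lowering hook:

    // Either finds an existing identical node (refining its alignment from
    // MMO) or creates a new one and registers it in the CSE map.
    SDVTList VTs = DAG.getVTList(MVT::Other);
    SDValue Ops[] = { Chain, Val, Ptr };
    SDValue Store =
        DAG.getTargetMemSDNode<MyTruncStoreSDNode>(VTs, Ops, DL, MemVT, MMO);

The X86 node classes introduced below are the first in-tree users.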
Index: ../lib/Target/X86/X86ISelLowering.h
===================================================================
--- ../lib/Target/X86/X86ISelLowering.h
+++ ../lib/Target/X86/X86ISelLowering.h
@@ -606,7 +606,12 @@
       /// This instruction grabs the address of the next argument
       /// from a va_list. (reads and modifies the va_list in memory)
-      VAARG_64
+      VAARG_64,
+
+      // Vector truncating store with unsigned/signed saturation
+      VTRUNCSTOREUS, VTRUNCSTORES,
+      // Vector truncating masked store with unsigned/signed saturation
+      VMTRUNCSTOREUS, VMTRUNCSTORES
 
       // WARNING: Do not add anything in the end unless you want the node to
       // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
@@ -1289,6 +1294,93 @@
     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                              const TargetLibraryInfo *libInfo);
   } // end namespace X86
+
+  // Base class for all X86 non-masked store operations.
+  class X86StoreSDNode : public MemSDNode {
+  public:
+    X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
+                   SDVTList VTs, EVT MemVT,
+                   MachineMemOperand *MMO)
+      : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
+    const SDValue &getValue() const { return getOperand(1); }
+    const SDValue &getBasePtr() const { return getOperand(2); }
+
+    static bool classof(const SDNode *N) {
+      return N->getOpcode() == X86ISD::VTRUNCSTORES ||
+             N->getOpcode() == X86ISD::VTRUNCSTOREUS;
+    }
+  };
+
+  // Base class for all X86 masked store operations.
+  // The class has the same order of operands as MaskedStoreSDNode for
+  // convenience.
+  class X86MaskedStoreSDNode : public MemSDNode {
+  public:
+    X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
+                         const DebugLoc &dl, SDVTList VTs, EVT MemVT,
+                         MachineMemOperand *MMO)
+      : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
+
+    const SDValue &getBasePtr() const { return getOperand(1); }
+    const SDValue &getMask() const { return getOperand(2); }
+    const SDValue &getValue() const { return getOperand(3); }
+
+    static bool classof(const SDNode *N) {
+      return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
+             N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
+    }
+  };
+
+  // X86 Truncating Store with Signed saturation.
+  class TruncSStoreSDNode : public X86StoreSDNode {
+  public:
+    TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
+                      SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
+      : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
+
+    static bool classof(const SDNode *N) {
+      return N->getOpcode() == X86ISD::VTRUNCSTORES;
+    }
+  };
+
+  // X86 Truncating Store with Unsigned saturation.
+  class TruncUSStoreSDNode : public X86StoreSDNode {
+  public:
+    TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
+                       SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
+      : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
+
+    static bool classof(const SDNode *N) {
+      return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
+    }
+  };
+
+  // X86 Truncating Masked Store with Signed saturation.
+  class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
+  public:
+    MaskedTruncSStoreSDNode(unsigned Order,
+                            const DebugLoc &dl, SDVTList VTs, EVT MemVT,
+                            MachineMemOperand *MMO)
+      : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
+
+    static bool classof(const SDNode *N) {
+      return N->getOpcode() == X86ISD::VMTRUNCSTORES;
+    }
+  };
+
+  // X86 Truncating Masked Store with Unsigned saturation.
+  class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
+  public:
+    MaskedTruncUSStoreSDNode(unsigned Order,
+                             const DebugLoc &dl, SDVTList VTs, EVT MemVT,
+                             MachineMemOperand *MMO)
+      : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
+
+    static bool classof(const SDNode *N) {
+      return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
+    }
+  };
+
 } // end namespace llvm
 
 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
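Since each class overrides classof, the usual isa<>/cast<>/dyn_cast<> machinery works on the new nodes, and the two base classes let code match both saturation flavors at once. A small sketch (St is an assumed SDNode*, e.g. inside a DAG combine):

    if (auto *TS = dyn_cast<X86StoreSDNode>(St)) {
      // Covers both VTRUNCSTORES and VTRUNCSTOREUS.
      SDValue Val = TS->getValue();   // operand 1
      SDValue Ptr = TS->getBasePtr(); // operand 2
      (void)Val; (void)Ptr;
    }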
Index: ../lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- ../lib/Target/X86/X86ISelLowering.cpp
+++ ../lib/Target/X86/X86ISelLowering.cpp
@@ -19499,6 +19499,33 @@
   return Chain;
 }
 
+/// Emit Truncating Store with signed or unsigned saturation.
+static SDValue
+EmitTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl, SDValue Val,
+                SDValue Ptr, EVT MemVT, MachineMemOperand *MMO,
+                SelectionDAG &DAG) {
+
+  SDVTList VTs = DAG.getVTList(MVT::Other);
+  SDValue Undef = DAG.getUNDEF(Ptr.getValueType());
+  SDValue Ops[] = { Chain, Val, Ptr, Undef };
+  return SignedSat ?
+    DAG.getTargetMemSDNode<TruncSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO) :
+    DAG.getTargetMemSDNode<TruncUSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO);
+}
+
+/// Emit Masked Truncating Store with signed or unsigned saturation.
+static SDValue
+EmitMaskedTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl,
+                      SDValue Val, SDValue Ptr, SDValue Mask, EVT MemVT,
+                      MachineMemOperand *MMO, SelectionDAG &DAG) {
+
+  SDVTList VTs = DAG.getVTList(MVT::Other);
+  SDValue Ops[] = { Chain, Ptr, Mask, Val };
+  return SignedSat ?
+    DAG.getTargetMemSDNode<MaskedTruncSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO) :
+    DAG.getTargetMemSDNode<MaskedTruncUSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO);
+}
+
 static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
                                       SelectionDAG &DAG) {
   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
@@ -19657,18 +19684,39 @@
     MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op);
     assert(MemIntr && "Expected MemIntrinsicSDNode!");
 
-    EVT VT = MemIntr->getMemoryVT();
+    EVT MemVT = MemIntr->getMemoryVT();
 
-    if (isAllOnesConstant(Mask)) // return just a truncate store
-      return DAG.getTruncStore(Chain, dl, DataToTruncate, Addr, VT,
-                               MemIntr->getMemOperand());
+    uint16_t TruncationOp = IntrData->Opc0;
+    switch (TruncationOp) {
+    case X86ISD::VTRUNC: {
+      if (isAllOnesConstant(Mask)) // return just a truncate store
+        return DAG.getTruncStore(Chain, dl, DataToTruncate, Addr, MemVT,
+                                 MemIntr->getMemOperand());
 
-    MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
-    SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+      MVT MaskVT = MVT::getVectorVT(MVT::i1, MemVT.getVectorNumElements());
+      SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
 
-    return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, VMask, VT,
-                              MemIntr->getMemOperand(), true /* truncating */);
+      return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, VMask, MemVT,
+                                MemIntr->getMemOperand(), true /* truncating */);
+    }
+    case X86ISD::VTRUNCUS:
+    case X86ISD::VTRUNCS: {
+      bool IsSigned = (TruncationOp == X86ISD::VTRUNCS);
+      if (isAllOnesConstant(Mask))
+        return EmitTruncSStore(IsSigned, Chain, dl, DataToTruncate, Addr, MemVT,
+                               MemIntr->getMemOperand(), DAG);
+
+      MVT MaskVT = MVT::getVectorVT(MVT::i1, MemVT.getVectorNumElements());
+      SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+
+      return EmitMaskedTruncSStore(IsSigned, Chain, dl, DataToTruncate, Addr,
+                                   VMask, MemVT, MemIntr->getMemOperand(), DAG);
+    }
+    default:
+      llvm_unreachable("Unsupported truncstore intrinsic");
+    }
   }
+
   case EXPAND_FROM_MEM: {
     SDValue Mask = Op.getOperand(4);
     SDValue PassThru = Op.getOperand(3);
@@ -23404,6 +23452,10 @@
   case X86ISD::VTRUNC: return "X86ISD::VTRUNC";
   case X86ISD::VTRUNCS: return "X86ISD::VTRUNCS";
   case X86ISD::VTRUNCUS: return "X86ISD::VTRUNCUS";
+  case X86ISD::VTRUNCSTORES: return "X86ISD::VTRUNCSTORES";
+  case X86ISD::VTRUNCSTOREUS: return "X86ISD::VTRUNCSTOREUS";
+  case X86ISD::VMTRUNCSTORES: return "X86ISD::VMTRUNCSTORES";
+  case X86ISD::VMTRUNCSTOREUS: return "X86ISD::VMTRUNCSTOREUS";
   case X86ISD::VINSERT: return "X86ISD::VINSERT";
   case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
   case X86ISD::VFPEXT_RND: return "X86ISD::VFPEXT_RND";
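One subtlety worth noting in the two emitters above: the unmasked nodes carry (Chain, Val, Ptr, Undef), while the masked nodes deliberately use MaskedStoreSDNode's (Chain, Ptr, Mask, Val) order, which is what makes the accessor offsets in X86ISelLowering.h line up. A hedged sketch of how downstream code would read the operands back (St is an assumed SDNode*):

    if (auto *MTS = dyn_cast<X86MaskedStoreSDNode>(St)) {
      SDValue Ptr  = MTS->getBasePtr(); // operand 1, MaskedStoreSDNode order
      SDValue Mask = MTS->getMask();    // operand 2
      SDValue Val  = MTS->getValue();   // operand 3
      (void)Ptr; (void)Mask; (void)Val;
    }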
Index: ../lib/Target/X86/X86InstrAVX512.td
===================================================================
--- ../lib/Target/X86/X86InstrAVX512.td
+++ ../lib/Target/X86/X86InstrAVX512.td
@@ -7309,23 +7309,6 @@
                 addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
 }
 
-multiclass avx512_trunc_sat_mr_lowering<X86VectorVTInfo SrcInfo,
-                                        X86VectorVTInfo DestInfo, string sat> {
-
-  def: Pat<(!cast<Intrinsic>("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix#
-                             DestInfo.Suffix#"_mem_"#SrcInfo.Size)
-            addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), SrcInfo.MRC:$mask),
-           (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mrk) addr:$ptr,
-            (COPY_TO_REGCLASS SrcInfo.MRC:$mask, SrcInfo.KRCWM),
-            (SrcInfo.VT SrcInfo.RC:$src))>;
-
-  def: Pat<(!cast<Intrinsic>("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix#
-                             DestInfo.Suffix#"_mem_"#SrcInfo.Size)
-            addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), -1),
-           (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mr) addr:$ptr,
-            (SrcInfo.VT SrcInfo.RC:$src))>;
-}
-
 multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128,
                         X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
@@ -7351,119 +7334,89 @@
                              truncFrag, mtruncFrag>, EVEX_V512;
 }
 
-multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr, SDNode OpNode,
-           AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128,
-           X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
-           X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
-           X86MemOperand x86memopZ, string sat, Predicate prd = HasAVX512>{
-
-  let Predicates = [HasVLX, prd] in {
-    defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
-               avx512_trunc_sat_mr_lowering<VTSrcInfo.info128, DestInfoZ128, sat>, EVEX_V128;
-
-    defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
-               avx512_trunc_sat_mr_lowering<VTSrcInfo.info256, DestInfoZ256, sat>, EVEX_V256;
-  }
-  let Predicates = [prd] in
-    defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info512, DestInfoZ, x86memopZ>,
-            avx512_trunc_sat_mr_lowering<VTSrcInfo.info512, DestInfoZ, sat>, EVEX_V512;
-}
-
-multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                           PatFrag StoreNode, PatFrag MaskedStoreNode> {
   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                           v16i8x_info, v16i8x_info, v16i8x_info, i16mem,
-                          i32mem, i64mem, truncstorevi8,
-                          masked_truncstorevi8>, EVEX_CD8<8, CD8VO>;
-}
-
-multiclass avx512_trunc_sat_qb<bits<8> opc, string sat, SDNode OpNode> {
-  defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"qb", OpNode, avx512vl_i64_info,
-                              v16i8x_info, v16i8x_info, v16i8x_info, i16mem,
-                              i32mem, i64mem, sat>, EVEX_CD8<8, CD8VO>;
+                          i32mem, i64mem,
+                          StoreNode, MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
 }
 
-multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                           PatFrag StoreNode, PatFrag MaskedStoreNode> {
   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                           v8i16x_info, v8i16x_info, v8i16x_info, i32mem,
-                          i64mem, i128mem, truncstorevi16,
-                          masked_truncstorevi16>, EVEX_CD8<16, CD8VQ>;
-}
-
-multiclass avx512_trunc_sat_qw<bits<8> opc, string sat, SDNode OpNode> {
-  defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"qw", OpNode, avx512vl_i64_info,
-                              v8i16x_info, v8i16x_info, v8i16x_info, i32mem,
-                              i64mem, i128mem, sat>, EVEX_CD8<16, CD8VQ>;
+                          i64mem, i128mem,
+                          StoreNode, MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
 }
 
-multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                           PatFrag StoreNode, PatFrag MaskedStoreNode> {
   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                           v4i32x_info, v4i32x_info, v8i32x_info, i64mem,
-                          i128mem, i256mem, truncstorevi32,
-                          masked_truncstorevi32>, EVEX_CD8<32, CD8VH>;
-}
-
-multiclass avx512_trunc_sat_qd<bits<8> opc, string sat, SDNode OpNode> {
-  defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"qd", OpNode, avx512vl_i64_info,
-                              v4i32x_info, v4i32x_info, v8i32x_info, i64mem,
-                              i128mem, i256mem, sat>, EVEX_CD8<32, CD8VH>;
+                          i128mem, i256mem,
+                          StoreNode, MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
 }
 
-multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                           PatFrag StoreNode, PatFrag MaskedStoreNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                          v16i8x_info, v16i8x_info, v16i8x_info, i32mem,
-                         i64mem, i128mem, truncstorevi8,
-                         masked_truncstorevi8>, EVEX_CD8<8, CD8VQ>;
-}
-
-multiclass avx512_trunc_sat_db<bits<8> opc, string sat, SDNode OpNode> {
-  defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"db", OpNode, avx512vl_i32_info,
-                              v16i8x_info, v16i8x_info, v16i8x_info, i32mem,
-                              i64mem, i128mem, sat>, EVEX_CD8<8, CD8VQ>;
+                         i64mem, i128mem,
+                         StoreNode, MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
 }
 
-multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           PatFrag StoreNode, PatFrag MaskedStoreNode> {
  defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                          v8i16x_info, v8i16x_info, v16i16x_info, i64mem,
-                         i128mem, i256mem, truncstorevi16,
-                         masked_truncstorevi16>, EVEX_CD8<16, CD8VH>;
-}
-
-multiclass avx512_trunc_sat_dw<bits<8> opc, string sat, SDNode OpNode> {
-  defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"dw", OpNode, avx512vl_i32_info,
-                              v8i16x_info, v8i16x_info, v16i16x_info, i64mem,
-                              i128mem, i256mem, sat>, EVEX_CD8<16, CD8VH>;
+                         i128mem, i256mem,
+                         StoreNode, MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
 }
 
-multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                           PatFrag StoreNode, PatFrag MaskedStoreNode> {
   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i16_info,
                           v16i8x_info, v16i8x_info, v32i8x_info, i64mem,
-                          i128mem, i256mem, truncstorevi8,
-                          masked_truncstorevi8, HasBWI>, EVEX_CD8<16, CD8VH>;
-}
-
-multiclass avx512_trunc_sat_wb<bits<8> opc, string sat, SDNode OpNode> {
-  defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"wb", OpNode, avx512vl_i16_info,
-                              v16i8x_info, v16i8x_info, v32i8x_info, i64mem,
-                              i128mem, i256mem, sat, HasBWI>, EVEX_CD8<16, CD8VH>;
+                          i128mem, i256mem,
+                          StoreNode, MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
 }
 
-defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", X86vtrunc>;
-defm VPMOVSQB : avx512_trunc_sat_qb<0x22, "s", X86vtruncs>;
-defm VPMOVUSQB : avx512_trunc_sat_qb<0x12, "us", X86vtruncus>;
-
-defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", X86vtrunc>;
-defm VPMOVSQW : avx512_trunc_sat_qw<0x24, "s", X86vtruncs>;
-defm VPMOVUSQW : avx512_trunc_sat_qw<0x14, "us", X86vtruncus>;
-
-defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", X86vtrunc>;
-defm VPMOVSQD : avx512_trunc_sat_qd<0x25, "s", X86vtruncs>;
-defm VPMOVUSQD : avx512_trunc_sat_qd<0x15, "us", X86vtruncus>;
-
-defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", X86vtrunc>;
-defm VPMOVSDB : avx512_trunc_sat_db<0x21, "s", X86vtruncs>;
-defm VPMOVUSDB : avx512_trunc_sat_db<0x11, "us", X86vtruncus>;
-
-defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", X86vtrunc>;
-defm VPMOVSDW : avx512_trunc_sat_dw<0x23, "s", X86vtruncs>;
-defm VPMOVUSDW : avx512_trunc_sat_dw<0x13, "us", X86vtruncus>;
-defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", X86vtrunc>;
-defm VPMOVSWB : avx512_trunc_sat_wb<0x20, "s", X86vtruncs>;
-defm VPMOVUSWB : avx512_trunc_sat_wb<0x10, "us", X86vtruncus>;
+defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", X86vtrunc,
+                               truncstorevi8, masked_truncstorevi8>;
+defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs,
+                                truncstore_s_vi8, masked_truncstore_s_vi8>;
+defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
+                                 truncstore_us_vi8, masked_truncstore_us_vi8>;
+
+defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", X86vtrunc,
+                               truncstorevi16, masked_truncstorevi16>;
+defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs,
+                                truncstore_s_vi16, masked_truncstore_s_vi16>;
+defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
+                                 truncstore_us_vi16, masked_truncstore_us_vi16>;
+
+defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", X86vtrunc,
+                               truncstorevi32, masked_truncstorevi32>;
+defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs,
+                                truncstore_s_vi32, masked_truncstore_s_vi32>;
+defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
+                                 truncstore_us_vi32, masked_truncstore_us_vi32>;
+
+defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", X86vtrunc,
+                               truncstorevi8, masked_truncstorevi8>;
+defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs,
+                                truncstore_s_vi8, masked_truncstore_s_vi8>;
+defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
+                                 truncstore_us_vi8, masked_truncstore_us_vi8>;
+
+defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", X86vtrunc,
+                               truncstorevi16, masked_truncstorevi16>;
+defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs,
+                                truncstore_s_vi16, masked_truncstore_s_vi16>;
+defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
+                                 truncstore_us_vi16, masked_truncstore_us_vi16>;
+
+defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", X86vtrunc,
+                               truncstorevi8, masked_truncstorevi8>;
+defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs,
+                                truncstore_s_vi8, masked_truncstore_s_vi8>;
+defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
+                                 truncstore_us_vi8, masked_truncstore_us_vi8>;
 
 let Predicates = [HasAVX512, NoVLX] in {
 def: Pat<(v8i16 (X86vtrunc (v8i32 VR256X:$src))),
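The StoreNode/MaskedStoreNode PatFrags threaded through these multiclasses are defined in the next hunk (X86InstrFragmentsSIMD.td) and discriminate on the node's memory scalar type. In C++ terms, the predicate of truncstore_s_vi8 is roughly equivalent to (a sketch, not the generated matcher code):

    // Matches an X86ISD::VTRUNCSTORES node that stores i8 elements.
    static bool isTruncStoreSVi8(const SDNode *N) {
      const auto *St = cast<MemSDNode>(N);
      return St->getOpcode() == X86ISD::VTRUNCSTORES &&
             St->getMemoryVT().getScalarType() == MVT::i8;
    }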
Index: ../lib/Target/X86/X86InstrFragmentsSIMD.td
===================================================================
--- ../lib/Target/X86/X86InstrFragmentsSIMD.td
+++ ../lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -1021,6 +1021,78 @@
   return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
 }]>;
 
+def X86TruncSStore : SDNode<"X86ISD::VTRUNCSTORES", SDTStore,
+                            [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+def X86TruncUSStore : SDNode<"X86ISD::VTRUNCSTOREUS", SDTStore,
+                             [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+def X86MTruncSStore : SDNode<"X86ISD::VMTRUNCSTORES", SDTMaskedStore,
+                             [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+def X86MTruncUSStore : SDNode<"X86ISD::VMTRUNCSTOREUS", SDTMaskedStore,
+                              [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+def truncstore_s_vi8 : PatFrag<(ops node:$val, node:$ptr),
+                               (X86TruncSStore node:$val, node:$ptr), [{
+  return cast<TruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+
+def truncstore_us_vi8 : PatFrag<(ops node:$val, node:$ptr),
+                                (X86TruncUSStore node:$val, node:$ptr), [{
+  return cast<TruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+
+def truncstore_s_vi16 : PatFrag<(ops node:$val, node:$ptr),
+                                (X86TruncSStore node:$val, node:$ptr), [{
+  return cast<TruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+
+def truncstore_us_vi16 : PatFrag<(ops node:$val, node:$ptr),
+                                 (X86TruncUSStore node:$val, node:$ptr), [{
+  return cast<TruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+
+def truncstore_s_vi32 : PatFrag<(ops node:$val, node:$ptr),
+                                (X86TruncSStore node:$val, node:$ptr), [{
+  return cast<TruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
+def truncstore_us_vi32 : PatFrag<(ops node:$val, node:$ptr),
+                                 (X86TruncUSStore node:$val, node:$ptr), [{
+  return cast<TruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
+def masked_truncstore_s_vi8 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                              (X86MTruncSStore node:$src1, node:$src2, node:$src3), [{
+  return cast<MaskedTruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+
+def masked_truncstore_us_vi8 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                               (X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{
+  return cast<MaskedTruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+
+def masked_truncstore_s_vi16 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                               (X86MTruncSStore node:$src1, node:$src2, node:$src3), [{
+  return cast<MaskedTruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+
+def masked_truncstore_us_vi16 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                                (X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{
+  return cast<MaskedTruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+
+def masked_truncstore_s_vi32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                               (X86MTruncSStore node:$src1, node:$src2, node:$src3), [{
+  return cast<MaskedTruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
+def masked_truncstore_us_vi32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                                (X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{
+  return cast<MaskedTruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
 def assertzext_i1 : PatFrag<(ops node:$src), (assertzext node:$src), [{
   return cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i1;
 }]>;
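The intrinsic table below is what connects the front-end intrinsics to the new lowering: each entry's IntrinsicType picks the TRUNCATE_TO_MEM_VI8/16/32 case in LowerINTRINSIC_W_CHAIN, and Opc0 carries the X86ISD truncation opcode the new switch dispatches on. Schematically (a sketch of the existing lookup machinery in X86IntrinsicsInfo.h, not new code):

    const IntrinsicData *IntrData = getIntrinsicWithChain(IntNo);
    if (IntrData && IntrData->Type == TRUNCATE_TO_MEM_VI8) {
      // IntrData->Opc0 is X86ISD::VTRUNC, VTRUNCS or VTRUNCUS; the saturating
      // variants now reach EmitTruncSStore/EmitMaskedTruncSStore.
    }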
SDNode<"X86ISD::VMTRUNCSTOREUS", SDTMaskedStore, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +def truncstore_s_vi8 : PatFrag<(ops node:$val, node:$ptr), + (X86TruncSStore node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; + +def truncstore_us_vi8 : PatFrag<(ops node:$val, node:$ptr), + (X86TruncUSStore node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; + +def truncstore_s_vi16 : PatFrag<(ops node:$val, node:$ptr), + (X86TruncSStore node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i16; +}]>; + +def truncstore_us_vi16 : PatFrag<(ops node:$val, node:$ptr), + (X86TruncUSStore node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i16; +}]>; + +def truncstore_s_vi32 : PatFrag<(ops node:$val, node:$ptr), + (X86TruncSStore node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i32; +}]>; + +def truncstore_us_vi32 : PatFrag<(ops node:$val, node:$ptr), + (X86TruncUSStore node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i32; +}]>; + +def masked_truncstore_s_vi8 : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (X86MTruncSStore node:$src1, node:$src2, node:$src3), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; + +def masked_truncstore_us_vi8 : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; + +def masked_truncstore_s_vi16 : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (X86MTruncSStore node:$src1, node:$src2, node:$src3), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i16; +}]>; + +def masked_truncstore_us_vi16 : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i16; +}]>; + +def masked_truncstore_s_vi32 : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (X86MTruncSStore node:$src1, node:$src2, node:$src3), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i32; +}]>; + +def masked_truncstore_us_vi32 : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i32; +}]>; + def assertzext_i1 : PatFrag<(ops node:$src), (assertzext node:$src), [{ return cast(N->getOperand(1))->getVT() == MVT::i1; Index: ../lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- ../lib/Target/X86/X86IntrinsicsInfo.h +++ ../lib/Target/X86/X86IntrinsicsInfo.h @@ -185,6 +185,79 @@ X86ISD::VTRUNC, 0), X86_INTRINSIC_DATA(avx512_mask_pmov_wb_mem_512, TRUNCATE_TO_MEM_VI8, X86ISD::VTRUNC, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovs_db_mem_128, TRUNCATE_TO_MEM_VI8, + X86ISD::VTRUNCS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovs_db_mem_256, TRUNCATE_TO_MEM_VI8, + X86ISD::VTRUNCS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovs_db_mem_512, TRUNCATE_TO_MEM_VI8, + X86ISD::VTRUNCS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_mem_128, TRUNCATE_TO_MEM_VI16, + X86ISD::VTRUNCS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_mem_256, TRUNCATE_TO_MEM_VI16, + X86ISD::VTRUNCS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_mem_512, TRUNCATE_TO_MEM_VI16, + X86ISD::VTRUNCS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_mem_128, TRUNCATE_TO_MEM_VI8, + X86ISD::VTRUNCS, 0), + 
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_mem_256, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_mem_512, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_mem_128, TRUNCATE_TO_MEM_VI32,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_mem_256, TRUNCATE_TO_MEM_VI32,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_mem_512, TRUNCATE_TO_MEM_VI32,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_mem_128, TRUNCATE_TO_MEM_VI16,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_mem_256, TRUNCATE_TO_MEM_VI16,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_mem_512, TRUNCATE_TO_MEM_VI16,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_mem_128, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_mem_256, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_mem_512, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_db_mem_128, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_db_mem_256, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_db_mem_512, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_mem_128, TRUNCATE_TO_MEM_VI16,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_mem_256, TRUNCATE_TO_MEM_VI16,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_mem_512, TRUNCATE_TO_MEM_VI16,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_mem_128, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_mem_256, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_mem_512, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_mem_128, TRUNCATE_TO_MEM_VI32,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_mem_256, TRUNCATE_TO_MEM_VI32,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_mem_512, TRUNCATE_TO_MEM_VI32,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_mem_128, TRUNCATE_TO_MEM_VI16,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_mem_256, TRUNCATE_TO_MEM_VI16,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_mem_512, TRUNCATE_TO_MEM_VI16,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_mem_128, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_mem_256, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_mem_512, TRUNCATE_TO_MEM_VI8,
+                     X86ISD::VTRUNCUS, 0),
+
   X86_INTRINSIC_DATA(avx512_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0),
   X86_INTRINSIC_DATA(avx512_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0),
   X86_INTRINSIC_DATA(avx512_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0),
Index: ../test/CodeGen/X86/avx512-intrinsics.ll
===================================================================
--- ../test/CodeGen/X86/avx512-intrinsics.ll
+++ ../test/CodeGen/X86/avx512-intrinsics.ll
@@ -2912,8 +2912,8 @@
 define void @test_int_x86_avx512_mask_pmovs_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_512:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsqb %zmm0, (%rdi)
 ; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovsqb %zmm0, (%rdi)
 ; CHECK-NEXT: vpmovsqb %zmm0, (%rdi) {%k1}
 ; CHECK-NEXT: retq
 call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
@@ -2946,8 +2946,8 @@
 define void @test_int_x86_avx512_mask_pmovus_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_512:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovusqb %zmm0, (%rdi)
 ; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovusqb %zmm0, (%rdi)
 ; CHECK-NEXT: vpmovusqb %zmm0, (%rdi) {%k1}
 ; CHECK-NEXT: retq
 call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
@@ -3014,8 +3014,8 @@
 define void @test_int_x86_avx512_mask_pmovs_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_512:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsqw %zmm0, (%rdi)
 ; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovsqw %zmm0, (%rdi)
 ; CHECK-NEXT: vpmovsqw %zmm0, (%rdi) {%k1}
 ; CHECK-NEXT: retq
 call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
@@ -3048,8 +3048,8 @@
 define void @test_int_x86_avx512_mask_pmovus_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_512:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovusqw %zmm0, (%rdi)
 ; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovusqw %zmm0, (%rdi)
 ; CHECK-NEXT: vpmovusqw %zmm0, (%rdi) {%k1}
 ; CHECK-NEXT: retq
 call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
@@ -3116,8 +3116,8 @@
 define void @test_int_x86_avx512_mask_pmovs_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_512:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsqd %zmm0, (%rdi)
 ; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovsqd %zmm0, (%rdi)
 ; CHECK-NEXT: vpmovsqd %zmm0, (%rdi) {%k1}
 ; CHECK-NEXT: retq
 call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
@@ -3150,8 +3150,8 @@
 define void @test_int_x86_avx512_mask_pmovus_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_512:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovusqd %zmm0, (%rdi)
 ; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovusqd %zmm0, (%rdi)
 ; CHECK-NEXT: vpmovusqd %zmm0, (%rdi) {%k1}
 ; CHECK-NEXT: retq
 call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
@@ -3218,8 +3218,8 @@
 define void @test_int_x86_avx512_mask_pmovs_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_512:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsdb %zmm0, (%rdi)
 ; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovsdb %zmm0, (%rdi)
 ; CHECK-NEXT: vpmovsdb %zmm0, (%rdi) {%k1}
 ; CHECK-NEXT: retq
 call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
@@ -3252,8 +3252,8 @@
 define void @test_int_x86_avx512_mask_pmovus_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_512:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovusdb %zmm0, (%rdi)
 ; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovusdb %zmm0, (%rdi)
 ; CHECK-NEXT: vpmovusdb %zmm0, (%rdi) {%k1}
 ; CHECK-NEXT: retq
 call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
@@ -3320,8 +3320,8 @@
 define void @test_int_x86_avx512_mask_pmovs_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_512:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsdw %zmm0, (%rdi)
 ; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovsdw %zmm0, (%rdi)
 ; CHECK-NEXT: vpmovsdw %zmm0, (%rdi) {%k1}
 ; CHECK-NEXT: retq
 call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
@@ -3354,8 +3354,8 @@
 define void @test_int_x86_avx512_mask_pmovus_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_512:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovusdw %zmm0, (%rdi)
 ; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovusdw %zmm0, (%rdi)
 ; CHECK-NEXT: vpmovusdw %zmm0, (%rdi) {%k1}
 ; CHECK-NEXT: retq
 call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
Index: ../test/CodeGen/X86/avx512bw-intrinsics.ll
===================================================================
--- ../test/CodeGen/X86/avx512bw-intrinsics.ll
+++ ../test/CodeGen/X86/avx512bw-intrinsics.ll
@@ -2010,18 +2010,17 @@
 define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
 ; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vpmovswb %zmm0, (%rdi)
 ; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpmovswb %zmm0, (%rdi)
 ; AVX512BW-NEXT: vpmovswb %zmm0, (%rdi) {%k1}
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
 ; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; AVX512F-32-NEXT: vpmovswb %zmm0, (%ecx)
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovswb %zmm0, (%ecx) {%k1}
+; AVX512F-32-NEXT: vpmovswb %zmm0, (%eax)
+; AVX512F-32-NEXT: vpmovswb %zmm0, (%eax) {%k1}
 ; AVX512F-32-NEXT: retl
 call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
 call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
@@ -2063,18 +2062,17 @@
 define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
 ; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vpmovuswb %zmm0, (%rdi)
 ; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpmovuswb %zmm0, (%rdi)
 ; AVX512BW-NEXT: vpmovuswb %zmm0, (%rdi) {%k1}
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
 ; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; AVX512F-32-NEXT: vpmovuswb %zmm0, (%ecx)
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovuswb %zmm0, (%ecx) {%k1}
+; AVX512F-32-NEXT: vpmovuswb %zmm0, (%eax)
+; AVX512F-32-NEXT: vpmovuswb %zmm0, (%eax) {%k1}
 ; AVX512F-32-NEXT: retl
 call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
 call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
Index: ../test/CodeGen/X86/avx512bwvl-intrinsics.ll
===================================================================
--- ../test/CodeGen/X86/avx512bwvl-intrinsics.ll
+++ ../test/CodeGen/X86/avx512bwvl-intrinsics.ll
@@ -4090,8 +4090,8 @@
 define void @test_int_x86_avx512_mask_pmovs_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_128:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovswb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x20,0x07]
 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovswb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x20,0x07]
 ; CHECK-NEXT: vpmovswb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x20,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
@@ -4124,8 +4124,8 @@
 define void @test_int_x86_avx512_mask_pmovus_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_128:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovuswb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x10,0x07]
 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovuswb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x10,0x07]
 ; CHECK-NEXT: vpmovuswb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x10,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
@@ -4192,8 +4192,8 @@
 define void @test_int_x86_avx512_mask_pmovs_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_256:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovswb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x20,0x07]
 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovswb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x20,0x07]
 ; CHECK-NEXT: vpmovswb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x20,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
@@ -4226,8 +4226,8 @@
 define void @test_int_x86_avx512_mask_pmovus_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_256:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovuswb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x10,0x07]
 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovuswb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x10,0x07]
 ; CHECK-NEXT: vpmovuswb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x10,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
Index: ../test/CodeGen/X86/avx512vl-intrinsics.ll
===================================================================
--- ../test/CodeGen/X86/avx512vl-intrinsics.ll
+++ ../test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -2040,8 +2040,8 @@
 define void @test_int_x86_avx512_mask_pmovs_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_128:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsqb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x22,0x07]
 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovsqb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x22,0x07]
 ; CHECK-NEXT: vpmovsqb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x22,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 call void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
@@ -2074,8 +2074,8 @@
 define void @test_int_x86_avx512_mask_pmovus_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_128:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovusqb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x12,0x07]
 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovusqb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x12,0x07]
 ; CHECK-NEXT: vpmovusqb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x12,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 call void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
@@ -2142,8 +2142,8 @@
 define void @test_int_x86_avx512_mask_pmovs_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_256:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsqb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x22,0x07]
 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovsqb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x22,0x07]
 ; CHECK-NEXT: vpmovsqb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x22,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 call void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
@@ -2176,8 +2176,8 @@
 define void @test_int_x86_avx512_mask_pmovus_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_256:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovusqb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x12,0x07]
 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovusqb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x12,0x07]
 ; CHECK-NEXT: vpmovusqb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x12,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 call void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
@@ -2244,8 +2244,8 @@
 define void @test_int_x86_avx512_mask_pmovs_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_128:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsqw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x24,0x07]
 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovsqw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x24,0x07]
 ; CHECK-NEXT: vpmovsqw %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x24,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 call void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
@@ -2278,8 +2278,8 @@
 define void @test_int_x86_avx512_mask_pmovus_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_128:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovusqw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x14,0x07]
 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovusqw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x14,0x07]
 ; CHECK-NEXT: vpmovusqw %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x14,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 call void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
@@ -2346,8 +2346,8 @@
 define void @test_int_x86_avx512_mask_pmovs_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_256:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsqw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x24,0x07]
 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovsqw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x24,0x07]
 ; CHECK-NEXT: vpmovsqw %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x24,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 call void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
@@ -2380,8 +2380,8 @@
 define void @test_int_x86_avx512_mask_pmovus_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_256:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovusqw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x14,0x07]
 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovusqw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x14,0x07]
 ; CHECK-NEXT: vpmovusqw %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x14,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 call void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
@@ -2448,8 +2448,8 @@
 define void @test_int_x86_avx512_mask_pmovs_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_128:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsqd %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x25,0x07]
 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovsqd %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x25,0x07]
 ; CHECK-NEXT: vpmovsqd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x25,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 call void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
@@ -2482,8 +2482,8 @@
 define void @test_int_x86_avx512_mask_pmovus_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_128:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovusqd %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x15,0x07]
 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovusqd %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x15,0x07]
 ; CHECK-NEXT: vpmovusqd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x15,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 call void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
@@ -2550,8 +2550,8 @@
 define void @test_int_x86_avx512_mask_pmovs_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_256:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsqd %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x25,0x07]
 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovsqd %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x25,0x07]
 ; CHECK-NEXT: vpmovsqd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x25,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 call void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
@@ -2584,8 +2584,8 @@
 define void @test_int_x86_avx512_mask_pmovus_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_256:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovusqd %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x15,0x07]
 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovusqd %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x15,0x07]
 ; CHECK-NEXT: vpmovusqd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x15,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 call void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
@@ -2652,8 +2652,8 @@
 define void @test_int_x86_avx512_mask_pmovs_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_128:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsdb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x21,0x07]
 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovsdb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x21,0x07]
 ; CHECK-NEXT: vpmovsdb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x21,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 call void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
@@ -2686,8 +2686,8 @@
 define void @test_int_x86_avx512_mask_pmovus_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_128:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovusdb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x11,0x07]
 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovusdb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x11,0x07]
 ; CHECK-NEXT: vpmovusdb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x11,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 call void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
@@ -2754,8 +2754,8 @@
 define void @test_int_x86_avx512_mask_pmovs_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_256:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsdb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x21,0x07]
 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovsdb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x21,0x07]
 ; CHECK-NEXT: vpmovsdb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x21,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 call void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
@@ -2788,8 +2788,8 @@
 define void @test_int_x86_avx512_mask_pmovus_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_256:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovusdb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x11,0x07]
 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovusdb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x11,0x07]
 ; CHECK-NEXT: vpmovusdb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x11,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 call void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
@@ -2856,8 +2856,8 @@
 define void @test_int_x86_avx512_mask_pmovs_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_128:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsdw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x23,0x07]
 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovsdw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x23,0x07]
 ; CHECK-NEXT: vpmovsdw %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x23,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 call void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
@@ -2890,8 +2890,8 @@
 define void @test_int_x86_avx512_mask_pmovus_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_128:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovusdw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x13,0x07]
 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovusdw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x13,0x07]
 ; CHECK-NEXT: vpmovusdw %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x13,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 call void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
@@ -2958,8 +2958,8 @@
 define void @test_int_x86_avx512_mask_pmovs_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_256:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsdw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x23,0x07]
 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovsdw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x23,0x07]
 ; CHECK-NEXT: vpmovsdw %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x23,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 call void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
@@ -2992,8 +2992,8 @@
 define void @test_int_x86_avx512_mask_pmovus_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_256:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovusdw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x13,0x07]
 ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovusdw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x13,0x07]
 ; CHECK-NEXT: vpmovusdw %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x13,0x07]
 ; CHECK-NEXT: retq ## encoding: [0xc3]
 call void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)