Index: lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -3605,6 +3605,7 @@ SDValue InOp0 = GetWidenedVector(N->getOperand(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(1)); SDLoc dl(N); + EVT VT = N->getValueType(0); // WARNING: In this code we widen the compare instruction with garbage. // This garbage may contain denormal floats which may be slow. Is this a real @@ -3614,8 +3615,13 @@ // Only some of the compared elements are legal. EVT SVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), InOp0.getValueType()); + // The result type is legal; if it's vXi1, keep vXi1 for the new SETCC. + if (VT.getScalarType() == MVT::i1) + SVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, + SVT.getVectorNumElements()); + SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N), - SVT, InOp0, InOp1, N->getOperand(2)); + SVT, InOp0, InOp1, N->getOperand(2)); // Extract the needed results from the result vector. EVT ResVT = EVT::getVectorVT(*DAG.getContext(), Index: lib/Target/X86/X86ISelDAGToDAG.cpp =================================================================== --- lib/Target/X86/X86ISelDAGToDAG.cpp +++ lib/Target/X86/X86ISelDAGToDAG.cpp @@ -460,7 +460,7 @@ // this happens we will use 512-bit operations and the mask will not be // zero extended. 
EVT OpVT = N->getOperand(0).getValueType(); - if (OpVT == MVT::v8i32 || OpVT == MVT::v8f32) + if (OpVT.is256BitVector() || OpVT.is128BitVector()) return Subtarget->hasVLX(); return true; Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -1144,6 +1144,8 @@ addRegisterClass(MVT::v8f64, &X86::VR512RegClass); addRegisterClass(MVT::v1i1, &X86::VK1RegClass); + addRegisterClass(MVT::v2i1, &X86::VK2RegClass); + addRegisterClass(MVT::v4i1, &X86::VK4RegClass); addRegisterClass(MVT::v8i1, &X86::VK8RegClass); addRegisterClass(MVT::v16i1, &X86::VK16RegClass); @@ -1167,8 +1169,14 @@ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom); setOperationAction(ISD::ZERO_EXTEND, MVT::v8i16, Custom); setOperationAction(ISD::ANY_EXTEND, MVT::v8i16, Custom); - - for (auto VT : { MVT::v8i1, MVT::v16i1 }) { + setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Custom); + setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Custom); + setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Custom); + + for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) { setOperationAction(ISD::ADD, VT, Custom); setOperationAction(ISD::SUB, VT, Custom); setOperationAction(ISD::MUL, VT, Custom); @@ -1184,6 +1192,9 @@ } setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom); for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, @@ -1520,39 +1531,6 @@ } if 
(!Subtarget.useSoftFloat() && Subtarget.hasVLX()) { - addRegisterClass(MVT::v4i1, &X86::VK4RegClass); - addRegisterClass(MVT::v2i1, &X86::VK2RegClass); - - for (auto VT : { MVT::v2i1, MVT::v4i1 }) { - setOperationAction(ISD::ADD, VT, Custom); - setOperationAction(ISD::SUB, VT, Custom); - setOperationAction(ISD::MUL, VT, Custom); - setOperationAction(ISD::VSELECT, VT, Expand); - - setOperationAction(ISD::TRUNCATE, VT, Custom); - setOperationAction(ISD::SETCC, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::SELECT, VT, Custom); - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); - } - - // TODO: v8i1 concat should be legal without VLX to support concats of - // v1i1, but we won't legalize it correctly currently without introducing - // a v4i1 concat in the middle. - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom); - - // Extends from v2i1/v4i1 masks to 128-bit vectors. 
- setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Custom); - setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal); setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal); setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal); @@ -4920,8 +4898,6 @@ } else if (VT.getVectorElementType() == MVT::i1) { assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && "Unexpected vector type"); - assert((Subtarget.hasVLX() || VT.getVectorNumElements() >= 8) && - "Unexpected vector type"); Vec = DAG.getConstant(0, dl, VT); } else { unsigned Num32BitElts = VT.getSizeInBits() / 32; @@ -17653,6 +17629,19 @@ assert(EltVT == MVT::f32 || EltVT == MVT::f64); #endif + // Custom widen MVT::v2f32 to prevent the default widening + // from getting a result type of v4i32, extracting it to v2i32 and then + // trying to sign extend that to v2i1. 
+ if (VT == MVT::v2i1 && Op1.getValueType() == MVT::v2f32) { + Op0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Op0, + DAG.getUNDEF(MVT::v2f32)); + Op1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Op1, + DAG.getUNDEF(MVT::v2f32)); + SDValue NewOp = DAG.getNode(ISD::SETCC, dl, MVT::v4i1, Op0, Op1, CC); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i1, NewOp, + DAG.getIntPtrConstant(0, dl)); + } + unsigned Opc; if (Subtarget.hasAVX512() && VT.getVectorElementType() == MVT::i1) { assert(VT.getVectorNumElements() <= 16); @@ -24268,8 +24257,8 @@ // Mask // At this point we have promoted mask operand - assert(MaskVT.getScalarSizeInBits() >= 32 && "unexpected mask type"); - MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), NumElts); + assert(MaskVT.getScalarType() == MVT::i1 && "unexpected mask type"); + MVT ExtMaskVT = MVT::getVectorVT(MVT::i1, NumElts); // Use the original mask here, do not modify the mask twice Mask = ExtendToType(N->getMask(), ExtMaskVT, DAG, true); @@ -24278,12 +24267,9 @@ Src = ExtendToType(Src, NewVT, DAG); } } - // If the mask is "wide" at this point - truncate it to i1 vector - MVT BitMaskVT = MVT::getVectorVT(MVT::i1, NumElts); - Mask = DAG.getNode(ISD::TRUNCATE, dl, BitMaskVT, Mask); // The mask is killed by scatter, add it to the values - SDVTList VTs = DAG.getVTList(BitMaskVT, MVT::Other); + SDVTList VTs = DAG.getVTList(Mask.getValueType(), MVT::Other); SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index}; SDValue NewScatter = DAG.getTargetMemSDNode( VTs, Ops, dl, N->getMemoryVT(), N->getMemOperand()); @@ -24306,11 +24292,6 @@ assert((!N->isExpandingLoad() || ScalarVT.getSizeInBits() >= 32) && "Expanding masked load is supported for 32 and 64-bit types only!"); - // 4x32, 4x64 and 2x64 vectors of non-expanding loads are legal regardless of - // VLX. These types for exp-loads are handled here. 
- if (!N->isExpandingLoad() && VT.getVectorNumElements() <= 4) - return Op; - assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() && "Cannot lower masked load op."); @@ -24327,16 +24308,12 @@ Src0 = ExtendToType(Src0, WideDataVT, DAG); // Mask element has to be i1. - MVT MaskEltTy = Mask.getSimpleValueType().getScalarType(); - assert((MaskEltTy == MVT::i1 || VT.getVectorNumElements() <= 4) && - "We handle 4x32, 4x64 and 2x64 vectors only in this case"); + assert(Mask.getSimpleValueType().getScalarType() == MVT::i1 && + "Unexpected mask type"); - MVT WideMaskVT = MVT::getVectorVT(MaskEltTy, NumEltsInWideVec); + MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec); Mask = ExtendToType(Mask, WideMaskVT, DAG, true); - if (MaskEltTy != MVT::i1) - Mask = DAG.getNode(ISD::TRUNCATE, dl, - MVT::getVectorVT(MVT::i1, NumEltsInWideVec), Mask); SDValue NewLoad = DAG.getMaskedLoad(WideDataVT, dl, N->getChain(), N->getBasePtr(), Mask, Src0, N->getMemoryVT(), N->getMemOperand(), @@ -24365,10 +24342,6 @@ assert((!N->isCompressingStore() || ScalarVT.getSizeInBits() >= 32) && "Expanding masked load is supported for 32 and 64-bit types only!"); - // 4x32 and 2x64 vectors of non-compressing stores are legal regardless to VLX. - if (!N->isCompressingStore() && VT.getVectorNumElements() <= 4) - return Op; - assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() && "Cannot lower masked store op."); @@ -24383,17 +24356,13 @@ MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec); // Mask element has to be i1. 
- MVT MaskEltTy = Mask.getSimpleValueType().getScalarType(); - assert((MaskEltTy == MVT::i1 || VT.getVectorNumElements() <= 4) && - "We handle 4x32, 4x64 and 2x64 vectors only in this case"); + assert(Mask.getSimpleValueType().getScalarType() == MVT::i1 && + "Unexpected mask type"); - MVT WideMaskVT = MVT::getVectorVT(MaskEltTy, NumEltsInWideVec); + MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec); DataToStore = ExtendToType(DataToStore, WideDataVT, DAG); Mask = ExtendToType(Mask, WideMaskVT, DAG, true); - if (MaskEltTy != MVT::i1) - Mask = DAG.getNode(ISD::TRUNCATE, dl, - MVT::getVectorVT(MVT::i1, NumEltsInWideVec), Mask); return DAG.getMaskedStore(N->getChain(), dl, DataToStore, N->getBasePtr(), Mask, N->getMemoryVT(), N->getMemOperand(), N->isTruncatingStore(), N->isCompressingStore()); @@ -24443,12 +24412,9 @@ Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index); // Mask - MVT MaskBitVT = MVT::getVectorVT(MVT::i1, NumElts); - // At this point we have promoted mask operand - assert(MaskVT.getScalarSizeInBits() >= 32 && "unexpected mask type"); - MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), NumElts); - Mask = ExtendToType(Mask, ExtMaskVT, DAG, true); - Mask = DAG.getNode(ISD::TRUNCATE, dl, MaskBitVT, Mask); + assert(MaskVT.getScalarType() == MVT::i1 && "unexpected mask type"); + MaskVT = MVT::getVectorVT(MVT::i1, NumElts); + Mask = ExtendToType(Mask, MaskVT, DAG, true); // The pass-through value MVT NewVT = MVT::getVectorVT(VT.getScalarType(), NumElts); @@ -24456,7 +24422,7 @@ SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index }; SDValue NewGather = DAG.getTargetMemSDNode( - DAG.getVTList(NewVT, MaskBitVT, MVT::Other), Ops, dl, N->getMemoryVT(), + DAG.getVTList(NewVT, MaskVT, MVT::Other), Ops, dl, N->getMemoryVT(), N->getMemOperand()); SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, NewGather.getValue(0), @@ -31417,10 +31383,16 @@ if (Subtarget.hasAVX512() && CondVT.isVector() && 
CondVT.getVectorElementType() == MVT::i1 && (VT.is128BitVector() || VT.is256BitVector()) && - (VT.getVectorElementType() == MVT::i8 || - VT.getVectorElementType() == MVT::i16) && - !(Subtarget.hasBWI() && Subtarget.hasVLX())) { - Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond); + (((VT.getVectorElementType() == MVT::i32 || + VT.getVectorElementType() == MVT::i64 || + VT.getVectorElementType() == MVT::f32 || + VT.getVectorElementType() == MVT::f64) && + !Subtarget.hasVLX()) || + ((VT.getVectorElementType() == MVT::i8 || + VT.getVectorElementType() == MVT::i16) && + !(Subtarget.hasBWI() && Subtarget.hasVLX())))) { + EVT ExtVT = VT.changeVectorElementTypeToInteger(); + Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, Cond); DCI.AddToWorklist(Cond.getNode()); return DAG.getNode(N->getOpcode(), DL, VT, Cond, LHS, RHS); } Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -2979,46 +2979,77 @@ defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, SSE_PSHUF>; defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, SSE_PSHUF>; -multiclass axv512_icmp_packed_no_vlx_lowering { -def : Pat<(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), - (COPY_TO_REGCLASS (!cast(InstStr##Zrr) - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>; - -def : Pat<(v8i1 (and VK8:$mask, - (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2)))), +multiclass axv512_icmp_packed_no_vlx_lowering { +def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1), + (Narrow.VT Narrow.RC:$src2))), + (COPY_TO_REGCLASS + (!cast(InstStr##Zrr) + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))), + Narrow.KRC)>; + +def : Pat<(Narrow.KVT (and 
Narrow.KRC:$mask, + (OpNode (Narrow.VT Narrow.RC:$src1), + (Narrow.VT Narrow.RC:$src2)))), (COPY_TO_REGCLASS (!cast(InstStr##Zrrk) - (COPY_TO_REGCLASS VK8:$mask, VK16), - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), - VK8)>; + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))), + Narrow.KRC)>; } multiclass axv512_icmp_packed_cc_no_vlx_lowering { -def : Pat<(v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)), - (COPY_TO_REGCLASS (!cast(InstStr##Zrri) - (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), - imm:$cc), VK8)>; - -def : Pat<(v8i1 (and VK8:$mask, (OpNode (_.info256.VT VR256X:$src1), - (_.info256.VT VR256X:$src2), imm:$cc))), - (COPY_TO_REGCLASS (!cast(InstStr##Zrrik) - (COPY_TO_REGCLASS VK8:$mask, VK16), - (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), - imm:$cc), VK8)>; + X86VectorVTInfo Narrow, + X86VectorVTInfo Wide> { +def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1), + (Narrow.VT Narrow.RC:$src2), imm:$cc)), + (COPY_TO_REGCLASS + (!cast(InstStr##Zrri) + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), + imm:$cc), Narrow.KRC)>; + +def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (OpNode (Narrow.VT Narrow.RC:$src1), + (Narrow.VT Narrow.RC:$src2), imm:$cc))), + (COPY_TO_REGCLASS (!cast(InstStr##Zrrik) + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, 
Narrow.SubRegIdx)), + imm:$cc), Narrow.KRC)>; } let Predicates = [HasAVX512, NoVLX] in { - defm : axv512_icmp_packed_no_vlx_lowering; - defm : axv512_icmp_packed_no_vlx_lowering; + defm : axv512_icmp_packed_no_vlx_lowering; + defm : axv512_icmp_packed_no_vlx_lowering; + + defm : axv512_icmp_packed_no_vlx_lowering; + defm : axv512_icmp_packed_no_vlx_lowering; + + defm : axv512_icmp_packed_no_vlx_lowering; + defm : axv512_icmp_packed_no_vlx_lowering; + + defm : axv512_icmp_packed_no_vlx_lowering; + defm : axv512_icmp_packed_no_vlx_lowering; + + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; - defm : axv512_icmp_packed_cc_no_vlx_lowering; - defm : axv512_icmp_packed_cc_no_vlx_lowering; - defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; } // Mask setting all 0s or 1s Index: lib/Target/X86/X86InstrVecCompiler.td =================================================================== --- lib/Target/X86/X86InstrVecCompiler.td +++ lib/Target/X86/X86InstrVecCompiler.td @@ -495,6 +495,18 @@ // If the bits are not zero we have to fall back to explicitly zeroing by // using shifts. 
+let Predicates = [HasAVX512] in { + def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV), + (v2i1 VK2:$mask), (iPTR 0))), + (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK2:$mask, VK16), + (i8 14)), (i8 14))>; + + def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV), + (v4i1 VK4:$mask), (iPTR 0))), + (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK4:$mask, VK16), + (i8 12)), (i8 12))>; +} + let Predicates = [HasAVX512, NoDQI] in { def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV), (v8i1 VK8:$mask), (iPTR 0))), @@ -506,9 +518,7 @@ def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV), (v8i1 VK8:$mask), (iPTR 0))), (COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK16)>; -} -let Predicates = [HasVLX, HasDQI] in { def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV), (v2i1 VK2:$mask), (iPTR 0))), (KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK2:$mask, VK8), @@ -519,17 +529,6 @@ (i8 4)), (i8 4))>; } -let Predicates = [HasVLX] in { - def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV), - (v2i1 VK2:$mask), (iPTR 0))), - (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK2:$mask, VK16), - (i8 14)), (i8 14))>; - def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV), - (v4i1 VK4:$mask), (iPTR 0))), - (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK4:$mask, VK16), - (i8 12)), (i8 12))>; -} - let Predicates = [HasBWI] in { def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV), (v16i1 VK16:$mask), (iPTR 0))), Index: test/Analysis/CostModel/X86/cast.ll =================================================================== --- test/Analysis/CostModel/X86/cast.ll +++ test/Analysis/CostModel/X86/cast.ll @@ -8,11 +8,17 @@ define i32 @add(i32 %arg) { ; CHECK-LABEL: for function 'add' ; -- Same size registeres -- - ;CHECK: cost of 1 {{.*}} zext + ;CHECK-AVX512: cost of 12 {{.*}} zext + ;CHECK-AVX2: cost of 1 {{.*}} zext + ;CHECK-AVX: cost of 1 {{.*}} zext %A = zext <4 x i1> undef to <4 x i32> - ;CHECK: cost of 2 {{.*}} sext + ;CHECK-AVX512: cost of 12 {{.*}} sext + ;CHECK-AVX2: cost of 2 
{{.*}} sext + ;CHECK-AVX: cost of 2 {{.*}} sext %B = sext <4 x i1> undef to <4 x i32> - ;CHECK: cost of 0 {{.*}} trunc + ;CHECK-AVX512: cost of 0 {{.*}} trunc + ;CHECK-AVX2: cost of 0 {{.*}} trunc + ;CHECK-AVX: cost of 0 {{.*}} trunc %C = trunc <4 x i32> undef to <4 x i1> ; -- Different size registers -- Index: test/CodeGen/X86/avx512-calling-conv.ll =================================================================== --- test/CodeGen/X86/avx512-calling-conv.ll +++ test/CodeGen/X86/avx512-calling-conv.ll @@ -57,21 +57,18 @@ } define <4 x i1> @test4(<4 x i1>%a, <4 x i1>%b) { -; KNL-LABEL: test4: -; KNL: ## %bb.0: -; KNL-NEXT: vandps %xmm1, %xmm0, %xmm0 -; KNL-NEXT: retq -; -; SKX-LABEL: test4: -; SKX: ## %bb.0: -; SKX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; SKX-NEXT: vpslld $31, %xmm0, %xmm0 -; SKX-NEXT: vpsrad $31, %xmm0, %xmm0 -; SKX-NEXT: retq +; ALL_X64-LABEL: test4: +; ALL_X64: ## %bb.0: +; ALL_X64-NEXT: vpand %xmm1, %xmm0, %xmm0 +; ALL_X64-NEXT: vpslld $31, %xmm0, %xmm0 +; ALL_X64-NEXT: vpsrad $31, %xmm0, %xmm0 +; ALL_X64-NEXT: retq ; ; KNL_X32-LABEL: test4: ; KNL_X32: ## %bb.0: -; KNL_X32-NEXT: vandps %xmm1, %xmm0, %xmm0 +; KNL_X32-NEXT: vpand %xmm1, %xmm0, %xmm0 +; KNL_X32-NEXT: vpslld $31, %xmm0, %xmm0 +; KNL_X32-NEXT: vpsrad $31, %xmm0, %xmm0 ; KNL_X32-NEXT: retl %c = and <4 x i1>%a, %b ret <4 x i1> %c Index: test/CodeGen/X86/avx512-cvt.ll =================================================================== --- test/CodeGen/X86/avx512-cvt.ll +++ test/CodeGen/X86/avx512-cvt.ll @@ -701,9 +701,9 @@ define <4 x float> @f64to4f32_mask(<4 x double> %b, <4 x i1> %mask) { ; NOVL-LABEL: f64to4f32_mask: ; NOVL: # %bb.0: +; NOVL-NEXT: vcvtpd2ps %ymm0, %xmm0 ; NOVL-NEXT: vpslld $31, %xmm1, %xmm1 ; NOVL-NEXT: vpsrad $31, %xmm1, %xmm1 -; NOVL-NEXT: vcvtpd2ps %ymm0, %xmm0 ; NOVL-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NOVL-NEXT: vzeroupper ; NOVL-NEXT: retq @@ -1591,12 +1591,15 @@ } define <4 x float> @sbto4f32(<4 x float> %a) { -; NOVL-LABEL: sbto4f32: -; NOVL: # %bb.0: -; 
NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; NOVL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 -; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0 -; NOVL-NEXT: retq +; NOVLDQ-LABEL: sbto4f32: +; NOVLDQ: # %bb.0: +; NOVLDQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NOVLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; NOVLDQ-NEXT: vcmpltps %zmm0, %zmm1, %k1 +; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; NOVLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0 +; NOVLDQ-NEXT: vzeroupper +; NOVLDQ-NEXT: retq ; ; VLDQ-LABEL: sbto4f32: ; VLDQ: # %bb.0: @@ -1614,19 +1617,30 @@ ; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; VLNODQ-NEXT: vcvtdq2ps %xmm0, %xmm0 ; VLNODQ-NEXT: retq +; +; AVX512DQ-LABEL: sbto4f32: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm1, %k0 +; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 +; AVX512DQ-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq %cmpres = fcmp ogt <4 x float> %a, zeroinitializer %1 = sitofp <4 x i1> %cmpres to <4 x float> ret <4 x float> %1 } define <4 x double> @sbto4f64(<4 x double> %a) { -; NOVL-LABEL: sbto4f64: -; NOVL: # %bb.0: -; NOVL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; NOVL-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 -; NOVL-NEXT: vpmovqd %zmm0, %ymm0 -; NOVL-NEXT: vcvtdq2pd %xmm0, %ymm0 -; NOVL-NEXT: retq +; NOVLDQ-LABEL: sbto4f64: +; NOVLDQ: # %bb.0: +; NOVLDQ-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NOVLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; NOVLDQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1 +; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; NOVLDQ-NEXT: vcvtdq2pd %xmm0, %ymm0 +; NOVLDQ-NEXT: retq ; ; VLDQ-LABEL: sbto4f64: ; VLDQ: # %bb.0: @@ -1644,18 +1658,30 @@ ; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; VLNODQ-NEXT: vcvtdq2pd %xmm0, %ymm0 ; VLNODQ-NEXT: retq +; +; AVX512DQ-LABEL: sbto4f64: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; AVX512DQ-NEXT: vxorpd %xmm1, 
%xmm1, %xmm1 +; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm1, %k0 +; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 +; AVX512DQ-NEXT: vcvtdq2pd %xmm0, %ymm0 +; AVX512DQ-NEXT: retq %cmpres = fcmp ogt <4 x double> %a, zeroinitializer %1 = sitofp <4 x i1> %cmpres to <4 x double> ret <4 x double> %1 } define <2 x float> @sbto2f32(<2 x float> %a) { -; NOVL-LABEL: sbto2f32: -; NOVL: # %bb.0: -; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; NOVL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 -; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0 -; NOVL-NEXT: retq +; NOVLDQ-LABEL: sbto2f32: +; NOVLDQ: # %bb.0: +; NOVLDQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NOVLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; NOVLDQ-NEXT: vcmpltps %zmm0, %zmm1, %k1 +; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; NOVLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0 +; NOVLDQ-NEXT: vzeroupper +; NOVLDQ-NEXT: retq ; ; VLDQ-LABEL: sbto2f32: ; VLDQ: # %bb.0: @@ -1673,19 +1699,31 @@ ; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; VLNODQ-NEXT: vcvtdq2ps %xmm0, %xmm0 ; VLNODQ-NEXT: retq +; +; AVX512DQ-LABEL: sbto2f32: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm1, %k0 +; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 +; AVX512DQ-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq %cmpres = fcmp ogt <2 x float> %a, zeroinitializer %1 = sitofp <2 x i1> %cmpres to <2 x float> ret <2 x float> %1 } define <2 x double> @sbto2f64(<2 x double> %a) { -; NOVL-LABEL: sbto2f64: -; NOVL: # %bb.0: -; NOVL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; NOVL-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 -; NOVL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] -; NOVL-NEXT: vcvtdq2pd %xmm0, %xmm0 -; NOVL-NEXT: retq +; NOVLDQ-LABEL: sbto2f64: +; NOVLDQ: # %bb.0: +; NOVLDQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NOVLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; NOVLDQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1 +; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, 
%zmm0 {%k1} {z} +; NOVLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0 +; NOVLDQ-NEXT: vzeroupper +; NOVLDQ-NEXT: retq ; ; VLDQ-LABEL: sbto2f64: ; VLDQ: # %bb.0: @@ -1703,6 +1741,16 @@ ; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; VLNODQ-NEXT: vcvtdq2pd %xmm0, %xmm0 ; VLNODQ-NEXT: retq +; +; AVX512DQ-LABEL: sbto2f64: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512DQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm1, %k0 +; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 +; AVX512DQ-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq %cmpres = fcmp ogt <2 x double> %a, zeroinitializer %1 = sitofp <2 x i1> %cmpres to <2 x double> ret <2 x double> %1 @@ -1925,10 +1973,12 @@ define <4 x float> @ubto4f32(<4 x i32> %a) { ; NOVL-LABEL: ubto4f32: ; NOVL: # %bb.0: +; NOVL-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NOVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NOVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] -; NOVL-NEXT: vpand %xmm1, %xmm0, %xmm0 +; NOVL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 +; NOVL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} +; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0 +; NOVL-NEXT: vzeroupper ; NOVL-NEXT: retq ; ; VL-LABEL: ubto4f32: @@ -1946,9 +1996,10 @@ define <4 x double> @ubto4f64(<4 x i32> %a) { ; NOVL-LABEL: ubto4f64: ; NOVL: # %bb.0: +; NOVL-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NOVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NOVL-NEXT: vpsrld $31, %xmm0, %xmm0 +; NOVL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 +; NOVL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} ; NOVL-NEXT: vcvtdq2pd %xmm0, %ymm0 ; NOVL-NEXT: retq ; @@ -1969,14 +2020,10 @@ ; NOVL: # %bb.0: ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; NOVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NOVL-NEXT: vpextrb $8, %xmm0, %eax -; NOVL-NEXT: andl $1, %eax -; NOVL-NEXT: vcvtsi2ssl %eax, 
%xmm2, %xmm1 -; NOVL-NEXT: vpextrb $0, %xmm0, %eax -; NOVL-NEXT: andl $1, %eax -; NOVL-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0 -; NOVL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] +; NOVL-NEXT: vpcmpltuq %zmm1, %zmm0, %k1 +; NOVL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} +; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0 +; NOVL-NEXT: vzeroupper ; NOVL-NEXT: retq ; ; VL-LABEL: ubto2f32: @@ -1997,10 +2044,8 @@ ; NOVL: # %bb.0: ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; NOVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NOVL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; NOVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] -; NOVL-NEXT: vpand %xmm1, %xmm0, %xmm0 +; NOVL-NEXT: vpcmpltuq %zmm1, %zmm0, %k1 +; NOVL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} ; NOVL-NEXT: vcvtudq2pd %ymm0, %zmm0 ; NOVL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NOVL-NEXT: vzeroupper Index: test/CodeGen/X86/avx512-ext.ll =================================================================== --- test/CodeGen/X86/avx512-ext.ll +++ test/CodeGen/X86/avx512-ext.ll @@ -300,9 +300,9 @@ define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { ; KNL-LABEL: zext_4x8mem_to_4x32: ; KNL: # %bb.0: +; KNL-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 ; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 -; KNL-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero ; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 ; KNL-NEXT: retq ; @@ -321,9 +321,9 @@ define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { ; KNL-LABEL: sext_4x8mem_to_4x32: ; KNL: # %bb.0: +; KNL-NEXT: vpmovsxbd (%rdi), %xmm1 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 ; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 -; KNL-NEXT: vpmovsxbd (%rdi), %xmm1 ; KNL-NEXT: vpand %xmm1, 
%xmm0, %xmm0 ; KNL-NEXT: retq ; @@ -342,13 +342,11 @@ define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { ; KNL-LABEL: zext_8x8mem_to_8x32: ; KNL: # %bb.0: -; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 -; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 -; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 ; KNL-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero -; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} -; KNL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 +; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; KNL-NEXT: vpslld $31, %ymm0, %ymm0 +; KNL-NEXT: vpsrad $31, %ymm0, %ymm0 +; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 ; KNL-NEXT: retq ; ; SKX-LABEL: zext_8x8mem_to_8x32: @@ -366,13 +364,11 @@ define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { ; KNL-LABEL: sext_8x8mem_to_8x32: ; KNL: # %bb.0: -; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 -; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 -; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 ; KNL-NEXT: vpmovsxbd (%rdi), %ymm1 -; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} -; KNL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 +; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; KNL-NEXT: vpslld $31, %ymm0, %ymm0 +; KNL-NEXT: vpsrad $31, %ymm0, %ymm0 +; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 ; KNL-NEXT: retq ; ; SKX-LABEL: sext_8x8mem_to_8x32: @@ -490,9 +486,9 @@ define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone { ; KNL-LABEL: zext_2x8mem_to_2x64: ; KNL: # %bb.0: +; KNL-NEXT: vpmovzxbq {{.*#+}} xmm1 = 
mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 ; KNL-NEXT: vpsraq $63, %zmm0, %zmm0 -; KNL-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero ; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 ; KNL-NEXT: retq ; @@ -510,9 +506,9 @@ define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone { ; KNL-LABEL: sext_2x8mem_to_2x64mask: ; KNL: # %bb.0: +; KNL-NEXT: vpmovsxbq (%rdi), %xmm1 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 ; KNL-NEXT: vpsraq $63, %zmm0, %zmm0 -; KNL-NEXT: vpmovsxbq (%rdi), %xmm1 ; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 ; KNL-NEXT: retq ; @@ -540,10 +536,10 @@ define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { ; KNL-LABEL: zext_4x8mem_to_4x64: ; KNL: # %bb.0: +; KNL-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 ; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 ; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; KNL-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero ; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 ; KNL-NEXT: retq ; @@ -562,10 +558,10 @@ define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { ; KNL-LABEL: sext_4x8mem_to_4x64mask: ; KNL: # %bb.0: +; KNL-NEXT: vpmovsxbq (%rdi), %ymm1 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 ; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 ; KNL-NEXT: vpmovsxdq %xmm0, %ymm0 -; KNL-NEXT: vpmovsxbq (%rdi), %ymm1 ; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 ; KNL-NEXT: retq ; @@ -646,9 +642,9 @@ define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> 
%mask) nounwind readnone { ; KNL-LABEL: zext_4x16mem_to_4x32: ; KNL: # %bb.0: +; KNL-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 ; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 -; KNL-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero ; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 ; KNL-NEXT: retq ; @@ -667,9 +663,9 @@ define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { ; KNL-LABEL: sext_4x16mem_to_4x32mask: ; KNL: # %bb.0: +; KNL-NEXT: vpmovsxwd (%rdi), %xmm1 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 ; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 -; KNL-NEXT: vpmovsxwd (%rdi), %xmm1 ; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 ; KNL-NEXT: retq ; @@ -699,13 +695,11 @@ define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { ; KNL-LABEL: zext_8x16mem_to_8x32: ; KNL: # %bb.0: -; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 -; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 -; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 ; KNL-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero -; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} -; KNL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 +; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; KNL-NEXT: vpslld $31, %ymm0, %ymm0 +; KNL-NEXT: vpsrad $31, %ymm0, %ymm0 +; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 ; KNL-NEXT: retq ; ; SKX-LABEL: zext_8x16mem_to_8x32: @@ -723,13 +717,11 @@ define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { ; KNL-LABEL: sext_8x16mem_to_8x32mask: ; KNL: # %bb.0: -; KNL-NEXT: vpmovsxwq %xmm0, %zmm0 -; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 -; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 ; KNL-NEXT: vpmovsxwd (%rdi), %ymm1 -; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; KNL-NEXT: 
vmovdqa32 %zmm1, %zmm0 {%k1} -; KNL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 +; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; KNL-NEXT: vpslld $31, %ymm0, %ymm0 +; KNL-NEXT: vpsrad $31, %ymm0, %ymm0 +; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 ; KNL-NEXT: retq ; ; SKX-LABEL: sext_8x16mem_to_8x32mask: @@ -757,13 +749,11 @@ define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone { ; KNL-LABEL: zext_8x16_to_8x32mask: ; KNL: # %bb.0: -; KNL-NEXT: vpmovsxwq %xmm1, %zmm1 -; KNL-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1 -; KNL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} -; KNL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 +; KNL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; KNL-NEXT: vpslld $31, %ymm1, %ymm1 +; KNL-NEXT: vpsrad $31, %ymm1, %ymm1 +; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0 ; KNL-NEXT: retq ; ; SKX-LABEL: zext_8x16_to_8x32mask: @@ -869,9 +859,9 @@ define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone { ; KNL-LABEL: zext_2x16mem_to_2x64: ; KNL: # %bb.0: +; KNL-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 ; KNL-NEXT: vpsraq $63, %zmm0, %zmm0 -; KNL-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero ; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 ; KNL-NEXT: retq ; @@ -890,9 +880,9 @@ define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone { ; 
KNL-LABEL: sext_2x16mem_to_2x64mask: ; KNL: # %bb.0: +; KNL-NEXT: vpmovsxwq (%rdi), %xmm1 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 ; KNL-NEXT: vpsraq $63, %zmm0, %zmm0 -; KNL-NEXT: vpmovsxwq (%rdi), %xmm1 ; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 ; KNL-NEXT: retq ; @@ -921,10 +911,10 @@ define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { ; KNL-LABEL: zext_4x16mem_to_4x64: ; KNL: # %bb.0: +; KNL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 ; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 ; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; KNL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero ; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 ; KNL-NEXT: retq ; @@ -943,10 +933,10 @@ define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { ; KNL-LABEL: sext_4x16mem_to_4x64mask: ; KNL: # %bb.0: +; KNL-NEXT: vpmovsxwq (%rdi), %ymm1 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 ; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 ; KNL-NEXT: vpmovsxdq %xmm0, %ymm0 -; KNL-NEXT: vpmovsxwq (%rdi), %ymm1 ; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 ; KNL-NEXT: retq ; @@ -1056,9 +1046,9 @@ define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone { ; KNL-LABEL: zext_2x32mem_to_2x64: ; KNL: # %bb.0: +; KNL-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 ; KNL-NEXT: vpsraq $63, %zmm0, %zmm0 -; KNL-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero ; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 ; KNL-NEXT: retq ; @@ -1077,9 +1067,9 @@ define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone { ; KNL-LABEL: sext_2x32mem_to_2x64mask: ; KNL: # %bb.0: +; KNL-NEXT: vpmovsxdq (%rdi), %xmm1 ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 ; KNL-NEXT: vpsraq $63, 
%zmm0, %zmm0 -; KNL-NEXT: vpmovsxdq (%rdi), %xmm1 ; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 ; KNL-NEXT: retq ; @@ -1108,10 +1098,10 @@ define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone { ; KNL-LABEL: zext_4x32mem_to_4x64: ; KNL: # %bb.0: +; KNL-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 ; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 ; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; KNL-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero ; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 ; KNL-NEXT: retq ; @@ -1130,10 +1120,10 @@ define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone { ; KNL-LABEL: sext_4x32mem_to_4x64mask: ; KNL: # %bb.0: +; KNL-NEXT: vpmovsxdq (%rdi), %ymm1 ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 ; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 ; KNL-NEXT: vpmovsxdq %xmm0, %ymm0 -; KNL-NEXT: vpmovsxdq (%rdi), %ymm1 ; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 ; KNL-NEXT: retq ; @@ -1171,10 +1161,10 @@ define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone { ; KNL-LABEL: zext_4x32_to_4x64mask: ; KNL: # %bb.0: +; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; KNL-NEXT: vpslld $31, %xmm1, %xmm1 ; KNL-NEXT: vpsrad $31, %xmm1, %xmm1 ; KNL-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero -; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0 ; KNL-NEXT: retq ; Index: test/CodeGen/X86/avx512-insert-extract.ll =================================================================== --- test/CodeGen/X86/avx512-insert-extract.ll +++ test/CodeGen/X86/avx512-insert-extract.ll @@ -845,40 +845,20 @@ define i8 @test_iinsertelement_v4i1(i32 %a, i32 %b, <4 x i32> %x , <4 x i32> %y) { ; KNL-LABEL: 
test_iinsertelement_v4i1: ; KNL: ## %bb.0: +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL-NEXT: cmpl %esi, %edi ; KNL-NEXT: setb %al -; KNL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; KNL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; KNL-NEXT: vpextrb $4, %xmm0, %ecx -; KNL-NEXT: kmovw %ecx, %k0 -; KNL-NEXT: vpextrb $0, %xmm0, %ecx -; KNL-NEXT: andl $1, %ecx -; KNL-NEXT: kmovw %ecx, %k1 -; KNL-NEXT: kshiftrw $1, %k0, %k2 -; KNL-NEXT: kshiftlw $1, %k2, %k2 -; KNL-NEXT: korw %k1, %k2, %k1 -; KNL-NEXT: kshiftrw $1, %k1, %k2 -; KNL-NEXT: kxorw %k0, %k2, %k0 -; KNL-NEXT: kshiftlw $15, %k0, %k0 -; KNL-NEXT: kshiftrw $14, %k0, %k0 -; KNL-NEXT: kxorw %k1, %k0, %k0 +; KNL-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; KNL-NEXT: kshiftrw $2, %k0, %k1 ; KNL-NEXT: kmovw %eax, %k2 ; KNL-NEXT: kxorw %k2, %k1, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $13, %k1, %k1 ; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $3, %k0, %k1 -; KNL-NEXT: vpextrb $12, %xmm0, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $12, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: ## kill: def %al killed %al killed %eax +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test_iinsertelement_v4i1: @@ -906,18 +886,11 @@ define i8 @test_iinsertelement_v2i1(i32 %a, i32 %b, <2 x i64> %x , <2 x i64> %y) { ; KNL-LABEL: test_iinsertelement_v2i1: ; KNL: ## %bb.0: +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL-NEXT: cmpl %esi, %edi ; KNL-NEXT: setb %al -; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, 
%xmm0 -; KNL-NEXT: vpextrb $0, %xmm0, %ecx -; KNL-NEXT: andl $1, %ecx -; KNL-NEXT: kmovw %ecx, %k0 -; KNL-NEXT: kshiftrw $1, %k0, %k1 -; KNL-NEXT: kshiftlw $1, %k1, %k1 -; KNL-NEXT: korw %k0, %k1, %k0 +; KNL-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ; KNL-NEXT: kshiftrw $1, %k0, %k1 ; KNL-NEXT: kmovw %eax, %k2 ; KNL-NEXT: kxorw %k2, %k1, %k1 @@ -926,6 +899,7 @@ ; KNL-NEXT: kxorw %k0, %k1, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: ## kill: def %al killed %al killed %eax +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test_iinsertelement_v2i1: @@ -953,15 +927,15 @@ define zeroext i8 @test_extractelement_v2i1(<2 x i64> %a, <2 x i64> %b) { ; KNL-LABEL: test_extractelement_v2i1: ; KNL: ## %bb.0: -; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; KNL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vpextrb $0, %xmm0, %eax +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; KNL-NEXT: vpcmpnleuq %zmm1, %zmm0, %k0 +; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: andb $1, %al ; KNL-NEXT: movb $4, %cl ; KNL-NEXT: subb %al, %cl ; KNL-NEXT: movzbl %cl, %eax +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test_extractelement_v2i1: @@ -982,15 +956,15 @@ define zeroext i8 @extractelement_v2i1_alt(<2 x i64> %a, <2 x i64> %b) { ; KNL-LABEL: extractelement_v2i1_alt: ; KNL: ## %bb.0: -; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; KNL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vpextrb $0, %xmm0, %eax +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; KNL-NEXT: vpcmpnleuq %zmm1, %zmm0, %k0 +; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: andb $1, %al ; KNL-NEXT: movb $4, %cl ; KNL-NEXT: subb %al, %cl ; KNL-NEXT: movzbl %cl, %eax +; KNL-NEXT: 
vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: extractelement_v2i1_alt: @@ -1012,12 +986,13 @@ define zeroext i8 @test_extractelement_v4i1(<4 x i32> %a, <4 x i32> %b) { ; KNL-LABEL: test_extractelement_v4i1: ; KNL: ## %bb.0: -; KNL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; KNL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vpextrd $3, %xmm0, %eax +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 +; KNL-NEXT: kshiftrw $3, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: andl $1, %eax +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test_extractelement_v4i1: @@ -1551,14 +1526,15 @@ ; KNL-LABEL: test_extractelement_varible_v2i1: ; KNL: ## %bb.0: ; KNL-NEXT: ## kill: def %edi killed %edi def %rdi -; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; KNL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; KNL-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 +; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: vextracti32x4 $0, %zmm0, -{{[0-9]+}}(%rsp) ; KNL-NEXT: andl $1, %edi -; KNL-NEXT: movl -24(%rsp,%rdi,8), %eax +; KNL-NEXT: movzbl -24(%rsp,%rdi,8), %eax ; KNL-NEXT: andl $1, %eax +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test_extractelement_varible_v2i1: @@ -1581,14 +1557,15 @@ ; KNL-LABEL: test_extractelement_varible_v4i1: ; KNL: ## %bb.0: ; KNL-NEXT: ## kill: def %edi killed %edi def %rdi -; KNL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; KNL-NEXT: vpcmpgtd 
%xmm1, %xmm0, %xmm0 -; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k1 +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: vextracti32x4 $0, %zmm0, -{{[0-9]+}}(%rsp) ; KNL-NEXT: andl $3, %edi -; KNL-NEXT: movl -24(%rsp,%rdi,4), %eax +; KNL-NEXT: movzbl -24(%rsp,%rdi,4), %eax ; KNL-NEXT: andl $1, %eax +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test_extractelement_varible_v4i1: Index: test/CodeGen/X86/avx512-intrinsics-upgrade.ll =================================================================== --- test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -3004,19 +3004,9 @@ define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) { ; CHECK-LABEL: test_mask_vextractf32x4: ; CHECK: ## %bb.0: -; CHECK-NEXT: vmovd %edi, %xmm2 -; CHECK-NEXT: kmovw %edi, %k0 -; CHECK-NEXT: kshiftrw $3, %k0, %k1 -; CHECK-NEXT: kmovw %k1, %eax -; CHECK-NEXT: kshiftrw $2, %k0, %k1 -; CHECK-NEXT: kmovw %k1, %ecx -; CHECK-NEXT: kshiftrw $1, %k0, %k0 -; CHECK-NEXT: kmovw %k0, %edx -; CHECK-NEXT: vpinsrb $4, %edx, %xmm2, %xmm2 -; CHECK-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2 -; CHECK-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 ; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm1 -; CHECK-NEXT: vpslld $31, %xmm2, %xmm2 +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; CHECK-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i32 2, <4 x float> %b, i8 %mask) @@ -3029,19 +3019,8 @@ ; CHECK-LABEL: test_mask_vextracti64x4: ; CHECK: ## %bb.0: ; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm1 -; CHECK-NEXT: vmovd %edi, %xmm2 -; CHECK-NEXT: kmovw %edi, %k0 -; CHECK-NEXT: kshiftrw $3, %k0, %k1 -; CHECK-NEXT: kmovw %k1, %eax -; CHECK-NEXT: 
kshiftrw $2, %k0, %k1 -; CHECK-NEXT: kmovw %k1, %ecx -; CHECK-NEXT: kshiftrw $1, %k0, %k0 -; CHECK-NEXT: kmovw %k0, %edx -; CHECK-NEXT: vpinsrb $4, %edx, %xmm2, %xmm2 -; CHECK-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2 -; CHECK-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; CHECK-NEXT: vpslld $31, %xmm2, %xmm2 -; CHECK-NEXT: vpmovsxdq %xmm2, %ymm2 +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; CHECK-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: retq %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i32 1, <4 x i64> %b, i8 %mask) @@ -3053,20 +3032,9 @@ define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) { ; CHECK-LABEL: test_maskz_vextracti32x4: ; CHECK: ## %bb.0: -; CHECK-NEXT: vmovd %edi, %xmm1 -; CHECK-NEXT: kmovw %edi, %k0 -; CHECK-NEXT: kshiftrw $3, %k0, %k1 -; CHECK-NEXT: kmovw %k1, %eax -; CHECK-NEXT: kshiftrw $2, %k0, %k1 -; CHECK-NEXT: kmovw %k1, %ecx -; CHECK-NEXT: kshiftrw $1, %k0, %k0 -; CHECK-NEXT: kmovw %k0, %edx -; CHECK-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; CHECK-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; CHECK-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm0 -; CHECK-NEXT: vpslld $31, %xmm1, %xmm1 -; CHECK-NEXT: vpsrad $31, %xmm1, %xmm1 +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0 ; CHECK-NEXT: retq %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i32 2, <4 x i32> zeroinitializer, i8 %mask) Index: test/CodeGen/X86/avx512-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512-intrinsics.ll +++ test/CodeGen/X86/avx512-intrinsics.ll @@ -3064,7 +3064,9 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_512: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vcvtpd2dq %zmm0, %ymm1 {%k1} +; CHECK-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} +; 
CHECK-NEXT: vcvtpd2dq %zmm0, %ymm3 +; CHECK-NEXT: vblendvps %ymm2, %ymm3, %ymm1, %ymm1 ; CHECK-NEXT: vcvtpd2dq {rn-sae}, %zmm0, %ymm0 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -3080,7 +3082,9 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_512: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vcvtpd2ps %zmm0, %ymm1 {%k1} +; CHECK-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} +; CHECK-NEXT: vcvtpd2ps %zmm0, %ymm3 +; CHECK-NEXT: vblendvps %ymm2, %ymm3, %ymm1, %ymm1 ; CHECK-NEXT: vcvtpd2ps {ru-sae}, %zmm0, %ymm0 ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -3096,7 +3100,9 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2udq_512: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vcvtpd2udq {ru-sae}, %zmm0, %ymm1 {%k1} +; CHECK-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} +; CHECK-NEXT: vcvtpd2udq {ru-sae}, %zmm0, %ymm3 +; CHECK-NEXT: vblendvps %ymm2, %ymm3, %ymm1, %ymm1 ; CHECK-NEXT: vcvtpd2udq {rn-sae}, %zmm0, %ymm0 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -3160,7 +3166,9 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_512: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vcvttpd2dq %zmm0, %ymm1 {%k1} +; CHECK-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} +; CHECK-NEXT: vcvttpd2dq %zmm0, %ymm3 +; CHECK-NEXT: vblendvps %ymm2, %ymm3, %ymm1, %ymm1 ; CHECK-NEXT: vcvttpd2dq {sae}, %zmm0, %ymm0 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -3192,7 +3200,9 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_512: ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vcvttpd2udq %zmm0, %ymm1 {%k1} +; CHECK-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} +; CHECK-NEXT: vcvttpd2udq %zmm0, %ymm3 +; CHECK-NEXT: vblendvps %ymm2, %ymm3, %ymm1, %ymm1 ; CHECK-NEXT: vcvttpd2udq {sae}, %zmm0, %ymm0 ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ; CHECK-NEXT: retq Index: test/CodeGen/X86/avx512-mask-op.ll 
=================================================================== --- test/CodeGen/X86/avx512-mask-op.ll +++ test/CodeGen/X86/avx512-mask-op.ll @@ -498,11 +498,15 @@ define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) { ; KNL-LABEL: test4: ; KNL: ## %bb.0: -; KNL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; KNL-NEXT: vpmovqd %zmm0, %ymm0 -; KNL-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm1 -; KNL-NEXT: vpmovqd %zmm1, %ymm1 -; KNL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 +; KNL-NEXT: ## kill: def %ymm3 killed %ymm3 def %zmm3 +; KNL-NEXT: ## kill: def %ymm2 killed %ymm2 def %zmm2 +; KNL-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1 +; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0 +; KNL-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; KNL-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 +; KNL-NEXT: kandnw %k0, %k1, %k1 +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; @@ -517,21 +521,29 @@ ; ; AVX512BW-LABEL: test4: ; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512BW-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm1 -; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: ## kill: def %ymm3 killed %ymm3 def %zmm3 +; AVX512BW-NEXT: ## kill: def %ymm2 killed %ymm2 def %zmm2 +; AVX512BW-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1 +; AVX512BW-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0 +; AVX512BW-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; AVX512BW-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 +; AVX512BW-NEXT: kandnw %k0, %k1, %k1 +; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test4: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512DQ-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm1 -; 
AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512DQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 +; AVX512DQ-NEXT: ## kill: def %ymm3 killed %ymm3 def %zmm3 +; AVX512DQ-NEXT: ## kill: def %ymm2 killed %ymm2 def %zmm2 +; AVX512DQ-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1 +; AVX512DQ-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0 +; AVX512DQ-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; AVX512DQ-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 +; AVX512DQ-NEXT: kandnw %k0, %k1, %k0 +; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 +; AVX512DQ-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq %x_gt_y = icmp sgt <4 x i64> %x, %y @@ -544,9 +556,16 @@ define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) { ; KNL-LABEL: test5: ; KNL: ## %bb.0: -; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; KNL-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm1 -; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; KNL-NEXT: ## kill: def %xmm3 killed %xmm3 def %zmm3 +; KNL-NEXT: ## kill: def %xmm2 killed %xmm2 def %zmm2 +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 +; KNL-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 +; KNL-NEXT: kandnw %k1, %k0, %k1 +; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test5: @@ -559,16 +578,30 @@ ; ; AVX512BW-LABEL: test5: ; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; AVX512BW-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm1 -; AVX512BW-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; AVX512BW-NEXT: ## kill: def %xmm3 killed %xmm3 def %zmm3 +; AVX512BW-NEXT: ## kill: def %xmm2 killed %xmm2 def %zmm2 +; AVX512BW-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512BW-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 +; AVX512BW-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 +; AVX512BW-NEXT: kandnw %k1, %k0, 
%k1 +; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test5: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; AVX512DQ-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm1 -; AVX512DQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; AVX512DQ-NEXT: ## kill: def %xmm3 killed %xmm3 def %zmm3 +; AVX512DQ-NEXT: ## kill: def %xmm2 killed %xmm2 def %zmm2 +; AVX512DQ-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512DQ-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512DQ-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 +; AVX512DQ-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 +; AVX512DQ-NEXT: kandnw %k1, %k0, %k0 +; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0 +; AVX512DQ-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq %x_gt_y = icmp slt <2 x i64> %x, %y %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1 @@ -795,10 +828,17 @@ ; KNL-LABEL: test11: ; KNL: ## %bb.0: ; KNL-NEXT: cmpl %esi, %edi -; KNL-NEXT: jg LBB20_2 -; KNL-NEXT: ## %bb.1: -; KNL-NEXT: vmovaps %xmm1, %xmm0 -; KNL-NEXT: LBB20_2: +; KNL-NEXT: jg LBB20_1 +; KNL-NEXT: ## %bb.2: +; KNL-NEXT: vpslld $31, %xmm1, %xmm0 +; KNL-NEXT: jmp LBB20_3 +; KNL-NEXT: LBB20_1: +; KNL-NEXT: vpslld $31, %xmm0, %xmm0 +; KNL-NEXT: LBB20_3: +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test11: @@ -818,19 +858,33 @@ ; AVX512BW-LABEL: test11: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: cmpl %esi, %edi -; AVX512BW-NEXT: jg LBB20_2 -; AVX512BW-NEXT: ## %bb.1: -; AVX512BW-NEXT: vmovaps %xmm1, %xmm0 -; AVX512BW-NEXT: LBB20_2: +; AVX512BW-NEXT: jg LBB20_1 +; AVX512BW-NEXT: ## %bb.2: +; AVX512BW-NEXT: vpslld $31, %xmm1, %xmm0 +; AVX512BW-NEXT: jmp LBB20_3 +; AVX512BW-NEXT: LBB20_1: +; AVX512BW-NEXT: vpslld $31, %xmm0, 
%xmm0 +; AVX512BW-NEXT: LBB20_3: +; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k1 +; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test11: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: cmpl %esi, %edi -; AVX512DQ-NEXT: jg LBB20_2 -; AVX512DQ-NEXT: ## %bb.1: -; AVX512DQ-NEXT: vmovaps %xmm1, %xmm0 -; AVX512DQ-NEXT: LBB20_2: +; AVX512DQ-NEXT: jg LBB20_1 +; AVX512DQ-NEXT: ## %bb.2: +; AVX512DQ-NEXT: vpslld $31, %xmm1, %xmm0 +; AVX512DQ-NEXT: jmp LBB20_3 +; AVX512DQ-NEXT: LBB20_1: +; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 +; AVX512DQ-NEXT: LBB20_3: +; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 +; AVX512DQ-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq %mask = icmp sgt i32 %a1, %b1 %c = select i1 %mask, <4 x i1>%a, <4 x i1>%b @@ -1271,8 +1325,7 @@ define void @test22(<4 x i1> %a, <4 x i1>* %addr) { ; KNL-LABEL: test22: ; KNL: ## %bb.0: -; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %ymm0 -; KNL-NEXT: vpslld $31, %ymm0, %ymm0 +; KNL-NEXT: vpslld $31, %xmm0, %xmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: movb %al, (%rdi) @@ -1288,8 +1341,7 @@ ; ; AVX512BW-LABEL: test22: ; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 def %ymm0 -; AVX512BW-NEXT: vpslld $31, %ymm0, %ymm0 +; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: movb %al, (%rdi) @@ -1298,8 +1350,7 @@ ; ; AVX512DQ-LABEL: test22: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: ## kill: def %xmm0 killed %xmm0 def %ymm0 -; AVX512DQ-NEXT: vpslld $31, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 ; AVX512DQ-NEXT: kmovb %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper @@ -1311,8 +1362,7 @@ define void @test23(<2 
x i1> %a, <2 x i1>* %addr) { ; KNL-LABEL: test23: ; KNL: ## %bb.0: -; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 -; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 +; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: movb %al, (%rdi) @@ -1328,8 +1378,7 @@ ; ; AVX512BW-LABEL: test23: ; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 -; AVX512BW-NEXT: vpsllq $63, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: movb %al, (%rdi) @@ -1338,8 +1387,7 @@ ; ; AVX512DQ-LABEL: test23: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 -; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0 ; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0 ; AVX512DQ-NEXT: kmovb %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper @@ -1390,10 +1438,9 @@ define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) { ; KNL-LABEL: store_v2i1: ; KNL: ## %bb.0: -; KNL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 +; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 +; KNL-NEXT: knotw %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: movb %al, (%rdi) ; KNL-NEXT: vzeroupper @@ -1409,10 +1456,9 @@ ; ; AVX512BW-LABEL: store_v2i1: ; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpsllq $63, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: knotw %k0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: movb %al, (%rdi) ; AVX512BW-NEXT: vzeroupper @@ -1420,10 +1466,9 @@ ; ; AVX512DQ-LABEL: store_v2i1: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpsllq $63, 
%zmm0, %zmm0 +; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0 ; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: knotw %k0, %k0 ; AVX512DQ-NEXT: kmovb %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq @@ -1435,10 +1480,9 @@ define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) { ; KNL-LABEL: store_v4i1: ; KNL: ## %bb.0: -; KNL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vpslld $31, %ymm0, %ymm0 +; KNL-NEXT: vpslld $31, %xmm0, %xmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: knotw %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: movb %al, (%rdi) ; KNL-NEXT: vzeroupper @@ -1454,10 +1498,9 @@ ; ; AVX512BW-LABEL: store_v4i1: ; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpslld $31, %ymm0, %ymm0 +; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: knotw %k0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: movb %al, (%rdi) ; AVX512BW-NEXT: vzeroupper @@ -1465,10 +1508,9 @@ ; ; AVX512DQ-LABEL: store_v4i1: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpslld $31, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: knotw %k0, %k0 ; AVX512DQ-NEXT: kmovb %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq Index: test/CodeGen/X86/avx512-vec-cmp.ll =================================================================== --- test/CodeGen/X86/avx512-vec-cmp.ll +++ test/CodeGen/X86/avx512-vec-cmp.ll @@ -111,11 +111,8 @@ define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind { ; KNL-LABEL: test9: ; KNL: ## %bb.0: -; KNL-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1 -; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0 -; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 -; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} -; 
KNL-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0 +; KNL-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm2 +; KNL-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test9: @@ -131,11 +128,8 @@ define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind { ; KNL-LABEL: test10: ; KNL: ## %bb.0: -; KNL-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1 -; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0 -; KNL-NEXT: vcmpeqps %zmm1, %zmm0, %k1 -; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} -; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0 +; KNL-NEXT: vcmpeqps %ymm1, %ymm0, %ymm2 +; KNL-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test10: @@ -629,12 +623,8 @@ define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, <8 x float>* %yp) nounwind { ; KNL-LABEL: test35: ; KNL: ## %bb.0: -; KNL-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1 -; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0 -; KNL-NEXT: vmovups (%rdi), %ymm2 -; KNL-NEXT: vcmpltps %zmm2, %zmm0, %k1 -; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} -; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0 +; KNL-NEXT: vcmpltps (%rdi), %ymm0, %ymm2 +; KNL-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test35: @@ -743,12 +733,9 @@ define <8 x float> @test41(<8 x float> %x, <8 x float> %x1, float* %ptr) nounwind { ; KNL-LABEL: test41: ; KNL: ## %bb.0: -; KNL-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1 -; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0 ; KNL-NEXT: vbroadcastss (%rdi), %ymm2 -; KNL-NEXT: vcmpltps %zmm2, %zmm0, %k1 -; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} -; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0 +; KNL-NEXT: vcmpltps %ymm2, %ymm0, %ymm2 +; KNL-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test41: Index: test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll =================================================================== --- 
test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll +++ test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll @@ -7,13 +7,8 @@ ; CHECK-LABEL: test_int_x86_avx512_mask_vextractf64x2_512: ; CHECK: ## %bb.0: ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 -; CHECK-NEXT: vmovd %edi, %xmm2 ; CHECK-NEXT: kmovw %edi, %k0 -; CHECK-NEXT: kshiftrb $1, %k0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; CHECK-NEXT: vpsllq $63, %xmm2, %xmm2 -; CHECK-NEXT: vpsraq $63, %zmm2, %zmm2 +; CHECK-NEXT: vpmovm2q %k0, %zmm2 ; CHECK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 ; CHECK-NEXT: vandpd %xmm0, %xmm2, %xmm2 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 @@ -32,12 +27,13 @@ define <8 x float>@test_int_x86_avx512_mask_vextractf32x8(<16 x float> %x0, <8 x float> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_vextractf32x8: ; CHECK: ## %bb.0: -; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2 -; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm1 {%k1} -; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1 -; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm0 {%k1} {z} -; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0 +; CHECK-NEXT: kmovw %edi, %k0 +; CHECK-NEXT: vpmovm2d %k0, %zmm2 +; CHECK-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm1 +; CHECK-NEXT: vandps %ymm0, %ymm2, %ymm2 +; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ; CHECK-NEXT: retq %res = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> %x2, i8 %x3) %res2 = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> zeroinitializer, i8 %x3) Index: test/CodeGen/X86/avx512dq-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512dq-intrinsics.ll +++ test/CodeGen/X86/avx512dq-intrinsics.ll @@ -85,13 +85,23 @@ declare <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64>, <8 x float>, i8, i32) define <8 
x float>@test_int_x86_avx512_mask_cvt_qq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vcvtqq2ps %zmm0, %ymm1 {%k1} -; CHECK-NEXT: vcvtqq2ps {rn-sae}, %zmm0, %ymm0 -; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 -; CHECK-NEXT: retq +; AVX512DQ-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_512: +; AVX512DQ: ## %bb.0: +; AVX512DQ-NEXT: kmovw %edi, %k0 +; AVX512DQ-NEXT: vpmovm2d %k0, %zmm2 +; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm3 +; AVX512DQ-NEXT: vblendvps %ymm2, %ymm3, %ymm1, %ymm1 +; AVX512DQ-NEXT: vcvtqq2ps {rn-sae}, %zmm0, %ymm0 +; AVX512DQ-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; AVX512DQ-NEXT: retq +; +; AVX512DQVL-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_512: +; AVX512DQVL: ## %bb.0: +; AVX512DQVL-NEXT: kmovw %edi, %k1 +; AVX512DQVL-NEXT: vcvtqq2ps %zmm0, %ymm1 {%k1} +; AVX512DQVL-NEXT: vcvtqq2ps {rn-sae}, %zmm0, %ymm0 +; AVX512DQVL-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; AVX512DQVL-NEXT: retq %res = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4) %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0) %res2 = fadd <8 x float> %res, %res1 @@ -181,13 +191,23 @@ declare <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64>, <8 x float>, i8, i32) define <8 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) { -; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_512: -; CHECK: ## %bb.0: -; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vcvtuqq2ps %zmm0, %ymm1 {%k1} -; CHECK-NEXT: vcvtuqq2ps {rn-sae}, %zmm0, %ymm0 -; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0 -; CHECK-NEXT: retq +; AVX512DQ-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_512: +; AVX512DQ: ## %bb.0: +; AVX512DQ-NEXT: kmovw %edi, %k0 +; AVX512DQ-NEXT: vpmovm2d %k0, %zmm2 +; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm3 +; AVX512DQ-NEXT: vblendvps %ymm2, %ymm3, %ymm1, %ymm1 +; 
AVX512DQ-NEXT: vcvtuqq2ps {rn-sae}, %zmm0, %ymm0 +; AVX512DQ-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; AVX512DQ-NEXT: retq +; +; AVX512DQVL-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_512: +; AVX512DQVL: ## %bb.0: +; AVX512DQVL-NEXT: kmovw %edi, %k1 +; AVX512DQVL-NEXT: vcvtuqq2ps %zmm0, %ymm1 {%k1} +; AVX512DQVL-NEXT: vcvtuqq2ps {rn-sae}, %zmm0, %ymm0 +; AVX512DQVL-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; AVX512DQVL-NEXT: retq %res = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4) %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0) %res2 = fadd <8 x float> %res, %res1 Index: test/CodeGen/X86/avx512vl-vec-cmp.ll =================================================================== --- test/CodeGen/X86/avx512vl-vec-cmp.ll +++ test/CodeGen/X86/avx512vl-vec-cmp.ll @@ -45,12 +45,10 @@ ; ; NoVLX-LABEL: test256_3: ; NoVLX: # %bb.0: -; NoVLX-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 -; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 -; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k1 -; NoVLX-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 +; NoVLX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 +; NoVLX-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3 +; NoVLX-NEXT: vpxor %ymm3, %ymm0, %ymm0 +; NoVLX-NEXT: vblendvps %ymm0, %ymm2, %ymm1, %ymm0 ; NoVLX-NEXT: retq %mask = icmp sge <8 x i32> %x, %y %max = select <8 x i1> %mask, <8 x i32> %x1, <8 x i32> %y @@ -86,12 +84,8 @@ ; ; NoVLX-LABEL: test256_5: ; NoVLX: # %bb.0: -; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 -; NoVLX-NEXT: vmovdqu (%rdi), %ymm2 -; NoVLX-NEXT: vpcmpeqd %zmm2, %zmm0, %k1 -; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 +; NoVLX-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm2 +; NoVLX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, 
%ymm0 ; NoVLX-NEXT: retq %y = load <8 x i32>, <8 x i32>* %yp, align 4 %mask = icmp eq <8 x i32> %x, %y @@ -108,12 +102,8 @@ ; ; NoVLX-LABEL: test256_5b: ; NoVLX: # %bb.0: -; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 -; NoVLX-NEXT: vmovdqu (%rdi), %ymm2 -; NoVLX-NEXT: vpcmpeqd %zmm0, %zmm2, %k1 -; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 +; NoVLX-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm2 +; NoVLX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: retq %y = load <8 x i32>, <8 x i32>* %yp, align 4 %mask = icmp eq <8 x i32> %y, %x @@ -130,12 +120,8 @@ ; ; NoVLX-LABEL: test256_6: ; NoVLX: # %bb.0: -; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 -; NoVLX-NEXT: vmovdqu (%rdi), %ymm2 -; NoVLX-NEXT: vpcmpgtd %zmm2, %zmm0, %k1 -; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 +; NoVLX-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm2 +; NoVLX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: retq %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4 %mask = icmp sgt <8 x i32> %x, %y @@ -152,12 +138,8 @@ ; ; NoVLX-LABEL: test256_6b: ; NoVLX: # %bb.0: -; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 -; NoVLX-NEXT: vmovdqu (%rdi), %ymm2 -; NoVLX-NEXT: vpcmpgtd %zmm2, %zmm0, %k1 -; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 +; NoVLX-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm2 +; NoVLX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: retq %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4 %mask = icmp slt <8 x i32> %y, %x @@ -174,12 +156,10 @@ ; ; NoVLX-LABEL: test256_7: ; NoVLX: # %bb.0: -; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 -; NoVLX-NEXT: 
vmovdqu (%rdi), %ymm2 -; NoVLX-NEXT: vpcmpled %zmm2, %zmm0, %k1 -; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 +; NoVLX-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm2 +; NoVLX-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3 +; NoVLX-NEXT: vpxor %ymm3, %ymm2, %ymm2 +; NoVLX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: retq %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4 %mask = icmp sle <8 x i32> %x, %y @@ -196,12 +176,10 @@ ; ; NoVLX-LABEL: test256_7b: ; NoVLX: # %bb.0: -; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 -; NoVLX-NEXT: vmovdqu (%rdi), %ymm2 -; NoVLX-NEXT: vpcmpled %zmm2, %zmm0, %k1 -; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 +; NoVLX-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm2 +; NoVLX-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3 +; NoVLX-NEXT: vpxor %ymm3, %ymm2, %ymm2 +; NoVLX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: retq %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4 %mask = icmp sge <8 x i32> %y, %x @@ -218,12 +196,9 @@ ; ; NoVLX-LABEL: test256_8: ; NoVLX: # %bb.0: -; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 -; NoVLX-NEXT: vmovdqu (%rdi), %ymm2 -; NoVLX-NEXT: vpcmpleud %zmm2, %zmm0, %k1 -; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 +; NoVLX-NEXT: vpminud (%rdi), %ymm0, %ymm2 +; NoVLX-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm2 +; NoVLX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: retq %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4 %mask = icmp ule <8 x i32> %x, %y @@ -240,12 +215,10 @@ ; ; NoVLX-LABEL: test256_8b: ; NoVLX: # %bb.0: -; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqu (%rdi), %ymm2 -; NoVLX-NEXT: vpcmpnltud %zmm0, %zmm2, %k1 -; NoVLX-NEXT: 
vpblendmd %zmm0, %zmm1, %zmm0 {%k1} -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 +; NoVLX-NEXT: vpmaxud %ymm0, %ymm2, %ymm3 +; NoVLX-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2 +; NoVLX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: retq %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4 %mask = icmp uge <8 x i32> %y, %x @@ -263,14 +236,10 @@ ; ; NoVLX-LABEL: test256_9: ; NoVLX: # %bb.0: -; NoVLX-NEXT: # kill: def %ymm3 killed %ymm3 def %zmm3 -; NoVLX-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 -; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 -; NoVLX-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1} -; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 +; NoVLX-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2 +; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm3 +; NoVLX-NEXT: vpand %ymm2, %ymm3, %ymm2 +; NoVLX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: retq %mask1 = icmp eq <8 x i32> %x1, %y1 %mask0 = icmp eq <8 x i32> %x, %y @@ -289,10 +258,10 @@ ; ; NoVLX-LABEL: test256_10: ; NoVLX: # %bb.0: +; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm3 ; NoVLX-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4 ; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3 -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm1 ; NoVLX-NEXT: vpandn %ymm3, %ymm1, %ymm1 ; NoVLX-NEXT: vblendvpd %ymm1, %ymm0, %ymm2, %ymm0 ; NoVLX-NEXT: retq @@ -336,14 +305,11 @@ ; ; NoVLX-LABEL: test256_12: ; NoVLX: # %bb.0: -; NoVLX-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 -; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 -; NoVLX-NEXT: vmovdqu (%rdi), %ymm3 -; NoVLX-NEXT: vpcmpleud %zmm3, %zmm0, %k1 -; NoVLX-NEXT: vpcmpled %zmm1, %zmm2, %k1 {%k1} -; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 +; NoVLX-NEXT: vpcmpgtd %ymm1, 
%ymm2, %ymm2 +; NoVLX-NEXT: vpminud (%rdi), %ymm0, %ymm3 +; NoVLX-NEXT: vpcmpeqd %ymm3, %ymm0, %ymm3 +; NoVLX-NEXT: vpandn %ymm3, %ymm2, %ymm2 +; NoVLX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: retq %mask1 = icmp sge <8 x i32> %x1, %y1 %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4 @@ -383,12 +349,11 @@ ; ; NoVLX-LABEL: test256_14: ; NoVLX: # %bb.0: -; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm2 -; NoVLX-NEXT: vpcmpled %zmm2, %zmm0, %k1 -; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 +; NoVLX-NEXT: vpcmpgtd %ymm2, %ymm0, %ymm2 +; NoVLX-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3 +; NoVLX-NEXT: vpxor %ymm3, %ymm2, %ymm2 +; NoVLX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: retq %yb = load i32, i32* %yb.ptr, align 4 %y.0 = insertelement <8 x i32> undef, i32 %yb, i32 0 @@ -408,14 +373,11 @@ ; ; NoVLX-LABEL: test256_15: ; NoVLX: # %bb.0: -; NoVLX-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 -; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm3 -; NoVLX-NEXT: vpcmpgtd %zmm3, %zmm0, %k1 -; NoVLX-NEXT: vpcmpled %zmm1, %zmm2, %k1 {%k1} -; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 +; NoVLX-NEXT: vpcmpgtd %ymm1, %ymm2, %ymm2 +; NoVLX-NEXT: vpcmpgtd %ymm3, %ymm0, %ymm3 +; NoVLX-NEXT: vpandn %ymm3, %ymm2, %ymm2 +; NoVLX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: retq %mask1 = icmp sge <8 x i32> %x1, %y1 %yb = load i32, i32* %yb.ptr, align 4 @@ -437,8 +399,8 @@ ; ; NoVLX-LABEL: test256_16: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm2 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm3 +; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm2 ; NoVLX-NEXT: vpcmpgtq %ymm3, %ymm0, %ymm3 ; NoVLX-NEXT: vpandn %ymm3, %ymm2, %ymm2 ; 
NoVLX-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 @@ -462,12 +424,10 @@ ; ; NoVLX-LABEL: test256_17: ; NoVLX: # %bb.0: -; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 -; NoVLX-NEXT: vmovdqu (%rdi), %ymm2 -; NoVLX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1 -; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 +; NoVLX-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm2 +; NoVLX-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3 +; NoVLX-NEXT: vpxor %ymm3, %ymm2, %ymm2 +; NoVLX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: retq %y = load <8 x i32>, <8 x i32>* %yp, align 4 %mask = icmp ne <8 x i32> %x, %y @@ -484,12 +444,10 @@ ; ; NoVLX-LABEL: test256_18: ; NoVLX: # %bb.0: -; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 -; NoVLX-NEXT: vmovdqu (%rdi), %ymm2 -; NoVLX-NEXT: vpcmpneqd %zmm0, %zmm2, %k1 -; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 +; NoVLX-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm2 +; NoVLX-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3 +; NoVLX-NEXT: vpxor %ymm3, %ymm2, %ymm2 +; NoVLX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: retq %y = load <8 x i32>, <8 x i32>* %yp, align 4 %mask = icmp ne <8 x i32> %y, %x @@ -506,12 +464,9 @@ ; ; NoVLX-LABEL: test256_19: ; NoVLX: # %bb.0: -; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 -; NoVLX-NEXT: vmovdqu (%rdi), %ymm2 -; NoVLX-NEXT: vpcmpnltud %zmm2, %zmm0, %k1 -; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 +; NoVLX-NEXT: vpmaxud (%rdi), %ymm0, %ymm2 +; NoVLX-NEXT: vpcmpeqd %ymm2, %ymm0, %ymm2 +; NoVLX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: retq %y = load <8 x i32>, <8 x i32>* %yp, align 4 %mask = icmp uge <8 x i32> %x, %y @@ -528,12 +483,10 @@ ; ; NoVLX-LABEL: 
test256_20: ; NoVLX: # %bb.0: -; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqu (%rdi), %ymm2 -; NoVLX-NEXT: vpcmpnltud %zmm0, %zmm2, %k1 -; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} -; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 +; NoVLX-NEXT: vpmaxud %ymm0, %ymm2, %ymm3 +; NoVLX-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2 +; NoVLX-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: retq %y = load <8 x i32>, <8 x i32>* %yp, align 4 %mask = icmp uge <8 x i32> %y, %x @@ -797,10 +750,10 @@ ; ; NoVLX-LABEL: test128_10: ; NoVLX: # %bb.0: +; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm3 ; NoVLX-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4 ; NoVLX-NEXT: vpxor %xmm4, %xmm3, %xmm3 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1 ; NoVLX-NEXT: vpandn %xmm3, %xmm1, %xmm1 ; NoVLX-NEXT: vblendvpd %xmm1, %xmm0, %xmm2, %xmm0 ; NoVLX-NEXT: retq @@ -912,8 +865,8 @@ ; ; NoVLX-LABEL: test128_15: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm2 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm3 +; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm2 ; NoVLX-NEXT: vpcmpgtd %xmm3, %xmm0, %xmm3 ; NoVLX-NEXT: vpandn %xmm3, %xmm2, %xmm2 ; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 @@ -938,8 +891,8 @@ ; ; NoVLX-LABEL: test128_16: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm2 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm3 +; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm2 ; NoVLX-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3 ; NoVLX-NEXT: vpandn %xmm3, %xmm2, %xmm2 ; NoVLX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 Index: test/CodeGen/X86/avx512vl-vec-masked-cmp.ll =================================================================== --- test/CodeGen/X86/avx512vl-vec-masked-cmp.ll +++ test/CodeGen/X86/avx512vl-vec-masked-cmp.ll @@ -2373,37 +2373,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: 
vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -2424,37 +2401,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 
-; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -2477,50 +2431,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: 
kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -2545,50 +2464,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; 
NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -2614,38 +2498,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; 
NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -2669,51 +2529,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; 
NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -2740,37 +2564,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; 
NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -2791,37 +2592,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -2844,50 +2622,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, 
%k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -2912,50 +2655,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqd 
(%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -2981,38 +2689,14 @@ ; ; NoVLX-LABEL: 
test_vpcmpeqd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -3036,51 +2720,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; 
NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -3113,13 +2761,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: 
vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -3154,13 +2812,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb 
$1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -3197,8 +2865,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -3206,17 +2876,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -3255,8 +2921,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, 
%xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -3264,17 +2932,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -3314,14 +2978,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, 
%zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -3359,9 +3032,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -3369,17 +3043,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; 
NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -3420,16 +3090,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -3467,16 +3146,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: 
vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -3516,8 +3204,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -3525,20 +3215,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: 
kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -3580,8 +3265,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -3589,20 +3276,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, 
%ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -3645,17 +3327,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -3696,9 +3386,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: 
andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -3706,20 +3397,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -5339,10 +5025,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; 
NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -5367,10 +5054,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -5397,17 +5085,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -5436,17 +5119,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 
-; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -5476,11 +5154,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -5508,18 +5186,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; 
NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -5550,23 +5222,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -5587,23 +5250,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: 
kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -5626,30 +5280,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -5674,30 +5313,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw 
%eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -5723,24 +5347,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -5764,31 +5378,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: 
vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -5815,23 +5413,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax 
killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -5852,23 +5441,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -5891,30 +5471,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: 
# kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -5939,30 +5504,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -5988,24 +5538,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw 
%eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -6029,31 +5569,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -6086,13 
+5610,17 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6127,13 +5655,17 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: 
vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6170,20 +5702,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6222,20 +5752,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; 
NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6275,14 +5803,17 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6320,21 +5851,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; 
NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6375,16 +5903,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld 
$31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6422,16 +5953,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6471,23 +6005,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: 
kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6529,23 +6060,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw 
%k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6588,17 +6116,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6639,24 +6169,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, 
%xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6695,36 +6221,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, 
%k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -6749,36 +6250,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -6805,49 +6281,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; 
NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -6876,49 +6315,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry 
-; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -6948,37 +6350,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b: ; 
NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -7006,50 +6382,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: 
vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -7080,36 +6418,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; 
NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -7134,36 +6447,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; 
NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -7190,49 +6478,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: 
kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -7261,49 +6512,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; 
NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -7333,37 +6547,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, 
%eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -7391,50 +6579,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, 
%k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -7471,14 +6621,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -7514,14 +6673,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, 
%zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -7559,9 +6727,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -7569,17 +6738,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: 
vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -7619,9 +6784,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -7629,17 +6795,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -7680,15 +6842,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -7727,10 +6897,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -7738,17 
+6908,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -7790,17 +6956,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -7839,17 +7013,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb 
$3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -7890,9 +7072,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -7900,20 +7083,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -7956,9 +7134,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: 
vpcmpeqq (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -7966,20 +7145,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -8023,18 +7197,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw 
$2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -8076,10 +7257,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -8087,20 +7268,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} 
ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -11483,37 +10659,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtd 
%zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -11534,37 +10687,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -11587,50 +10717,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; 
NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -11655,50 +10750,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # 
%entry -; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -11724,38 +10784,14 @@ ; ; NoVLX-LABEL: 
test_vpcmpsgtd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -11779,51 +10815,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; 
NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -11850,37 +10850,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 
-; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -11901,37 +10878,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; 
NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -11954,50 +10908,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: 
kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -12022,50 +10941,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; 
NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -12091,38 +10975,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 
-; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -12146,51 +11006,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; 
NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -12223,13 +11047,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -12264,13 +11098,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -12307,8 +11151,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -12316,17 +11162,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -12365,8 +11207,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -12374,17 +11218,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: 
vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -12424,14 +11264,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -12469,9 +11318,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: 
vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -12479,17 +11329,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -12530,16 +11376,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -12577,16 +11432,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -12626,8 +11490,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -12635,20 +11501,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -12690,8 +11551,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw 
%edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -12699,20 +11562,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -12755,17 +11613,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; 
NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -12806,9 +11672,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -12816,20 +11683,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -14449,10 +13311,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -14477,10 +13340,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -14507,17 +13371,12 @@ ; ; 
NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -14546,17 +13405,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -14586,11 +13440,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: 
vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -14618,18 +13472,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -14660,23 +13508,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; 
NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -14697,23 +13536,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -14736,30 +13566,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: 
kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -14784,30 +13599,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -14833,24 +13633,14 @@ ; ; 
NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -14874,31 +13664,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw 
$14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -14925,23 +13699,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -14962,23 +13727,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: 
kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -15001,30 +13757,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -15049,30 +13790,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: 
vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -15098,24 +13824,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -15139,31 +13855,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # 
kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -15196,13 +13896,17 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: 
kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -15237,13 +13941,17 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -15280,20 +13988,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: 
vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -15332,20 +14038,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -15385,14 +14089,17 @@ ; NoVLX-NEXT: 
.cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -15430,21 +14137,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd 
%zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -15485,16 +14189,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -15532,16 +14239,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: 
kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -15581,23 +14291,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -15639,23 +14346,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -15698,17 +14402,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, 
{{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -15749,24 +14455,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -15805,36 +14507,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -15859,36 +14536,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; 
NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -15915,49 +14567,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: 
kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -15986,49 +14601,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; 
NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -16058,37 +14636,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, 
%xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -16116,50 +14668,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, 
%xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -16190,36 +14704,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def 
%zmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -16244,36 +14733,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -16300,49 +14764,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; 
NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -16371,49 +14798,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw 
%edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -16443,37 +14833,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: 
vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -16501,50 +14865,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, 
%ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -16581,14 +14907,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; 
NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -16624,14 +14959,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -16669,9 +15013,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -16679,17 +15024,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -16729,9 +15070,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; 
NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -16739,17 +15081,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -16790,15 +15128,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, 
%zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -16837,10 +15183,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -16848,17 +15194,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; 
NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -16900,17 +15242,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -16949,17 +15299,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq (%rdi), 
%ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -17000,9 +15358,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -17010,20 +15369,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, 
%xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -17066,9 +15420,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -17076,20 +15431,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb 
{{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -17133,18 +15483,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -17186,10 +15543,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -17197,20 +15554,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -20689,39 +19041,14 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd 
%xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -20742,40 +19069,14 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw 
%eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -20798,50 +19099,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw 
$1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -20866,51 +19132,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: 
kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -20936,40 +19166,14 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; 
NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -20993,51 +19197,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; 
NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -21064,39 +19232,14 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; 
NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -21117,40 +19260,14 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; 
NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -21173,50 +19290,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; 
NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -21241,51 +19323,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; 
NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -21311,40 +19357,14 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = 
bitcast <2 x i64> %__a to <4 x i32> @@ -21368,51 +19388,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax 
killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -21445,15 +19429,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -21488,16 +19480,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -21534,8 +19533,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -21543,17 +19544,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -21592,9 +19589,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -21602,17 +19600,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: 
vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -21652,16 +19646,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -21699,9 +19700,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 
-; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -21709,17 +19711,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -21760,18 +19758,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, 
%k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -21809,19 +19814,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor 
%xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -21861,8 +19872,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -21870,20 +19883,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, 
%zmm0, %k0 @@ -21925,9 +19933,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -21935,20 +19944,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -21991,19 +19995,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; 
NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -22044,9 +20054,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -22054,20 +20065,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; 
NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -23687,12 +21693,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -23717,13 +21722,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, 
%ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -23750,17 +21753,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -23789,18 +21787,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw 
$14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -23830,13 +21822,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -23864,18 +21854,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -23906,25 +21890,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; 
NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -23945,26 +21918,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -23987,30 +21948,15 @@ ; ; NoVLX-LABEL: 
test_masked_vpcmpsgeq_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -24035,31 +21981,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 
-; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -24085,26 +22015,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -24128,31 +22046,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: 
kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -24179,25 +22081,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> 
%__a to <2 x i64> @@ -24218,26 +22109,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -24260,30 +22139,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: 
def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -24308,31 +22172,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -24358,26 +22206,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor 
%xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -24401,31 +22237,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; 
NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -24458,15 +22278,17 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24501,16 +22323,17 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpleq 
%zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24547,20 +22370,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24599,21 +22420,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, 
%xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24653,16 +22471,17 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 
+; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24700,21 +22519,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24755,18 +22571,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, 
%ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24804,19 +22621,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24856,23 +22673,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24914,24 +22728,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, 
%xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24974,19 +22784,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, 
%zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -25027,24 +22837,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -25083,38 +22889,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 
-; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -25139,39 +22918,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; 
NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -25198,51 +22949,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw 
%k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -25271,52 +22983,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; 
NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -25346,39 +23018,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; 
NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -25406,52 +23050,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, 
%k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -25482,38 +23086,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: 
vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -25538,39 +23115,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -25597,51 +23146,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 
-; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -25670,52 +23180,12 @@ ; ; NoVLX-LABEL: 
test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; 
NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -25745,39 +23215,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -25805,52 +23247,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor 
%ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -25887,16 +23289,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd 
%ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -25932,17 +23341,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, 
%zmm1, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -25980,11 +23395,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -25992,17 +23406,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd 
%zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -26042,12 +23452,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -26055,17 +23463,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: 
vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -26106,17 +23510,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -26155,12 +23565,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw 
%edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -26168,17 +23576,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -26220,19 +23624,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: 
kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -26271,20 +23681,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: 
kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -26325,11 +23740,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -26337,20 +23751,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, 
%ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -26393,12 +23802,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -26406,20 +23813,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -26463,20 +23865,25 @@ 
; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -26518,12 +23925,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; 
NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -26531,20 +23936,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -30043,40 +27443,14 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; 
NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30097,40 +27471,14 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: 
kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30153,53 +27501,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, 
%k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30224,53 +27534,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, 
%k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30296,41 +27568,14 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; 
NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30354,54 +27599,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; 
NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30428,40 +27634,14 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, 
%k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30482,40 +27662,14 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; 
NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30538,53 +27692,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb 
$8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30609,53 +27725,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw 
%k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30681,41 +27759,14 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw 
$14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30739,54 +27790,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 
-; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30819,16 +27831,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; 
NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -30863,16 +27882,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -30909,11 +27935,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -30921,17 +27946,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -30970,11 +27991,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; 
NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -30982,17 +28002,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -31032,17 +28048,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor 
%xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -31080,12 +28102,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -31093,17 +28113,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, 
%edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -31144,19 +28160,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: 
vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -31194,19 +28216,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -31246,11 +28274,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -31258,20 +28285,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -31313,11 
+28335,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -31325,20 +28346,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -31381,20 +28397,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed 
%xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -31435,12 +28456,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; 
NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -31448,20 +28467,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -33081,13 +30095,11 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, 
-{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -33112,13 +30124,11 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -33145,20 +30155,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -33187,20 +30189,12 @@ ; ; NoVLX-LABEL: 
test_masked_vpcmpultq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -33230,14 +30224,11 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -33265,21 +30256,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; 
NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -33310,26 +30292,14 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; 
NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33350,26 +30320,14 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33392,33 +30350,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: 
kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33443,33 +30383,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; 
NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33495,27 +30417,14 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33539,34 +30448,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, 
%xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33593,26 +30483,14 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33633,26 +30511,14 @@ ; ; 
NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33675,33 +30541,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, 
%k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33726,33 +30574,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: 
retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33778,27 +30608,14 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33822,34 +30639,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; 
NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33882,16 +30680,17 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -33926,16 +30725,17 @@ ; NoVLX-NEXT: 
.cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -33972,23 +30772,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; 
NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -34027,23 +30822,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, 
%eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -34083,17 +30873,17 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -34131,24 +30921,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw 
%k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -34189,19 +30973,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, 
%xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -34239,19 +31023,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -34291,26 +31075,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # 
kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -34352,26 +31130,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: 
vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -34414,20 +31186,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -34468,27 
+31239,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -34527,39 +31291,11 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb 
$0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -34584,39 +31320,11 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; 
NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -34643,52 +31351,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: 
kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -34717,52 +31385,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, 
%ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -34792,40 +31420,11 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; 
NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -34853,53 +31452,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: 
vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -34930,39 +31488,11 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; 
NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -34987,39 +31517,11 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; 
NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -35046,52 +31548,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: 
kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -35120,52 +31582,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 
-; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -35195,40 +31617,11 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, 
%eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -35256,53 +31649,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, 
%ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -35339,17 +31691,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: 
vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35385,17 +31743,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, 
%ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35433,12 +31797,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -35446,17 +31808,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; 
NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35496,12 +31854,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -35509,17 +31865,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, 
%xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35560,18 +31912,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35610,13 +31967,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq 
(%rsi), %ymm1 -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -35624,17 +31978,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35676,20 +32026,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, 
%ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35728,20 +32083,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: 
kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35782,12 +32142,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -35795,20 +32153,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand 
%xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35851,12 +32204,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -35864,20 +32215,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, 
{{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35921,21 +32267,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; 
NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35977,13 +32327,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -35991,20 +32338,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -37027,37 +33369,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> @@ -37078,37 +33397,14 @@ ; ; NoVLX-LABEL: 
test_vcmpoeqps_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovaps (%rdi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> @@ -37130,38 +33426,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1 -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; 
NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> @@ -37186,40 +33458,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 
-; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -37247,40 +33493,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; 
NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vmovaps (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -37309,41 +33529,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vbroadcastss (%rsi), %xmm2 -; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vbroadcastss (%rsi), 
%xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -37373,37 +33566,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> @@ -37424,37 +33594,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: 
kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovaps (%rdi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> @@ -37476,38 +33623,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1 -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, 
%k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> @@ -37532,40 +33655,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, 
%k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -37593,40 +33690,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, 
%k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vmovaps (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -37655,41 +33726,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vbroadcastss (%rsi), %xmm2 -; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; 
NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -37725,13 +33769,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -37766,13 +33820,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 
+; NoVLX-NEXT: vmovaps (%rdi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -37808,14 +33872,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1 -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld 
$31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -37854,18 +33927,26 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -37904,18 +33985,26 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: 
vcmpeqps (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vmovaps (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -37955,19 +34044,26 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vbroadcastss (%rsi), %xmm2 -; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd 
%zmm1, %zmm1, %k0 +; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -38008,16 +34104,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb 
$2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -38055,16 +34160,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovaps (%rdi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -38103,17 +34217,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1 -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, 
%k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -38155,21 +34277,28 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $96, %rsp +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -38211,21 +34340,28 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $96, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vmovaps (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; 
NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -38268,22 +34404,28 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $96, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vbroadcastss (%rsi), %xmm2 -; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd 
%zmm0, %zmm0, %k0 @@ -39997,10 +36139,11 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -40025,10 +36168,11 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovapd (%rdi), %xmm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -40054,11 +36198,11 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -40087,15 +36231,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask: ; NoVLX: # 
%bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -40124,15 +36267,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: vmovapd (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -40162,16 +36304,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: 
vmovddup {{.*#+}} xmm2 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax @@ -40202,23 +36342,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> @@ -40239,23 +36370,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; 
NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovapd (%rdi), %xmm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> @@ -40277,24 +36399,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> @@ -40319,26 +36431,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd 
%xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -40366,26 +36466,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vmovapd (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -40414,27 +36502,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b: ; 
NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -40464,23 +36539,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; 
NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> @@ -40501,23 +36567,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovapd (%rdi), %xmm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> @@ -40539,24 +36596,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, 
%eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> @@ -40581,26 +36628,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -40628,26 +36663,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, 
%k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vmovapd (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -40676,27 +36699,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -40732,13 +36742,17 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -40773,13 +36787,17 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovapd (%rdi), %xmm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, 
%k0 @@ -40815,14 +36833,17 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -40861,18 +36882,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: 
vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -40911,18 +36934,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vmovapd (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -40962,19 +36987,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: movb %dil, 
{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -41015,16 +37041,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -41062,16 +37091,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovapd (%rdi), %xmm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -41110,17 +37142,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, 
%k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -41162,21 +37196,22 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $96, %rsp +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, 
%xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -41218,21 +37253,22 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $96, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vmovapd (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -41275,22 +37311,22 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $96, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0] -; NoVLX-NEXT: 
vcmpeqpd %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -41329,36 +37365,11 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, 
%k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -41383,36 +37394,11 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovapd (%rdi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; 
NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -41438,37 +37424,11 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1 -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -41497,41 +37457,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; 
NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -41560,41 +37493,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; 
NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vmovapd (%rsi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -41624,42 +37530,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm2 -; NoVLX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; 
NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -41690,36 +37568,11 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 
-; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -41744,36 +37597,11 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovapd (%rdi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -41799,37 +37627,11 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b: 
; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1 -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -41858,41 +37660,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; 
NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -41921,41 +37696,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw 
$1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vmovapd (%rsi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -41985,42 +37733,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm2 -; NoVLX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: 
vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -42057,14 +37777,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: 
vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -42100,14 +37829,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovapd (%rdi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -42144,15 +37882,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1 -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, 
%xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -42192,19 +37938,26 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, 
%esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -42244,19 +37997,26 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vmovapd (%rsi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, 
%zmm0, %k0 @@ -42297,20 +38057,26 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm2 -; NoVLX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -42352,17 +38118,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -42401,17 +38175,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovapd (%rdi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; 
NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -42451,18 +38233,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1 -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ 
-42505,22 +38294,28 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $96, %rsp +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -42563,22 +38358,28 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $96, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: 
vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vmovapd (%rsi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -42622,23 +38423,28 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $96, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm2 -; NoVLX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, 
%k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -43857,40 +39663,14 @@ ; ; NoVLX-LABEL: mask_zero_lower: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $4, %k2, %k3 -; NoVLX-NEXT: kxorw %k1, %k3, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $11, %k1, %k1 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftrw $5, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $10, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 
-; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $9, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $8, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq %cmp = icmp ult <4 x i32> %a, zeroinitializer %concat = shufflevector <4 x i1> %cmp, <4 x i1> zeroinitializer, <8 x i32> Index: test/CodeGen/X86/compress_expand.ll =================================================================== --- test/CodeGen/X86/compress_expand.ll +++ test/CodeGen/X86/compress_expand.ll @@ -200,11 +200,9 @@ ; KNL: # %bb.0: ; KNL-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; KNL-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL-NEXT: vpsrad $31, %xmm1, %xmm1 -; KNL-NEXT: vpmovsxdq %xmm1, %ymm1 -; KNL-NEXT: vmovdqa %ymm1, %ymm1 -; KNL-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1 +; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL-NEXT: kshiftlw $12, %k0, %k0 +; KNL-NEXT: kshiftrw $12, %k0, %k1 ; KNL-NEXT: vpcompressq %zmm0, (%rdi) {%k1} ; KNL-NEXT: retq call void @llvm.masked.compressstore.v4i64(<4 x i64> %V, i64* %base, <4 x i1> %mask) @@ -223,10 +221,9 @@ ; KNL: # %bb.0: ; KNL-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL-NEXT: vpsllq $63, %xmm1, %xmm1 -; KNL-NEXT: vpsraq $63, %zmm1, %zmm1 -; KNL-NEXT: vmovdqa %xmm1, %xmm1 -; KNL-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1 +; KNL-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL-NEXT: kshiftlw $14, %k0, %k0 +; KNL-NEXT: kshiftrw $14, %k0, %k1 ; KNL-NEXT: 
vpcompressq %zmm0, (%rdi) {%k1} ; KNL-NEXT: retq call void @llvm.masked.compressstore.v2i64(<2 x i64> %V, i64* %base, <2 x i1> %mask) @@ -245,10 +242,9 @@ ; KNL: # %bb.0: ; KNL-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL-NEXT: vpsrad $31, %xmm1, %xmm1 -; KNL-NEXT: vmovdqa %xmm1, %xmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 +; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL-NEXT: kshiftlw $12, %k0, %k0 +; KNL-NEXT: kshiftrw $12, %k0, %k1 ; KNL-NEXT: vcompressps %zmm0, (%rdi) {%k1} ; KNL-NEXT: retq call void @llvm.masked.compressstore.v4f32(<4 x float> %V, float* %base, <4 x i1> %mask) @@ -269,11 +265,9 @@ ; KNL-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; KNL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] -; KNL-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 -; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL-NEXT: vmovaps %xmm1, %xmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 +; KNL-NEXT: vpcmpeqq %zmm2, %zmm1, %k0 +; KNL-NEXT: kshiftlw $14, %k0, %k0 +; KNL-NEXT: kshiftrw $14, %k0, %k1 ; KNL-NEXT: vexpandps (%rdi), %zmm0 {%k1} ; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; KNL-NEXT: retq @@ -296,11 +290,9 @@ ; KNL-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; KNL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] -; KNL-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 -; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL-NEXT: vmovaps %xmm1, %xmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 +; KNL-NEXT: vpcmpeqq %zmm2, %zmm1, %k0 +; KNL-NEXT: kshiftlw $14, %k0, %k0 +; KNL-NEXT: kshiftrw $14, %k0, %k1 ; KNL-NEXT: vcompressps %zmm0, (%rdi) {%k1} ; KNL-NEXT: retq %mask = icmp eq <2 x i32> %trigger, zeroinitializer Index: test/CodeGen/X86/gpr-to-mask.ll 
=================================================================== --- test/CodeGen/X86/gpr-to-mask.ll +++ test/CodeGen/X86/gpr-to-mask.ll @@ -260,40 +260,38 @@ define void @test_shl1(i1 %cond, i8* %ptr1, i8* %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, <8 x float>* %fptrvec) { ; X86-64-LABEL: test_shl1: ; X86-64: # %bb.0: # %entry -; X86-64-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 -; X86-64-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; X86-64-NEXT: testb $1, %dil ; X86-64-NEXT: je .LBB5_2 ; X86-64-NEXT: # %bb.1: # %if ; X86-64-NEXT: kmovb (%rsi), %k0 -; X86-64-NEXT: kaddb %k0, %k0, %k1 +; X86-64-NEXT: kaddb %k0, %k0, %k0 ; X86-64-NEXT: jmp .LBB5_3 ; X86-64-NEXT: .LBB5_2: # %else -; X86-64-NEXT: kmovb (%rdx), %k1 +; X86-64-NEXT: kmovb (%rdx), %k0 ; X86-64-NEXT: .LBB5_3: # %exit -; X86-64-NEXT: vmovaps %zmm0, %zmm1 {%k1} -; X86-64-NEXT: vmovaps %ymm1, (%rcx) +; X86-64-NEXT: vpmovm2d %k0, %zmm2 +; X86-64-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 +; X86-64-NEXT: vmovaps %ymm0, (%rcx) ; X86-64-NEXT: vzeroupper ; X86-64-NEXT: retq ; ; X86-32-LABEL: test_shl1: ; X86-32: # %bb.0: # %entry -; X86-32-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 -; X86-32-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-32-NEXT: testb $1, {{[0-9]+}}(%esp) ; X86-32-NEXT: je .LBB5_2 ; X86-32-NEXT: # %bb.1: # %if ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-32-NEXT: kmovb (%ecx), %k0 -; X86-32-NEXT: kaddb %k0, %k0, %k1 +; X86-32-NEXT: kaddb %k0, %k0, %k0 ; X86-32-NEXT: jmp .LBB5_3 ; X86-32-NEXT: .LBB5_2: # %else ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-32-NEXT: kmovb (%ecx), %k1 +; X86-32-NEXT: kmovb (%ecx), %k0 ; X86-32-NEXT: .LBB5_3: # %exit -; X86-32-NEXT: vmovaps %zmm0, %zmm1 {%k1} -; X86-32-NEXT: vmovaps %ymm1, (%eax) +; X86-32-NEXT: vpmovm2d %k0, %zmm2 +; X86-32-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 +; X86-32-NEXT: vmovaps %ymm0, (%eax) ; X86-32-NEXT: vzeroupper ; X86-32-NEXT: retl entry: @@ -319,8 +317,6 @@ 
define void @test_shr1(i1 %cond, i8* %ptr1, i8* %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, <8 x float>* %fptrvec) { ; X86-64-LABEL: test_shr1: ; X86-64: # %bb.0: # %entry -; X86-64-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 -; X86-64-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; X86-64-NEXT: testb $1, %dil ; X86-64-NEXT: je .LBB6_2 ; X86-64-NEXT: # %bb.1: # %if @@ -330,16 +326,15 @@ ; X86-64-NEXT: .LBB6_2: # %else ; X86-64-NEXT: movb (%rdx), %al ; X86-64-NEXT: .LBB6_3: # %exit -; X86-64-NEXT: kmovd %eax, %k1 -; X86-64-NEXT: vmovaps %zmm0, %zmm1 {%k1} -; X86-64-NEXT: vmovaps %ymm1, (%rcx) +; X86-64-NEXT: kmovd %eax, %k0 +; X86-64-NEXT: vpmovm2d %k0, %zmm2 +; X86-64-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 +; X86-64-NEXT: vmovaps %ymm0, (%rcx) ; X86-64-NEXT: vzeroupper ; X86-64-NEXT: retq ; ; X86-32-LABEL: test_shr1: ; X86-32: # %bb.0: # %entry -; X86-32-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 -; X86-32-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-32-NEXT: testb $1, {{[0-9]+}}(%esp) ; X86-32-NEXT: je .LBB6_2 @@ -352,9 +347,10 @@ ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-32-NEXT: movb (%ecx), %cl ; X86-32-NEXT: .LBB6_3: # %exit -; X86-32-NEXT: kmovd %ecx, %k1 -; X86-32-NEXT: vmovaps %zmm0, %zmm1 {%k1} -; X86-32-NEXT: vmovaps %ymm1, (%eax) +; X86-32-NEXT: kmovd %ecx, %k0 +; X86-32-NEXT: vpmovm2d %k0, %zmm2 +; X86-32-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 +; X86-32-NEXT: vmovaps %ymm0, (%eax) ; X86-32-NEXT: vzeroupper ; X86-32-NEXT: retl entry: @@ -380,40 +376,38 @@ define void @test_shr2(i1 %cond, i8* %ptr1, i8* %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, <8 x float>* %fptrvec) { ; X86-64-LABEL: test_shr2: ; X86-64: # %bb.0: # %entry -; X86-64-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 -; X86-64-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; X86-64-NEXT: testb $1, %dil ; X86-64-NEXT: je .LBB7_2 ; X86-64-NEXT: # %bb.1: # %if ; X86-64-NEXT: kmovb (%rsi), %k0 -; X86-64-NEXT: 
kshiftrb $2, %k0, %k1 +; X86-64-NEXT: kshiftrb $2, %k0, %k0 ; X86-64-NEXT: jmp .LBB7_3 ; X86-64-NEXT: .LBB7_2: # %else -; X86-64-NEXT: kmovb (%rdx), %k1 +; X86-64-NEXT: kmovb (%rdx), %k0 ; X86-64-NEXT: .LBB7_3: # %exit -; X86-64-NEXT: vmovaps %zmm0, %zmm1 {%k1} -; X86-64-NEXT: vmovaps %ymm1, (%rcx) +; X86-64-NEXT: vpmovm2d %k0, %zmm2 +; X86-64-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 +; X86-64-NEXT: vmovaps %ymm0, (%rcx) ; X86-64-NEXT: vzeroupper ; X86-64-NEXT: retq ; ; X86-32-LABEL: test_shr2: ; X86-32: # %bb.0: # %entry -; X86-32-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 -; X86-32-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-32-NEXT: testb $1, {{[0-9]+}}(%esp) ; X86-32-NEXT: je .LBB7_2 ; X86-32-NEXT: # %bb.1: # %if ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-32-NEXT: kmovb (%ecx), %k0 -; X86-32-NEXT: kshiftrb $2, %k0, %k1 +; X86-32-NEXT: kshiftrb $2, %k0, %k0 ; X86-32-NEXT: jmp .LBB7_3 ; X86-32-NEXT: .LBB7_2: # %else ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-32-NEXT: kmovb (%ecx), %k1 +; X86-32-NEXT: kmovb (%ecx), %k0 ; X86-32-NEXT: .LBB7_3: # %exit -; X86-32-NEXT: vmovaps %zmm0, %zmm1 {%k1} -; X86-32-NEXT: vmovaps %ymm1, (%eax) +; X86-32-NEXT: vpmovm2d %k0, %zmm2 +; X86-32-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 +; X86-32-NEXT: vmovaps %ymm0, (%eax) ; X86-32-NEXT: vzeroupper ; X86-32-NEXT: retl entry: @@ -439,40 +433,38 @@ define void @test_shl(i1 %cond, i8* %ptr1, i8* %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, <8 x float>* %fptrvec) { ; X86-64-LABEL: test_shl: ; X86-64: # %bb.0: # %entry -; X86-64-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 -; X86-64-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; X86-64-NEXT: testb $1, %dil ; X86-64-NEXT: je .LBB8_2 ; X86-64-NEXT: # %bb.1: # %if ; X86-64-NEXT: kmovb (%rsi), %k0 -; X86-64-NEXT: kshiftlb $6, %k0, %k1 +; X86-64-NEXT: kshiftlb $6, %k0, %k0 ; X86-64-NEXT: jmp .LBB8_3 ; X86-64-NEXT: .LBB8_2: # %else -; X86-64-NEXT: kmovb (%rdx), %k1 +; 
X86-64-NEXT: kmovb (%rdx), %k0 ; X86-64-NEXT: .LBB8_3: # %exit -; X86-64-NEXT: vmovaps %zmm0, %zmm1 {%k1} -; X86-64-NEXT: vmovaps %ymm1, (%rcx) +; X86-64-NEXT: vpmovm2d %k0, %zmm2 +; X86-64-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 +; X86-64-NEXT: vmovaps %ymm0, (%rcx) ; X86-64-NEXT: vzeroupper ; X86-64-NEXT: retq ; ; X86-32-LABEL: test_shl: ; X86-32: # %bb.0: # %entry -; X86-32-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 -; X86-32-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-32-NEXT: testb $1, {{[0-9]+}}(%esp) ; X86-32-NEXT: je .LBB8_2 ; X86-32-NEXT: # %bb.1: # %if ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-32-NEXT: kmovb (%ecx), %k0 -; X86-32-NEXT: kshiftlb $6, %k0, %k1 +; X86-32-NEXT: kshiftlb $6, %k0, %k0 ; X86-32-NEXT: jmp .LBB8_3 ; X86-32-NEXT: .LBB8_2: # %else ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-32-NEXT: kmovb (%ecx), %k1 +; X86-32-NEXT: kmovb (%ecx), %k0 ; X86-32-NEXT: .LBB8_3: # %exit -; X86-32-NEXT: vmovaps %zmm0, %zmm1 {%k1} -; X86-32-NEXT: vmovaps %ymm1, (%eax) +; X86-32-NEXT: vpmovm2d %k0, %zmm2 +; X86-32-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 +; X86-32-NEXT: vmovaps %ymm0, (%eax) ; X86-32-NEXT: vzeroupper ; X86-32-NEXT: retl entry: @@ -498,27 +490,24 @@ define void @test_add(i1 %cond, i8* %ptr1, i8* %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, <8 x float>* %fptrvec) { ; X86-64-LABEL: test_add: ; X86-64: # %bb.0: # %entry -; X86-64-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 -; X86-64-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; X86-64-NEXT: kmovb (%rsi), %k0 ; X86-64-NEXT: kmovb (%rdx), %k1 ; X86-64-NEXT: testb $1, %dil ; X86-64-NEXT: je .LBB9_2 ; X86-64-NEXT: # %bb.1: # %if -; X86-64-NEXT: kandb %k1, %k0, %k1 +; X86-64-NEXT: kandb %k1, %k0, %k0 ; X86-64-NEXT: jmp .LBB9_3 ; X86-64-NEXT: .LBB9_2: # %else -; X86-64-NEXT: kaddb %k1, %k0, %k1 +; X86-64-NEXT: kaddb %k1, %k0, %k0 ; X86-64-NEXT: .LBB9_3: # %exit -; X86-64-NEXT: vmovaps %zmm0, %zmm1 {%k1} -; X86-64-NEXT: 
vmovaps %ymm1, (%rcx) +; X86-64-NEXT: vpmovm2d %k0, %zmm2 +; X86-64-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 +; X86-64-NEXT: vmovaps %ymm0, (%rcx) ; X86-64-NEXT: vzeroupper ; X86-64-NEXT: retq ; ; X86-32-LABEL: test_add: ; X86-32: # %bb.0: # %entry -; X86-32-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 -; X86-32-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -527,13 +516,14 @@ ; X86-32-NEXT: testb $1, {{[0-9]+}}(%esp) ; X86-32-NEXT: je .LBB9_2 ; X86-32-NEXT: # %bb.1: # %if -; X86-32-NEXT: kandb %k1, %k0, %k1 +; X86-32-NEXT: kandb %k1, %k0, %k0 ; X86-32-NEXT: jmp .LBB9_3 ; X86-32-NEXT: .LBB9_2: # %else -; X86-32-NEXT: kaddb %k1, %k0, %k1 +; X86-32-NEXT: kaddb %k1, %k0, %k0 ; X86-32-NEXT: .LBB9_3: # %exit -; X86-32-NEXT: vmovaps %zmm0, %zmm1 {%k1} -; X86-32-NEXT: vmovaps %ymm1, (%eax) +; X86-32-NEXT: vpmovm2d %k0, %zmm2 +; X86-32-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0 +; X86-32-NEXT: vmovaps %ymm0, (%eax) ; X86-32-NEXT: vzeroupper ; X86-32-NEXT: retl entry: Index: test/CodeGen/X86/masked_gather_scatter.ll =================================================================== --- test/CodeGen/X86/masked_gather_scatter.ll +++ test/CodeGen/X86/masked_gather_scatter.ll @@ -812,11 +812,12 @@ ; KNL_64-LABEL: test15: ; KNL_64: # %bb.0: ; KNL_64-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0 -; KNL_64-NEXT: vmovdqa %xmm1, %xmm1 -; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm2 -; KNL_64-NEXT: vpslld $31, %ymm1, %ymm0 -; KNL_64-NEXT: vptestmd %zmm0, %zmm0, %k1 -; KNL_64-NEXT: vgatherqps (%rdi,%zmm2,4), %ymm0 {%k1} +; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1 +; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL_64-NEXT: kshiftlw $12, %k0, %k0 +; KNL_64-NEXT: kshiftrw $12, %k0, %k1 +; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm1 +; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1} ; KNL_64-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0 ; KNL_64-NEXT: vzeroupper ; 
KNL_64-NEXT: retq @@ -824,12 +825,13 @@ ; KNL_32-LABEL: test15: ; KNL_32: # %bb.0: ; KNL_32-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0 -; KNL_32-NEXT: vmovdqa %xmm1, %xmm1 +; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1 +; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL_32-NEXT: kshiftlw $12, %k0, %k0 +; KNL_32-NEXT: kshiftrw $12, %k0, %k1 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm2 -; KNL_32-NEXT: vpslld $31, %ymm1, %ymm0 -; KNL_32-NEXT: vptestmd %zmm0, %zmm0, %k1 -; KNL_32-NEXT: vgatherqps (%eax,%zmm2,4), %ymm0 {%k1} +; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1 +; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1} ; KNL_32-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0 ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl @@ -864,12 +866,10 @@ ; KNL_64-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 ; KNL_64-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0 ; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL_64-NEXT: vpsrad $31, %xmm1, %xmm1 -; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1 -; KNL_64-NEXT: vmovdqa %ymm1, %ymm1 +; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL_64-NEXT: kshiftlw $12, %k0, %k0 +; KNL_64-NEXT: kshiftrw $12, %k0, %k1 ; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0 -; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1 ; KNL_64-NEXT: vgatherqpd (%rdi,%zmm0,8), %zmm2 {%k1} ; KNL_64-NEXT: vmovapd %ymm2, %ymm0 ; KNL_64-NEXT: retq @@ -879,13 +879,11 @@ ; KNL_32-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 ; KNL_32-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0 ; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL_32-NEXT: vpsrad $31, %xmm1, %xmm1 -; KNL_32-NEXT: vpmovsxdq %xmm1, %ymm1 -; KNL_32-NEXT: vmovdqa %ymm1, %ymm1 +; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL_32-NEXT: kshiftlw $12, %k0, %k0 +; KNL_32-NEXT: kshiftrw $12, %k0, %k1 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax ; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0 -; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1 ; KNL_32-NEXT: 
vgatherqpd (%eax,%zmm0,8), %zmm2 {%k1} ; KNL_32-NEXT: vmovapd %ymm2, %ymm0 ; KNL_32-NEXT: retl @@ -919,9 +917,10 @@ ; KNL_64-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 ; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0 ; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm0 -; KNL_64-NEXT: vmovdqa %xmm1, %xmm1 -; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1 +; KNL_64-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL_64-NEXT: kshiftlw $14, %k0, %k0 +; KNL_64-NEXT: kshiftrw $14, %k0, %k1 ; KNL_64-NEXT: vgatherqpd (%rdi,%zmm0,8), %zmm2 {%k1} ; KNL_64-NEXT: vmovapd %xmm2, %xmm0 ; KNL_64-NEXT: vzeroupper @@ -932,10 +931,11 @@ ; KNL_32-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 ; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0 ; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm0 -; KNL_32-NEXT: vmovdqa %xmm1, %xmm1 +; KNL_32-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL_32-NEXT: kshiftlw $14, %k0, %k0 +; KNL_32-NEXT: kshiftrw $14, %k0, %k1 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1 ; KNL_32-NEXT: vgatherqpd (%eax,%zmm0,8), %zmm2 {%k1} ; KNL_32-NEXT: vmovapd %xmm2, %xmm0 ; KNL_32-NEXT: vzeroupper @@ -979,9 +979,10 @@ ; KNL_64: # %bb.0: ; KNL_64-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; KNL_64-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0 -; KNL_64-NEXT: vmovdqa %xmm2, %xmm2 -; KNL_64-NEXT: vpslld $31, %ymm2, %ymm2 -; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 +; KNL_64-NEXT: vpslld $31, %xmm2, %xmm2 +; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k0 +; KNL_64-NEXT: kshiftlw $12, %k0, %k0 +; KNL_64-NEXT: kshiftrw $12, %k0, %k1 ; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq @@ -990,10 +991,11 @@ ; KNL_32: # %bb.0: ; KNL_32-NEXT: # kill: def %xmm1 killed %xmm1 def %ymm1 ; KNL_32-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0 -; KNL_32-NEXT: vmovdqa %xmm2, %xmm2 +; KNL_32-NEXT: vpslld $31, 
%xmm2, %xmm2 +; KNL_32-NEXT: vptestmd %zmm2, %zmm2, %k0 +; KNL_32-NEXT: kshiftlw $12, %k0, %k0 +; KNL_32-NEXT: kshiftrw $12, %k0, %k1 ; KNL_32-NEXT: vpmovsxdq %ymm1, %zmm1 -; KNL_32-NEXT: vpslld $31, %ymm2, %ymm2 -; KNL_32-NEXT: vptestmd %zmm2, %zmm2, %k1 ; KNL_32-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl @@ -1022,11 +1024,9 @@ ; KNL_64-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 ; KNL_64-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL_64-NEXT: vpsrad $31, %xmm1, %xmm1 -; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1 -; KNL_64-NEXT: vmovdqa %ymm1, %ymm1 -; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1 +; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL_64-NEXT: kshiftlw $12, %k0, %k0 +; KNL_64-NEXT: kshiftrw $12, %k0, %k1 ; KNL_64-NEXT: vscatterqpd %zmm0, (%rdi,%zmm2,8) {%k1} ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq @@ -1036,12 +1036,10 @@ ; KNL_32-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 ; KNL_32-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL_32-NEXT: vpsrad $31, %xmm1, %xmm1 -; KNL_32-NEXT: vpmovsxdq %xmm1, %ymm1 -; KNL_32-NEXT: vmovdqa %ymm1, %ymm1 +; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL_32-NEXT: kshiftlw $12, %k0, %k0 +; KNL_32-NEXT: kshiftrw $12, %k0, %k1 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1 ; KNL_32-NEXT: vscatterqpd %zmm0, (%eax,%zmm2,8) {%k1} ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl @@ -1073,10 +1071,10 @@ ; KNL_64: # %bb.0: ; KNL_64-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; KNL_64-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0 -; KNL_64-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,2],zero,zero -; KNL_64-NEXT: vmovaps %xmm2, %xmm2 -; KNL_64-NEXT: vpslld $31, %ymm2, %ymm2 -; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 +; KNL_64-NEXT: vpsllq $63, %xmm2, %xmm2 +; KNL_64-NEXT: vptestmq 
%zmm2, %zmm2, %k0 +; KNL_64-NEXT: kshiftlw $14, %k0, %k0 +; KNL_64-NEXT: kshiftrw $14, %k0, %k1 ; KNL_64-NEXT: vscatterqps %ymm0, (,%zmm1) {%k1} ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq @@ -1084,12 +1082,12 @@ ; KNL_32-LABEL: test20: ; KNL_32: # %bb.0: ; KNL_32-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0 +; KNL_32-NEXT: vpsllq $63, %xmm2, %xmm2 +; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k0 +; KNL_32-NEXT: kshiftlw $14, %k0, %k0 +; KNL_32-NEXT: kshiftrw $14, %k0, %k1 ; KNL_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] -; KNL_32-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,2],zero,zero -; KNL_32-NEXT: vmovaps %xmm2, %xmm2 ; KNL_32-NEXT: vpmovsxdq %ymm1, %zmm1 -; KNL_32-NEXT: vpslld $31, %ymm2, %ymm2 -; KNL_32-NEXT: vptestmd %zmm2, %zmm2, %k1 ; KNL_32-NEXT: vscatterqps %ymm0, (,%zmm1) {%k1} ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl @@ -1119,10 +1117,11 @@ ; KNL_64-LABEL: test21: ; KNL_64: # %bb.0: ; KNL_64-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 -; KNL_64-NEXT: vmovdqa %xmm2, %xmm2 +; KNL_64-NEXT: vpsllq $63, %xmm2, %xmm2 +; KNL_64-NEXT: vptestmq %zmm2, %zmm2, %k0 +; KNL_64-NEXT: kshiftlw $14, %k0, %k0 +; KNL_64-NEXT: kshiftrw $14, %k0, %k1 ; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; KNL_64-NEXT: vpsllq $63, %zmm2, %zmm2 -; KNL_64-NEXT: vptestmq %zmm2, %zmm2, %k1 ; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq @@ -1131,10 +1130,11 @@ ; KNL_32: # %bb.0: ; KNL_32-NEXT: vpsllq $32, %xmm1, %xmm1 ; KNL_32-NEXT: vpsraq $32, %zmm1, %zmm1 -; KNL_32-NEXT: vmovdqa %xmm2, %xmm2 +; KNL_32-NEXT: vpsllq $63, %xmm2, %xmm2 +; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k0 +; KNL_32-NEXT: kshiftlw $14, %k0, %k0 +; KNL_32-NEXT: kshiftrw $14, %k0, %k1 ; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; KNL_32-NEXT: vpsllq $63, %zmm2, %zmm2 -; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1 ; KNL_32-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl @@ -1170,12 +1170,12 @@ ; KNL_64-LABEL: 
test22: ; KNL_64: # %bb.0: ; KNL_64-NEXT: # kill: def %xmm2 killed %xmm2 def %ymm2 +; KNL_64-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL_64-NEXT: kshiftlw $14, %k0, %k0 +; KNL_64-NEXT: kshiftrw $14, %k0, %k1 ; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; KNL_64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL_64-NEXT: vmovaps %xmm1, %xmm1 ; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0 -; KNL_64-NEXT: vpslld $31, %ymm1, %ymm1 -; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k1 ; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k1} ; KNL_64-NEXT: vmovaps %xmm2, %xmm0 ; KNL_64-NEXT: vzeroupper @@ -1184,13 +1184,13 @@ ; KNL_32-LABEL: test22: ; KNL_32: # %bb.0: ; KNL_32-NEXT: # kill: def %xmm2 killed %xmm2 def %ymm2 +; KNL_32-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL_32-NEXT: kshiftlw $14, %k0, %k0 +; KNL_32-NEXT: kshiftrw $14, %k0, %k1 ; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; KNL_32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL_32-NEXT: vmovaps %xmm1, %xmm1 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax ; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0 -; KNL_32-NEXT: vpslld $31, %ymm1, %ymm1 -; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 ; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k1} ; KNL_32-NEXT: vmovaps %xmm2, %xmm0 ; KNL_32-NEXT: vzeroupper @@ -1225,10 +1225,10 @@ ; KNL_64: # %bb.0: ; KNL_64-NEXT: # kill: def %xmm2 killed %xmm2 def %ymm2 ; KNL_64-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 -; KNL_64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL_64-NEXT: vmovaps %xmm1, %xmm1 -; KNL_64-NEXT: vpslld $31, %ymm1, %ymm1 -; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k1 +; KNL_64-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL_64-NEXT: kshiftlw $14, %k0, %k0 +; KNL_64-NEXT: kshiftrw $14, %k0, %k1 ; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k1} ; KNL_64-NEXT: vmovaps %xmm2, %xmm0 ; KNL_64-NEXT: vzeroupper @@ -1238,11 +1238,11 @@ ; 
KNL_32: # %bb.0: ; KNL_32-NEXT: # kill: def %xmm2 killed %xmm2 def %ymm2 ; KNL_32-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 -; KNL_32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL_32-NEXT: vmovaps %xmm1, %xmm1 +; KNL_32-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL_32-NEXT: kshiftlw $14, %k0, %k0 +; KNL_32-NEXT: kshiftrw $14, %k0, %k1 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpslld $31, %ymm1, %ymm1 -; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 ; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k1} ; KNL_32-NEXT: vmovaps %xmm2, %xmm0 ; KNL_32-NEXT: vzeroupper @@ -1275,30 +1275,30 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %src0) { ; KNL_64-LABEL: test23: ; KNL_64: # %bb.0: -; KNL_64-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] +; KNL_64-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL_64-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] ; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0 -; KNL_64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL_64-NEXT: vmovaps %xmm1, %xmm1 -; KNL_64-NEXT: vpslld $31, %ymm1, %ymm1 -; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k1 -; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm2 {%k1} -; KNL_64-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero +; KNL_64-NEXT: kshiftlw $14, %k0, %k0 +; KNL_64-NEXT: kshiftrw $14, %k0, %k1 +; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm1 {%k1} +; KNL_64-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq ; ; KNL_32-LABEL: test23: ; KNL_32: # %bb.0: +; KNL_32-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k0 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] +; KNL_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] ; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0 -; 
KNL_32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL_32-NEXT: vmovaps %xmm1, %xmm1 -; KNL_32-NEXT: vpslld $31, %ymm1, %ymm1 -; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 -; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm2 {%k1} -; KNL_32-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero +; KNL_32-NEXT: kshiftlw $14, %k0, %k0 +; KNL_32-NEXT: kshiftrw $14, %k0, %k1 +; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm1 {%k1} +; KNL_32-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl ; @@ -1332,27 +1332,27 @@ ; KNL_64-LABEL: test23b: ; KNL_64: # %bb.0: ; KNL_64-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 -; KNL_64-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] -; KNL_64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL_64-NEXT: vmovaps %xmm1, %xmm1 -; KNL_64-NEXT: vpslld $31, %ymm1, %ymm1 -; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k1 -; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm2 {%k1} -; KNL_64-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero +; KNL_64-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL_64-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] +; KNL_64-NEXT: kshiftlw $14, %k0, %k0 +; KNL_64-NEXT: kshiftrw $14, %k0, %k1 +; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm1 {%k1} +; KNL_64-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq ; ; KNL_32-LABEL: test23b: ; KNL_32: # %bb.0: ; KNL_32-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; KNL_32-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k0 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] -; KNL_32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL_32-NEXT: vmovaps %xmm1, %xmm1 -; KNL_32-NEXT: vpslld $31, %ymm1, %ymm1 -; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 -; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm2 {%k1} -; KNL_32-NEXT: vpmovzxdq {{.*#+}} xmm0 = 
xmm2[0],zero,xmm2[1],zero +; KNL_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] +; KNL_32-NEXT: kshiftlw $14, %k0, %k0 +; KNL_32-NEXT: kshiftrw $14, %k0, %k1 +; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm1 {%k1} +; KNL_32-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl ; @@ -1433,9 +1433,10 @@ ; KNL_64-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 ; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0 ; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm0 -; KNL_64-NEXT: vmovdqa %xmm1, %xmm1 -; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1 +; KNL_64-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL_64-NEXT: kshiftlw $14, %k0, %k0 +; KNL_64-NEXT: kshiftrw $14, %k0, %k1 ; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm2 {%k1} ; KNL_64-NEXT: vmovdqa %xmm2, %xmm0 ; KNL_64-NEXT: vzeroupper @@ -1446,10 +1447,11 @@ ; KNL_32-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 ; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0 ; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm0 -; KNL_32-NEXT: vmovdqa %xmm1, %xmm1 +; KNL_32-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL_32-NEXT: kshiftlw $14, %k0, %k0 +; KNL_32-NEXT: kshiftrw $14, %k0, %k1 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1 ; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm2 {%k1} ; KNL_32-NEXT: vmovdqa %xmm2, %xmm0 ; KNL_32-NEXT: vzeroupper @@ -1500,10 +1502,8 @@ ; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0 ; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm0 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; KNL_32-NEXT: vmovdqa %xmm2, %xmm2 -; KNL_32-NEXT: vpsllq $63, %zmm2, %zmm2 -; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1 +; KNL_32-NEXT: movb $3, %cl +; KNL_32-NEXT: kmovw %ecx, %k1 ; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm1 {%k1} ; KNL_32-NEXT: vmovdqa %xmm1, %xmm0 ; KNL_32-NEXT: vzeroupper @@ -1597,10 +1597,8 
@@ ; KNL_32-NEXT: vpsllq $32, %xmm1, %xmm1 ; KNL_32-NEXT: vpsraq $32, %zmm1, %zmm1 ; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; KNL_32-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; KNL_32-NEXT: vmovdqa %xmm2, %xmm2 -; KNL_32-NEXT: vpsllq $63, %zmm2, %zmm2 -; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1 +; KNL_32-NEXT: movb $3, %al +; KNL_32-NEXT: kmovw %eax, %k1 ; KNL_32-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl @@ -1686,83 +1684,79 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32> %src0) { ; KNL_64-LABEL: test30: ; KNL_64: # %bb.0: +; KNL_64-NEXT: vpslld $31, %xmm2, %xmm2 +; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 +; KNL_64-NEXT: kmovw %k1, %eax ; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1 ; KNL_64-NEXT: vpsllq $2, %ymm1, %ymm1 ; KNL_64-NEXT: vpaddq %ymm1, %ymm0, %ymm1 -; KNL_64-NEXT: testb $1, %dil +; KNL_64-NEXT: testb $1, %al ; KNL_64-NEXT: # implicit-def: %xmm0 -; KNL_64-NEXT: jne .LBB31_1 -; KNL_64-NEXT: # %bb.2: # %else -; KNL_64-NEXT: testb $1, %sil -; KNL_64-NEXT: jne .LBB31_3 -; KNL_64-NEXT: .LBB31_4: # %else2 -; KNL_64-NEXT: testb $1, %dl -; KNL_64-NEXT: jne .LBB31_5 -; KNL_64-NEXT: .LBB31_6: # %else5 -; KNL_64-NEXT: vmovd %edi, %xmm1 -; KNL_64-NEXT: vpinsrb $4, %esi, %xmm1, %xmm1 -; KNL_64-NEXT: vpinsrb $8, %edx, %xmm1, %xmm1 -; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL_64-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0 -; KNL_64-NEXT: vzeroupper -; KNL_64-NEXT: retq -; KNL_64-NEXT: .LBB31_1: # %cond.load +; KNL_64-NEXT: je .LBB31_2 +; KNL_64-NEXT: # %bb.1: # %cond.load ; KNL_64-NEXT: vmovq %xmm1, %rax ; KNL_64-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; KNL_64-NEXT: testb $1, %sil +; KNL_64-NEXT: .LBB31_2: # %else +; KNL_64-NEXT: kshiftrw $1, %k1, %k0 +; KNL_64-NEXT: kmovw %k0, %eax +; KNL_64-NEXT: testb $1, %al ; KNL_64-NEXT: je .LBB31_4 -; KNL_64-NEXT: .LBB31_3: # %cond.load1 +; KNL_64-NEXT: # %bb.3: # %cond.load1 ; KNL_64-NEXT: vpextrq $1, %xmm1, %rax ; KNL_64-NEXT: 
vpinsrd $1, (%rax), %xmm0, %xmm0 -; KNL_64-NEXT: testb $1, %dl +; KNL_64-NEXT: .LBB31_4: # %else2 +; KNL_64-NEXT: kshiftrw $2, %k1, %k0 +; KNL_64-NEXT: kmovw %k0, %eax +; KNL_64-NEXT: testb $1, %al ; KNL_64-NEXT: je .LBB31_6 -; KNL_64-NEXT: .LBB31_5: # %cond.load4 +; KNL_64-NEXT: # %bb.5: # %cond.load4 ; KNL_64-NEXT: vextracti128 $1, %ymm1, %xmm1 ; KNL_64-NEXT: vmovq %xmm1, %rax ; KNL_64-NEXT: vpinsrd $2, (%rax), %xmm0, %xmm0 -; KNL_64-NEXT: jmp .LBB31_6 +; KNL_64-NEXT: .LBB31_6: # %else5 +; KNL_64-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; KNL_64-NEXT: vblendvps %xmm1, %xmm0, %xmm3, %xmm0 +; KNL_64-NEXT: vzeroupper +; KNL_64-NEXT: retq ; ; KNL_32-LABEL: test30: ; KNL_32: # %bb.0: -; KNL_32-NEXT: pushl %esi -; KNL_32-NEXT: .cfi_def_cfa_offset 8 -; KNL_32-NEXT: .cfi_offset %esi, -8 -; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %edx +; KNL_32-NEXT: subl $12, %esp +; KNL_32-NEXT: .cfi_def_cfa_offset 16 +; KNL_32-NEXT: vpslld $31, %xmm2, %xmm2 +; KNL_32-NEXT: vptestmd %zmm2, %zmm2, %k1 +; KNL_32-NEXT: kmovw %k1, %eax ; KNL_32-NEXT: vpslld $2, %xmm1, %xmm1 ; KNL_32-NEXT: vpaddd %xmm1, %xmm0, %xmm1 -; KNL_32-NEXT: testb $1, %dl +; KNL_32-NEXT: testb $1, %al ; KNL_32-NEXT: # implicit-def: %xmm0 -; KNL_32-NEXT: jne .LBB31_1 -; KNL_32-NEXT: # %bb.2: # %else -; KNL_32-NEXT: testb $1, %cl -; KNL_32-NEXT: jne .LBB31_3 +; KNL_32-NEXT: je .LBB31_2 +; KNL_32-NEXT: # %bb.1: # %cond.load +; KNL_32-NEXT: vmovd %xmm1, %eax +; KNL_32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; KNL_32-NEXT: .LBB31_2: # %else +; KNL_32-NEXT: kshiftrw $1, %k1, %k0 +; KNL_32-NEXT: kmovw %k0, %eax +; KNL_32-NEXT: testb $1, %al +; KNL_32-NEXT: je .LBB31_4 +; KNL_32-NEXT: # %bb.3: # %cond.load1 +; KNL_32-NEXT: vpextrd $1, %xmm1, %eax +; KNL_32-NEXT: vpinsrd $1, (%eax), %xmm0, %xmm0 ; KNL_32-NEXT: .LBB31_4: # %else2 +; KNL_32-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm2 +; KNL_32-NEXT: kshiftrw $2, %k1, %k0 +; 
KNL_32-NEXT: kmovw %k0, %eax ; KNL_32-NEXT: testb $1, %al -; KNL_32-NEXT: jne .LBB31_5 +; KNL_32-NEXT: je .LBB31_6 +; KNL_32-NEXT: # %bb.5: # %cond.load4 +; KNL_32-NEXT: vpextrd $2, %xmm1, %eax +; KNL_32-NEXT: vpinsrd $2, (%eax), %xmm0, %xmm0 ; KNL_32-NEXT: .LBB31_6: # %else5 -; KNL_32-NEXT: vmovd %edx, %xmm1 -; KNL_32-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1 -; KNL_32-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1 +; KNL_32-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; KNL_32-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0 -; KNL_32-NEXT: popl %esi +; KNL_32-NEXT: addl $12, %esp +; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl -; KNL_32-NEXT: .LBB31_1: # %cond.load -; KNL_32-NEXT: vmovd %xmm1, %esi -; KNL_32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; KNL_32-NEXT: testb $1, %cl -; KNL_32-NEXT: je .LBB31_4 -; KNL_32-NEXT: .LBB31_3: # %cond.load1 -; KNL_32-NEXT: vpextrd $1, %xmm1, %esi -; KNL_32-NEXT: vpinsrd $1, (%esi), %xmm0, %xmm0 -; KNL_32-NEXT: testb $1, %al -; KNL_32-NEXT: je .LBB31_6 -; KNL_32-NEXT: .LBB31_5: # %cond.load4 -; KNL_32-NEXT: vpextrd $2, %xmm1, %esi -; KNL_32-NEXT: vpinsrd $2, (%esi), %xmm0, %xmm0 -; KNL_32-NEXT: jmp .LBB31_6 ; ; SKX-LABEL: test30: ; SKX: # %bb.0: @@ -2355,11 +2349,9 @@ ; KNL_64: # %bb.0: ; KNL_64-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL_64-NEXT: vpsrad $31, %xmm1, %xmm1 -; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1 -; KNL_64-NEXT: vmovdqa %ymm1, %ymm1 -; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1 +; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL_64-NEXT: kshiftlw $12, %k0, %k0 +; KNL_64-NEXT: kshiftrw $12, %k0, %k1 ; KNL_64-NEXT: vpgatherqq (,%zmm0), %zmm1 {%k1} ; KNL_64-NEXT: vpaddq %ymm1, %ymm1, %ymm0 ; KNL_64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 @@ -2376,12 +2368,10 @@ ; KNL_32-NEXT: subl $32, %esp ; KNL_32-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0 ; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL_32-NEXT: 
vpsrad $31, %xmm1, %xmm1 -; KNL_32-NEXT: vpmovsxdq %xmm1, %ymm1 -; KNL_32-NEXT: vmovdqa %ymm1, %ymm1 +; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL_32-NEXT: kshiftlw $12, %k0, %k0 +; KNL_32-NEXT: kshiftrw $12, %k0, %k1 ; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0 -; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1 ; KNL_32-NEXT: vpgatherqq (,%zmm0), %zmm1 {%k1} ; KNL_32-NEXT: vpaddq %ymm1, %ymm1, %ymm0 ; KNL_32-NEXT: vpaddq %ymm0, %ymm1, %ymm0 @@ -2547,14 +2537,14 @@ ; KNL_64-LABEL: large_index: ; KNL_64: # %bb.0: ; KNL_64-NEXT: # kill: def %xmm1 killed %xmm1 def %ymm1 -; KNL_64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; KNL_64-NEXT: vmovaps %xmm0, %xmm0 -; KNL_64-NEXT: vmovq %rcx, %xmm2 -; KNL_64-NEXT: vmovq %rsi, %xmm3 -; KNL_64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] -; KNL_64-NEXT: vpslld $31, %ymm0, %ymm0 -; KNL_64-NEXT: vptestmd %zmm0, %zmm0, %k1 -; KNL_64-NEXT: vgatherqps (%rdi,%zmm2,4), %ymm1 {%k1} +; KNL_64-NEXT: vpsllq $63, %xmm0, %xmm0 +; KNL_64-NEXT: vptestmq %zmm0, %zmm0, %k0 +; KNL_64-NEXT: kshiftlw $14, %k0, %k0 +; KNL_64-NEXT: kshiftrw $14, %k0, %k1 +; KNL_64-NEXT: vmovq %rcx, %xmm0 +; KNL_64-NEXT: vmovq %rsi, %xmm2 +; KNL_64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm1 {%k1} ; KNL_64-NEXT: vmovaps %xmm1, %xmm0 ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq @@ -2562,16 +2552,16 @@ ; KNL_32-LABEL: large_index: ; KNL_32: # %bb.0: ; KNL_32-NEXT: # kill: def %xmm1 killed %xmm1 def %ymm1 -; KNL_32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; KNL_32-NEXT: vmovaps %xmm0, %xmm0 +; KNL_32-NEXT: vpsllq $63, %xmm0, %xmm0 +; KNL_32-NEXT: vptestmq %zmm0, %zmm0, %k0 +; KNL_32-NEXT: kshiftlw $14, %k0, %k0 +; KNL_32-NEXT: kshiftrw $14, %k0, %k1 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; KNL_32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm2, %xmm2 -; KNL_32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), 
%xmm2, %xmm2 -; KNL_32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm2, %xmm2 -; KNL_32-NEXT: vpslld $31, %ymm0, %ymm0 -; KNL_32-NEXT: vptestmd %zmm0, %zmm0, %k1 -; KNL_32-NEXT: vgatherqps (%eax,%zmm2,4), %ymm1 {%k1} +; KNL_32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; KNL_32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm1 {%k1} ; KNL_32-NEXT: vmovaps %xmm1, %xmm0 ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl @@ -2700,9 +2690,10 @@ ; KNL_64-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL_64-NEXT: vpsllq $32, %xmm1, %xmm1 ; KNL_64-NEXT: vpsraq $32, %zmm1, %zmm1 -; KNL_64-NEXT: vmovdqa %xmm2, %xmm2 -; KNL_64-NEXT: vpsllq $63, %zmm2, %zmm2 -; KNL_64-NEXT: vptestmq %zmm2, %zmm2, %k1 +; KNL_64-NEXT: vpsllq $63, %xmm2, %xmm2 +; KNL_64-NEXT: vptestmq %zmm2, %zmm2, %k0 +; KNL_64-NEXT: kshiftlw $14, %k0, %k0 +; KNL_64-NEXT: kshiftrw $14, %k0, %k1 ; KNL_64-NEXT: vscatterqpd %zmm0, (%rdi,%zmm1,8) {%k1} ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq @@ -2712,10 +2703,11 @@ ; KNL_32-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL_32-NEXT: vpsllq $32, %xmm1, %xmm1 ; KNL_32-NEXT: vpsraq $32, %zmm1, %zmm1 -; KNL_32-NEXT: vmovdqa %xmm2, %xmm2 +; KNL_32-NEXT: vpsllq $63, %xmm2, %xmm2 +; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k0 +; KNL_32-NEXT: kshiftlw $14, %k0, %k0 +; KNL_32-NEXT: kshiftrw $14, %k0, %k1 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpsllq $63, %zmm2, %zmm2 -; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1 ; KNL_32-NEXT: vscatterqpd %zmm0, (%eax,%zmm1,8) {%k1} ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl Index: test/CodeGen/X86/masked_memop.ll =================================================================== --- test/CodeGen/X86/masked_memop.ll +++ test/CodeGen/X86/masked_memop.ll @@ -99,10 +99,15 @@ ; ; AVX512F-LABEL: test6: ; AVX512F: ## %bb.0: +; AVX512F-NEXT: ## kill: def %xmm1 
killed %xmm1 def %zmm1 +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 -; AVX512F-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 +; AVX512F-NEXT: vpcmpeqq %zmm2, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $14, %k0, %k0 +; AVX512F-NEXT: kshiftrw $14, %k0, %k1 +; AVX512F-NEXT: vblendmpd (%rdi), %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: test6: @@ -127,10 +132,15 @@ ; ; AVX512F-LABEL: test7: ; AVX512F: ## %bb.0: +; AVX512F-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 -; AVX512F-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 +; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $12, %k0, %k0 +; AVX512F-NEXT: kshiftrw $12, %k0, %k1 +; AVX512F-NEXT: vblendmps (%rdi), %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: test7: @@ -163,10 +173,15 @@ ; ; AVX512F-LABEL: test8: ; AVX512F: ## %bb.0: +; AVX512F-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 -; AVX512F-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 +; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $12, %k0, %k0 +; AVX512F-NEXT: kshiftrw $12, %k0, %k1 +; AVX512F-NEXT: vpblendmd (%rdi), %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: test8: @@ -197,9 +212,14 @@ ; ; 
AVX512F-LABEL: test9: ; AVX512F: ## %bb.0: +; AVX512F-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) +; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $12, %k0, %k0 +; AVX512F-NEXT: kshiftrw $12, %k0, %k1 +; AVX512F-NEXT: vmovdqu32 %zmm1, (%rdi) {%k1} +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: test9: @@ -237,11 +257,14 @@ ; ; AVX512F-LABEL: test10: ; AVX512F: ## %bb.0: +; AVX512F-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1 +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vpmovsxdq %xmm0, %ymm0 -; AVX512F-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 -; AVX512F-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0 +; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $12, %k0, %k0 +; AVX512F-NEXT: kshiftrw $12, %k0, %k1 +; AVX512F-NEXT: vblendmpd (%rdi), %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0 ; AVX512F-NEXT: retq ; ; SKX-LABEL: test10: @@ -277,10 +300,13 @@ ; ; AVX512F-LABEL: test10b: ; AVX512F: ## %bb.0: +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vpmovsxdq %xmm0, %ymm0 -; AVX512F-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm0 +; AVX512F-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $12, %k0, %k0 +; AVX512F-NEXT: kshiftrw $12, %k0, %k1 +; AVX512F-NEXT: vmovupd (%rdi), %zmm0 {%k1} {z} +; AVX512F-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0 ; AVX512F-NEXT: retq ; ; SKX-LABEL: test10b: @@ -525,11 +551,14 @@ ; ; AVX512F-LABEL: test14: ; AVX512F: ## %bb.0: +; AVX512F-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, 
%xmm2 ; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] -; AVX512F-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; AVX512F-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) +; AVX512F-NEXT: vpcmpeqq %zmm2, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $14, %k0, %k0 +; AVX512F-NEXT: kshiftrw $14, %k0, %k1 +; AVX512F-NEXT: vmovups %zmm1, (%rdi) {%k1} +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: test14: @@ -569,10 +598,12 @@ ; AVX512F: ## %bb.0: ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] -; AVX512F-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] -; AVX512F-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) +; AVX512F-NEXT: vpcmpeqq %zmm2, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $14, %k0, %k0 +; AVX512F-NEXT: kshiftrw $14, %k0, %k1 +; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] +; AVX512F-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1} +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: test15: @@ -610,12 +641,15 @@ ; ; AVX512F-LABEL: test16: ; AVX512F: ## %bb.0: +; AVX512F-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] -; AVX512F-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; AVX512F-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 -; AVX512F-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 +; AVX512F-NEXT: vpcmpeqq %zmm2, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $14, %k0, %k0 +; AVX512F-NEXT: kshiftrw $14, %k0, %k1 +; AVX512F-NEXT: vblendmps (%rdi), %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: test16: @@ -659,12 +693,13 @@ ; AVX512F: ## %bb.0: ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 
; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] -; AVX512F-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; AVX512F-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 -; AVX512F-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,3] -; AVX512F-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 +; AVX512F-NEXT: vpcmpeqq %zmm2, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $14, %k0, %k0 +; AVX512F-NEXT: kshiftrw $14, %k0, %k1 +; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] +; AVX512F-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} ; AVX512F-NEXT: vpmovsxdq %xmm0, %xmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: test17: @@ -704,9 +739,12 @@ ; AVX512F: ## %bb.0: ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; AVX512F-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; AVX512F-NEXT: vmaskmovps (%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $14, %k0, %k0 +; AVX512F-NEXT: kshiftrw $14, %k0, %k1 +; AVX512F-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: test18: @@ -729,8 +767,11 @@ ; ; AVX512F-LABEL: load_all: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vmaskmovps (%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: movw $15, %ax +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: load_all: @@ -755,9 +796,12 @@ ; ; AVX512F-LABEL: mload_constmask_v4f32: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vmovaps {{.*#+}} xmm1 = [4294967295,0,4294967295,4294967295] -; AVX512F-NEXT: vmaskmovps (%rdi), %xmm1, %xmm2 -; AVX512F-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0 +; AVX512F-NEXT: 
## kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512F-NEXT: movw $13, %ax +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vmovups (%rdi), %zmm0 {%k1} +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: mload_constmask_v4f32: @@ -789,9 +833,12 @@ ; ; AVX512F-LABEL: mload_constmask_v4i32: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [0,4294967295,4294967295,4294967295] -; AVX512F-NEXT: vpmaskmovd (%rdi), %xmm1, %xmm2 -; AVX512F-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0 +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512F-NEXT: movw $14, %ax +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: mload_constmask_v4i32: @@ -843,9 +890,11 @@ ; ; AVX512F-LABEL: mload_constmask_v4f64: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vmovapd {{.*#+}} ymm1 = [18446744073709551615,18446744073709551615,18446744073709551615,0] -; AVX512F-NEXT: vmaskmovpd (%rdi), %ymm1, %ymm2 -; AVX512F-NEXT: vblendvpd %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0 +; AVX512F-NEXT: movb $7, %al +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vmovupd (%rdi), %zmm0 {%k1} +; AVX512F-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0 ; AVX512F-NEXT: retq ; ; SKX-LABEL: mload_constmask_v4f64: @@ -898,9 +947,11 @@ ; ; AVX512F-LABEL: mload_constmask_v4i64: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [18446744073709551615,0,0,18446744073709551615] -; AVX512F-NEXT: vpmaskmovq (%rdi), %ymm1, %ymm2 -; AVX512F-NEXT: vblendvpd %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0 +; AVX512F-NEXT: movb $9, %al +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} +; AVX512F-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0 ; AVX512F-NEXT: retq ; ; 
SKX-LABEL: mload_constmask_v4i64: @@ -950,8 +1001,10 @@ ; ; AVX512F-LABEL: mload_constmask_v4f64_undef_passthrough: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,18446744073709551615,0] -; AVX512F-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm0 +; AVX512F-NEXT: movb $7, %al +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vmovupd (%rdi), %zmm0 {%k1} {z} +; AVX512F-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0 ; AVX512F-NEXT: retq ; ; SKX-LABEL: mload_constmask_v4f64_undef_passthrough: @@ -979,8 +1032,10 @@ ; ; AVX512F-LABEL: mload_constmask_v4i64_undef_passthrough: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm0 = [0,18446744073709551615,18446744073709551615,0] -; AVX512F-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm0 +; AVX512F-NEXT: movb $6, %al +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z} +; AVX512F-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0 ; AVX512F-NEXT: retq ; ; SKX-LABEL: mload_constmask_v4i64_undef_passthrough: @@ -1008,8 +1063,11 @@ ; ; AVX512F-LABEL: test21: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) +; AVX512F-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512F-NEXT: movw $15, %ax +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vmovdqu32 %zmm1, (%rdi) {%k1} +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: test21: @@ -1225,7 +1283,14 @@ ; ; AVX512F-LABEL: trunc_mask: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vmaskmovps %xmm0, %xmm2, (%rdi) +; AVX512F-NEXT: ## kill: def %xmm2 killed %xmm2 def %zmm2 +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm1, %k0 +; AVX512F-NEXT: kshiftlw $12, %k0, %k0 +; AVX512F-NEXT: kshiftrw $12, %k0, %k1 +; AVX512F-NEXT: vmovups %zmm0, (%rdi) {%k1} +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: trunc_mask: Index: 
test/CodeGen/X86/pr33349.ll =================================================================== --- test/CodeGen/X86/pr33349.ll +++ test/CodeGen/X86/pr33349.ll @@ -8,32 +8,38 @@ define void @test(<4 x i1> %m, <4 x x86_fp80> %v, <4 x x86_fp80>*%p) local_unnamed_addr { ; KNL-LABEL: test: ; KNL: # %bb.0: # %bb -; KNL-NEXT: vpextrb $0, %xmm0, %eax +; KNL-NEXT: vpslld $31, %xmm0, %xmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: kshiftrw $2, %k0, %k1 +; KNL-NEXT: kshiftrw $1, %k1, %k2 +; KNL-NEXT: kmovw %k2, %eax ; KNL-NEXT: testb $1, %al ; KNL-NEXT: fld1 ; KNL-NEXT: fldz ; KNL-NEXT: fld %st(0) ; KNL-NEXT: fcmovne %st(2), %st(0) -; KNL-NEXT: vpextrb $4, %xmm0, %eax +; KNL-NEXT: kshiftrw $1, %k0, %k2 +; KNL-NEXT: kmovw %k2, %eax ; KNL-NEXT: testb $1, %al ; KNL-NEXT: fld %st(1) ; KNL-NEXT: fcmovne %st(3), %st(0) -; KNL-NEXT: vpextrb $8, %xmm0, %eax +; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: testb $1, %al ; KNL-NEXT: fld %st(2) ; KNL-NEXT: fcmovne %st(4), %st(0) -; KNL-NEXT: vpextrb $12, %xmm0, %eax +; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: testb $1, %al ; KNL-NEXT: fxch %st(3) ; KNL-NEXT: fcmovne %st(4), %st(0) ; KNL-NEXT: fstp %st(4) ; KNL-NEXT: fxch %st(3) -; KNL-NEXT: fstpt 30(%rdi) +; KNL-NEXT: fstpt (%rdi) ; KNL-NEXT: fxch %st(1) ; KNL-NEXT: fstpt 20(%rdi) ; KNL-NEXT: fxch %st(1) ; KNL-NEXT: fstpt 10(%rdi) -; KNL-NEXT: fstpt (%rdi) +; KNL-NEXT: fstpt 30(%rdi) +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test: Index: test/CodeGen/X86/sse-fsignum.ll =================================================================== --- test/CodeGen/X86/sse-fsignum.ll +++ test/CodeGen/X86/sse-fsignum.ll @@ -10,17 +10,44 @@ ; define void @signum32a(<4 x float>*) { -; AVX-LABEL: signum32a: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovaps (%rdi), %xmm0 -; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vcmpltps %xmm1, %xmm0, %xmm2 -; AVX-NEXT: vcvtdq2ps %xmm2, %xmm2 -; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 -; AVX-NEXT: vsubps %xmm0, 
%xmm2, %xmm0 -; AVX-NEXT: vmovaps %xmm0, (%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: signum32a: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vmovaps (%rdi), %xmm0 +; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vcmpltps %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vcvtdq2ps %xmm2, %xmm2 +; AVX1-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX1-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: vmovaps %xmm0, (%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: signum32a: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vmovaps (%rdi), %xmm0 +; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vcmpltps %xmm1, %xmm0, %xmm2 +; AVX2-NEXT: vcvtdq2ps %xmm2, %xmm2 +; AVX2-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX2-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; AVX2-NEXT: vmovaps %xmm0, (%rdi) +; AVX2-NEXT: retq +; +; AVX512F-LABEL: signum32a: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vmovaps (%rdi), %xmm0 +; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vcmpltps %zmm1, %zmm0, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} +; AVX512F-NEXT: vcvtdq2ps %xmm2, %xmm2 +; AVX512F-NEXT: vcmpltps %zmm0, %zmm1, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX512F-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; AVX512F-NEXT: vmovaps %xmm0, (%rdi) +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq entry: %1 = load <4 x float>, <4 x float>* %0 %2 = fcmp olt <4 x float> %1, zeroinitializer @@ -33,19 +60,48 @@ } define void @signum64a(<2 x double>*) { -; AVX-LABEL: signum64a: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovapd (%rdi), %xmm0 -; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vcmpltpd %xmm1, %xmm0, %xmm2 -; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,2,2,3] -; AVX-NEXT: vcvtdq2pd %xmm2, %xmm2 -; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 -; AVX-NEXT: vsubpd %xmm0, %xmm2, %xmm0 -; 
AVX-NEXT: vmovapd %xmm0, (%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: signum64a: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vmovapd (%rdi), %xmm0 +; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vcmpltpd %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,2,2,3] +; AVX1-NEXT: vcvtdq2pd %xmm2, %xmm2 +; AVX1-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX1-NEXT: vsubpd %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: vmovapd %xmm0, (%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: signum64a: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vmovapd (%rdi), %xmm0 +; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vcmpltpd %xmm1, %xmm0, %xmm2 +; AVX2-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,2,2,3] +; AVX2-NEXT: vcvtdq2pd %xmm2, %xmm2 +; AVX2-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX2-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX2-NEXT: vsubpd %xmm0, %xmm2, %xmm0 +; AVX2-NEXT: vmovapd %xmm0, (%rdi) +; AVX2-NEXT: retq +; +; AVX512F-LABEL: signum64a: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vmovapd (%rdi), %xmm0 +; AVX512F-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vcmpltpd %zmm1, %zmm0, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} +; AVX512F-NEXT: vcvtdq2pd %xmm2, %xmm2 +; AVX512F-NEXT: vcmpltpd %zmm0, %zmm1, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX512F-NEXT: vsubpd %xmm0, %xmm2, %xmm0 +; AVX512F-NEXT: vmovapd %xmm0, (%rdi) +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq entry: %1 = load <2 x double>, <2 x double>* %0 %2 = fcmp olt <2 x double> %1, zeroinitializer @@ -152,11 +208,11 @@ ; AVX512F: # %bb.0: # %entry ; AVX512F-NEXT: vmovapd (%rdi), %ymm0 ; AVX512F-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2 -; AVX512F-NEXT: vpmovqd %zmm2, %ymm2 +; AVX512F-NEXT: vcmpltpd %zmm1, %zmm0, %k1 +; AVX512F-NEXT: vpternlogd 
$255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; AVX512F-NEXT: vcvtdq2pd %xmm2, %ymm2 -; AVX512F-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 -; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512F-NEXT: vcmpltpd %zmm0, %zmm1, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: vcvtdq2pd %xmm0, %ymm0 ; AVX512F-NEXT: vsubpd %ymm0, %ymm2, %ymm0 ; AVX512F-NEXT: vmovapd %ymm0, (%rdi) Index: test/CodeGen/X86/vector-shuffle-v1.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-v1.ll +++ test/CodeGen/X86/vector-shuffle-v1.ll @@ -6,7 +6,15 @@ define <2 x i1> @shuf2i1_1_0(<2 x i1> %a) { ; AVX512F-LABEL: shuf2i1_1_0: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1] +; AVX512F-NEXT: vpsllq $63, %xmm0, %xmm0 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 +; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; AVX512F-NEXT: vpsllq $63, %xmm0, %xmm0 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 +; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: shuf2i1_1_0: @@ -37,9 +45,17 @@ define <2 x i1> @shuf2i1_1_2(<2 x i1> %a) { ; AVX512F-LABEL: shuf2i1_1_2: ; AVX512F: # %bb.0: -; AVX512F-NEXT: movl $1, %eax +; AVX512F-NEXT: vpsllq $63, %xmm0, %xmm0 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 +; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: movq $-1, %rax ; AVX512F-NEXT: vmovq %rax, %xmm1 ; AVX512F-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] +; AVX512F-NEXT: vpsllq $63, %xmm0, %xmm0 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 +; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: shuf2i1_1_2: @@ 
-75,7 +91,15 @@ define <4 x i1> @shuf4i1_3_2_10(<4 x i1> %a) { ; AVX512F-LABEL: shuf4i1_3_2_10: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] +; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0] +; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: shuf4i1_3_2_10: Index: test/CodeGen/X86/vselect-pcmp.ll =================================================================== --- test/CodeGen/X86/vselect-pcmp.ll +++ test/CodeGen/X86/vselect-pcmp.ll @@ -43,10 +43,17 @@ } define <4 x i32> @signbit_sel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) { -; AVX12F-LABEL: signbit_sel_v4i32: -; AVX12F: # %bb.0: -; AVX12F-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 -; AVX12F-NEXT: retq +; AVX12-LABEL: signbit_sel_v4i32: +; AVX12: # %bb.0: +; AVX12-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; AVX12-NEXT: retq +; +; AVX512F-LABEL: signbit_sel_v4i32: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2 +; AVX512F-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: signbit_sel_v4i32: ; AVX512VL: # %bb.0: @@ -60,10 +67,17 @@ } define <2 x i64> @signbit_sel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) { -; AVX12F-LABEL: signbit_sel_v2i64: -; AVX12F: # %bb.0: -; AVX12F-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX12F-NEXT: retq +; AVX12-LABEL: signbit_sel_v2i64: +; AVX12: # %bb.0: +; AVX12-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX12-NEXT: retq +; +; AVX512F-LABEL: signbit_sel_v2i64: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpgtq 
%xmm2, %xmm3, %xmm2 +; AVX512F-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: signbit_sel_v2i64: ; AVX512VL: # %bb.0: @@ -77,10 +91,17 @@ } define <4 x float> @signbit_sel_v4f32(<4 x float> %x, <4 x float> %y, <4 x i32> %mask) { -; AVX12F-LABEL: signbit_sel_v4f32: -; AVX12F: # %bb.0: -; AVX12F-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 -; AVX12F-NEXT: retq +; AVX12-LABEL: signbit_sel_v4f32: +; AVX12: # %bb.0: +; AVX12-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; AVX12-NEXT: retq +; +; AVX512F-LABEL: signbit_sel_v4f32: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2 +; AVX512F-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: signbit_sel_v4f32: ; AVX512VL: # %bb.0: @@ -94,10 +115,17 @@ } define <2 x double> @signbit_sel_v2f64(<2 x double> %x, <2 x double> %y, <2 x i64> %mask) { -; AVX12F-LABEL: signbit_sel_v2f64: -; AVX12F: # %bb.0: -; AVX12F-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX12F-NEXT: retq +; AVX12-LABEL: signbit_sel_v2f64: +; AVX12: # %bb.0: +; AVX12-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX12-NEXT: retq +; +; AVX512F-LABEL: signbit_sel_v2f64: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2 +; AVX512F-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: signbit_sel_v2f64: ; AVX512VL: # %bb.0: @@ -182,13 +210,9 @@ ; ; AVX512F-LABEL: signbit_sel_v8i32: ; AVX512F: # %bb.0: -; AVX512F-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 -; AVX512F-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 -; AVX512F-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm3, %k1 -; AVX512F-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} -; AVX512F-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 +; AVX512F-NEXT: vpcmpgtd %ymm2, %ymm3, %ymm2 +; AVX512F-NEXT: vblendvps %ymm2, 
%ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: signbit_sel_v8i32: @@ -203,10 +227,17 @@ } define <4 x i64> @signbit_sel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %mask) { -; AVX12F-LABEL: signbit_sel_v4i64: -; AVX12F: # %bb.0: -; AVX12F-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 -; AVX12F-NEXT: retq +; AVX12-LABEL: signbit_sel_v4i64: +; AVX12: # %bb.0: +; AVX12-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX12-NEXT: retq +; +; AVX512F-LABEL: signbit_sel_v4i64: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2 +; AVX512F-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: signbit_sel_v4i64: ; AVX512VL: # %bb.0: @@ -220,10 +251,17 @@ } define <4 x double> @signbit_sel_v4f64(<4 x double> %x, <4 x double> %y, <4 x i64> %mask) { -; AVX12F-LABEL: signbit_sel_v4f64: -; AVX12F: # %bb.0: -; AVX12F-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 -; AVX12F-NEXT: retq +; AVX12-LABEL: signbit_sel_v4f64: +; AVX12: # %bb.0: +; AVX12-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX12-NEXT: retq +; +; AVX512F-LABEL: signbit_sel_v4f64: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2 +; AVX512F-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: signbit_sel_v4f64: ; AVX512VL: # %bb.0: @@ -256,6 +294,8 @@ ; ; AVX512F-LABEL: signbit_sel_v4f64_small_mask: ; AVX512F: # %bb.0: +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2 ; AVX512F-NEXT: vpmovsxdq %xmm2, %ymm2 ; AVX512F-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: retq