Index: lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -3605,6 +3605,7 @@ SDValue InOp0 = GetWidenedVector(N->getOperand(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(1)); SDLoc dl(N); + EVT VT = N->getValueType(0); // WARNING: In this code we widen the compare instruction with garbage. // This garbage may contain denormal floats which may be slow. Is this a real @@ -3614,8 +3615,13 @@ // Only some of the compared elements are legal. EVT SVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), InOp0.getValueType()); + // The result type is legal; if it's vXi1, keep vXi1 for the new SETCC. + if (VT.getScalarType() == MVT::i1) + SVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, + SVT.getVectorNumElements()); + SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N), - SVT, InOp0, InOp1, N->getOperand(2)); + SVT, InOp0, InOp1, N->getOperand(2)); // Extract the needed results from the result vector. EVT ResVT = EVT::getVectorVT(*DAG.getContext(), Index: lib/Target/X86/X86ISelDAGToDAG.cpp =================================================================== --- lib/Target/X86/X86ISelDAGToDAG.cpp +++ lib/Target/X86/X86ISelDAGToDAG.cpp @@ -460,7 +460,7 @@ // this happens we will use 512-bit operations and the mask will not be // zero extended. 
EVT OpVT = N->getOperand(0).getValueType(); - if (OpVT == MVT::v8i32 || OpVT == MVT::v8f32) + if (OpVT.is256BitVector() || OpVT.is128BitVector()) return Subtarget->hasVLX(); return true; Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -1144,6 +1144,8 @@ addRegisterClass(MVT::v8f64, &X86::VR512RegClass); addRegisterClass(MVT::v1i1, &X86::VK1RegClass); + addRegisterClass(MVT::v2i1, &X86::VK2RegClass); + addRegisterClass(MVT::v4i1, &X86::VK4RegClass); addRegisterClass(MVT::v8i1, &X86::VK8RegClass); addRegisterClass(MVT::v16i1, &X86::VK16RegClass); @@ -1160,15 +1162,14 @@ setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Custom); - // Extends of v16i1/v8i1 to 128-bit vectors. - setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v16i8, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::v16i8, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v8i16, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::v8i16, Custom); + // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors. 
+ for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { + setOperationAction(ISD::SIGN_EXTEND, VT, Custom); + setOperationAction(ISD::ZERO_EXTEND, VT, Custom); + setOperationAction(ISD::ANY_EXTEND, VT, Custom); + } - for (auto VT : { MVT::v8i1, MVT::v16i1 }) { + for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) { setOperationAction(ISD::ADD, VT, Custom); setOperationAction(ISD::SUB, VT, Custom); setOperationAction(ISD::MUL, VT, Custom); @@ -1184,9 +1185,12 @@ } setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom); - for (auto VT : { MVT::v1i1, MVT::v8i1 }) + for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 }) setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); for (MVT VT : MVT::fp_vector_valuetypes()) @@ -1517,41 +1521,6 @@ } if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) { - addRegisterClass(MVT::v4i1, &X86::VK4RegClass); - addRegisterClass(MVT::v2i1, &X86::VK2RegClass); - - for (auto VT : { MVT::v2i1, MVT::v4i1 }) { - setOperationAction(ISD::ADD, VT, Custom); - setOperationAction(ISD::SUB, VT, Custom); - setOperationAction(ISD::MUL, VT, Custom); - setOperationAction(ISD::VSELECT, VT, Expand); - - setOperationAction(ISD::TRUNCATE, VT, Custom); - setOperationAction(ISD::SETCC, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::SELECT, VT, Custom); - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); - } - - // TODO: v8i1 concat should be legal without VLX to support concats of - // v1i1, but we won't legalize it correctly currently without 
introducing - // a v4i1 concat in the middle. - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom); - for (auto VT : { MVT::v2i1, MVT::v4i1 }) - setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); - - // Extends from v2i1/v4i1 masks to 128-bit vectors. - setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Custom); - setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal); setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal); setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal); @@ -4931,8 +4900,6 @@ } else if (VT.getVectorElementType() == MVT::i1) { assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && "Unexpected vector type"); - assert((Subtarget.hasVLX() || VT.getVectorNumElements() >= 8) && - "Unexpected vector type"); Vec = DAG.getConstant(0, dl, VT); } else { unsigned Num32BitElts = VT.getSizeInBits() / 32; @@ -17745,6 +17712,19 @@ assert(EltVT == MVT::f32 || EltVT == MVT::f64); #endif + // Custom widen MVT::v2f32 to prevent the default widening + // from getting a result type of v4i32, extracting it to v2i32 and then + // trying to sign extend that to v2i1. 
+ if (VT == MVT::v2i1 && Op1.getValueType() == MVT::v2f32) { + Op0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Op0, + DAG.getUNDEF(MVT::v2f32)); + Op1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Op1, + DAG.getUNDEF(MVT::v2f32)); + SDValue NewOp = DAG.getNode(ISD::SETCC, dl, MVT::v4i1, Op0, Op1, CC); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i1, NewOp, + DAG.getIntPtrConstant(0, dl)); + } + unsigned Opc; if (Subtarget.hasAVX512() && VT.getVectorElementType() == MVT::i1) { assert(VT.getVectorNumElements() <= 16); @@ -24383,8 +24363,8 @@ // Mask // At this point we have promoted mask operand - assert(MaskVT.getScalarSizeInBits() >= 32 && "unexpected mask type"); - MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), NumElts); + assert(MaskVT.getScalarType() == MVT::i1 && "unexpected mask type"); + MVT ExtMaskVT = MVT::getVectorVT(MVT::i1, NumElts); // Use the original mask here, do not modify the mask twice Mask = ExtendToType(N->getMask(), ExtMaskVT, DAG, true); @@ -24393,12 +24373,9 @@ Src = ExtendToType(Src, NewVT, DAG); } } - // If the mask is "wide" at this point - truncate it to i1 vector - MVT BitMaskVT = MVT::getVectorVT(MVT::i1, NumElts); - Mask = DAG.getNode(ISD::TRUNCATE, dl, BitMaskVT, Mask); // The mask is killed by scatter, add it to the values - SDVTList VTs = DAG.getVTList(BitMaskVT, MVT::Other); + SDVTList VTs = DAG.getVTList(Mask.getValueType(), MVT::Other); SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index}; SDValue NewScatter = DAG.getTargetMemSDNode( VTs, Ops, dl, N->getMemoryVT(), N->getMemOperand()); @@ -24421,11 +24398,6 @@ assert((!N->isExpandingLoad() || ScalarVT.getSizeInBits() >= 32) && "Expanding masked load is supported for 32 and 64-bit types only!"); - // 4x32, 4x64 and 2x64 vectors of non-expanding loads are legal regardless of - // VLX. These types for exp-loads are handled here. 
- if (!N->isExpandingLoad() && VT.getVectorNumElements() <= 4) - return Op; - assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() && "Cannot lower masked load op."); @@ -24442,16 +24414,12 @@ Src0 = ExtendToType(Src0, WideDataVT, DAG); // Mask element has to be i1. - MVT MaskEltTy = Mask.getSimpleValueType().getScalarType(); - assert((MaskEltTy == MVT::i1 || VT.getVectorNumElements() <= 4) && - "We handle 4x32, 4x64 and 2x64 vectors only in this case"); + assert(Mask.getSimpleValueType().getScalarType() == MVT::i1 && + "Unexpected mask type"); - MVT WideMaskVT = MVT::getVectorVT(MaskEltTy, NumEltsInWideVec); + MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec); Mask = ExtendToType(Mask, WideMaskVT, DAG, true); - if (MaskEltTy != MVT::i1) - Mask = DAG.getNode(ISD::TRUNCATE, dl, - MVT::getVectorVT(MVT::i1, NumEltsInWideVec), Mask); SDValue NewLoad = DAG.getMaskedLoad(WideDataVT, dl, N->getChain(), N->getBasePtr(), Mask, Src0, N->getMemoryVT(), N->getMemOperand(), @@ -24480,10 +24448,6 @@ assert((!N->isCompressingStore() || ScalarVT.getSizeInBits() >= 32) && "Expanding masked load is supported for 32 and 64-bit types only!"); - // 4x32 and 2x64 vectors of non-compressing stores are legal regardless to VLX. - if (!N->isCompressingStore() && VT.getVectorNumElements() <= 4) - return Op; - assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() && "Cannot lower masked store op."); @@ -24498,17 +24462,13 @@ MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec); // Mask element has to be i1. 
- MVT MaskEltTy = Mask.getSimpleValueType().getScalarType(); - assert((MaskEltTy == MVT::i1 || VT.getVectorNumElements() <= 4) && - "We handle 4x32, 4x64 and 2x64 vectors only in this case"); + assert(Mask.getSimpleValueType().getScalarType() == MVT::i1 && + "Unexpected mask type"); - MVT WideMaskVT = MVT::getVectorVT(MaskEltTy, NumEltsInWideVec); + MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec); DataToStore = ExtendToType(DataToStore, WideDataVT, DAG); Mask = ExtendToType(Mask, WideMaskVT, DAG, true); - if (MaskEltTy != MVT::i1) - Mask = DAG.getNode(ISD::TRUNCATE, dl, - MVT::getVectorVT(MVT::i1, NumEltsInWideVec), Mask); return DAG.getMaskedStore(N->getChain(), dl, DataToStore, N->getBasePtr(), Mask, N->getMemoryVT(), N->getMemOperand(), N->isTruncatingStore(), N->isCompressingStore()); @@ -24558,12 +24518,9 @@ Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index); // Mask - MVT MaskBitVT = MVT::getVectorVT(MVT::i1, NumElts); - // At this point we have promoted mask operand - assert(MaskVT.getScalarSizeInBits() >= 32 && "unexpected mask type"); - MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), NumElts); - Mask = ExtendToType(Mask, ExtMaskVT, DAG, true); - Mask = DAG.getNode(ISD::TRUNCATE, dl, MaskBitVT, Mask); + assert(MaskVT.getScalarType() == MVT::i1 && "unexpected mask type"); + MaskVT = MVT::getVectorVT(MVT::i1, NumElts); + Mask = ExtendToType(Mask, MaskVT, DAG, true); // The pass-through value MVT NewVT = MVT::getVectorVT(VT.getScalarType(), NumElts); @@ -24571,7 +24528,7 @@ SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index }; SDValue NewGather = DAG.getTargetMemSDNode( - DAG.getVTList(NewVT, MaskBitVT, MVT::Other), Ops, dl, N->getMemoryVT(), + DAG.getVTList(NewVT, MaskVT, MVT::Other), Ops, dl, N->getMemoryVT(), N->getMemOperand()); SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, NewGather.getValue(0), @@ -30423,7 +30380,7 @@ // If this is a bitcast between a MVT::v4i1/v2i1 and an illegal integer 
// type, widen both sides to avoid a trip through memory. if ((VT == MVT::v4i1 || VT == MVT::v2i1) && SrcVT.isScalarInteger() && - Subtarget.hasVLX()) { + Subtarget.hasAVX512()) { SDLoc dl(N); N0 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i8, N0); N0 = DAG.getBitcast(MVT::v8i1, N0); @@ -30434,7 +30391,7 @@ // If this is a bitcast between a MVT::v4i1/v2i1 and an illegal integer // type, widen both sides to avoid a trip through memory. if ((SrcVT == MVT::v4i1 || SrcVT == MVT::v2i1) && VT.isScalarInteger() && - Subtarget.hasVLX()) { + Subtarget.hasAVX512()) { SDLoc dl(N); unsigned NumConcats = 8 / SrcVT.getVectorNumElements(); SmallVector Ops(NumConcats, DAG.getUNDEF(SrcVT)); Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -2962,46 +2962,77 @@ defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, SSE_PSHUF>; defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, SSE_PSHUF>; -multiclass axv512_icmp_packed_no_vlx_lowering { -def : Pat<(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), - (COPY_TO_REGCLASS (!cast(InstStr##Zrr) - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>; - -def : Pat<(v8i1 (and VK8:$mask, - (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2)))), +multiclass axv512_icmp_packed_no_vlx_lowering { +def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1), + (Narrow.VT Narrow.RC:$src2))), + (COPY_TO_REGCLASS + (!cast(InstStr##Zrr) + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))), + Narrow.KRC)>; + +def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (OpNode (Narrow.VT Narrow.RC:$src1), + (Narrow.VT Narrow.RC:$src2)))), (COPY_TO_REGCLASS (!cast(InstStr##Zrrk) - (COPY_TO_REGCLASS VK8:$mask, 
VK16), - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), - VK8)>; + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))), + Narrow.KRC)>; } multiclass axv512_icmp_packed_cc_no_vlx_lowering { -def : Pat<(v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)), - (COPY_TO_REGCLASS (!cast(InstStr##Zrri) - (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), - imm:$cc), VK8)>; - -def : Pat<(v8i1 (and VK8:$mask, (OpNode (_.info256.VT VR256X:$src1), - (_.info256.VT VR256X:$src2), imm:$cc))), - (COPY_TO_REGCLASS (!cast(InstStr##Zrrik) - (COPY_TO_REGCLASS VK8:$mask, VK16), - (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), - imm:$cc), VK8)>; + X86VectorVTInfo Narrow, + X86VectorVTInfo Wide> { +def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1), + (Narrow.VT Narrow.RC:$src2), imm:$cc)), + (COPY_TO_REGCLASS + (!cast(InstStr##Zrri) + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), + imm:$cc), Narrow.KRC)>; + +def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (OpNode (Narrow.VT Narrow.RC:$src1), + (Narrow.VT Narrow.RC:$src2), imm:$cc))), + (COPY_TO_REGCLASS (!cast(InstStr##Zrrik) + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), + imm:$cc), Narrow.KRC)>; } let Predicates = [HasAVX512, NoVLX] in { - defm : axv512_icmp_packed_no_vlx_lowering; - defm : 
axv512_icmp_packed_no_vlx_lowering; + defm : axv512_icmp_packed_no_vlx_lowering; + defm : axv512_icmp_packed_no_vlx_lowering; + + defm : axv512_icmp_packed_no_vlx_lowering; + defm : axv512_icmp_packed_no_vlx_lowering; + + defm : axv512_icmp_packed_no_vlx_lowering; + defm : axv512_icmp_packed_no_vlx_lowering; - defm : axv512_icmp_packed_cc_no_vlx_lowering; - defm : axv512_icmp_packed_cc_no_vlx_lowering; - defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_no_vlx_lowering; + defm : axv512_icmp_packed_no_vlx_lowering; + + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; } // Mask setting all 0s or 1s @@ -3376,8 +3407,15 @@ // Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't // available. Use a 512-bit operation and extract. 
let Predicates = [HasAVX512, NoVLX] in { + defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>; + defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>; defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>; defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>; + + defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>; + defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>; + defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>; + defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>; } let Predicates = [HasAVX512] in { Index: lib/Target/X86/X86InstrVecCompiler.td =================================================================== --- lib/Target/X86/X86InstrVecCompiler.td +++ lib/Target/X86/X86InstrVecCompiler.td @@ -495,6 +495,18 @@ // If the bits are not zero we have to fall back to explicitly zeroing by // using shifts. +let Predicates = [HasAVX512] in { + def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV), + (v2i1 VK2:$mask), (iPTR 0))), + (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK2:$mask, VK16), + (i8 14)), (i8 14))>; + + def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV), + (v4i1 VK4:$mask), (iPTR 0))), + (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK4:$mask, VK16), + (i8 12)), (i8 12))>; +} + let Predicates = [HasAVX512, NoDQI] in { def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV), (v8i1 VK8:$mask), (iPTR 0))), @@ -506,9 +518,7 @@ def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV), (v8i1 VK8:$mask), (iPTR 0))), (COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK16)>; -} -let Predicates = [HasVLX, HasDQI] in { def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV), (v2i1 VK2:$mask), (iPTR 0))), (KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK2:$mask, VK8), @@ -519,17 +529,6 @@ (i8 4)), (i8 4))>; } -let Predicates = [HasVLX] in { - def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV), - (v2i1 VK2:$mask), (iPTR 0))), - (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS 
VK2:$mask, VK16), - (i8 14)), (i8 14))>; - def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV), - (v4i1 VK4:$mask), (iPTR 0))), - (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK4:$mask, VK16), - (i8 12)), (i8 12))>; -} - let Predicates = [HasBWI] in { def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV), (v16i1 VK16:$mask), (iPTR 0))), Index: test/Analysis/CostModel/X86/cast.ll =================================================================== --- test/Analysis/CostModel/X86/cast.ll +++ test/Analysis/CostModel/X86/cast.ll @@ -8,11 +8,17 @@ define i32 @add(i32 %arg) { ; CHECK-LABEL: for function 'add' ; -- Same size registeres -- - ;CHECK: cost of 1 {{.*}} zext + ;CHECK-AVX512: cost of 12 {{.*}} zext + ;CHECK-AVX2: cost of 1 {{.*}} zext + ;CHECK-AVX: cost of 1 {{.*}} zext %A = zext <4 x i1> undef to <4 x i32> - ;CHECK: cost of 2 {{.*}} sext + ;CHECK-AVX512: cost of 12 {{.*}} sext + ;CHECK-AVX2: cost of 2 {{.*}} sext + ;CHECK-AVX: cost of 2 {{.*}} sext %B = sext <4 x i1> undef to <4 x i32> - ;CHECK: cost of 0 {{.*}} trunc + ;CHECK-AVX512: cost of 0 {{.*}} trunc + ;CHECK-AVX2: cost of 0 {{.*}} trunc + ;CHECK-AVX: cost of 0 {{.*}} trunc %C = trunc <4 x i32> undef to <4 x i1> ; -- Different size registers -- Index: test/CodeGen/X86/avx512-calling-conv.ll =================================================================== --- test/CodeGen/X86/avx512-calling-conv.ll +++ test/CodeGen/X86/avx512-calling-conv.ll @@ -57,21 +57,18 @@ } define <4 x i1> @test4(<4 x i1>%a, <4 x i1>%b) { -; KNL-LABEL: test4: -; KNL: ## %bb.0: -; KNL-NEXT: vandps %xmm1, %xmm0, %xmm0 -; KNL-NEXT: retq -; -; SKX-LABEL: test4: -; SKX: ## %bb.0: -; SKX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; SKX-NEXT: vpslld $31, %xmm0, %xmm0 -; SKX-NEXT: vpsrad $31, %xmm0, %xmm0 -; SKX-NEXT: retq +; ALL_X64-LABEL: test4: +; ALL_X64: ## %bb.0: +; ALL_X64-NEXT: vpand %xmm1, %xmm0, %xmm0 +; ALL_X64-NEXT: vpslld $31, %xmm0, %xmm0 +; ALL_X64-NEXT: vpsrad $31, %xmm0, %xmm0 +; ALL_X64-NEXT: retq ; ; KNL_X32-LABEL: test4: ; 
KNL_X32: ## %bb.0: -; KNL_X32-NEXT: vandps %xmm1, %xmm0, %xmm0 +; KNL_X32-NEXT: vpand %xmm1, %xmm0, %xmm0 +; KNL_X32-NEXT: vpslld $31, %xmm0, %xmm0 +; KNL_X32-NEXT: vpsrad $31, %xmm0, %xmm0 ; KNL_X32-NEXT: retl %c = and <4 x i1>%a, %b ret <4 x i1> %c Index: test/CodeGen/X86/avx512-cvt.ll =================================================================== --- test/CodeGen/X86/avx512-cvt.ll +++ test/CodeGen/X86/avx512-cvt.ll @@ -702,9 +702,10 @@ ; NOVL-LABEL: f64to4f32_mask: ; NOVL: # %bb.0: ; NOVL-NEXT: vpslld $31, %xmm1, %xmm1 -; NOVL-NEXT: vpsrad $31, %xmm1, %xmm1 +; NOVL-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NOVL-NEXT: vcvtpd2ps %ymm0, %xmm0 -; NOVL-NEXT: vpand %xmm0, %xmm1, %xmm0 +; NOVL-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z} +; NOVL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NOVL-NEXT: vzeroupper ; NOVL-NEXT: retq ; @@ -743,9 +744,12 @@ define <4 x double> @f32to4f64_mask(<4 x float> %b, <4 x double> %b1, <4 x double> %a1) { ; NOVL-LABEL: f32to4f64_mask: ; NOVL: # %bb.0: +; NOVL-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 +; NOVL-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NOVL-NEXT: vcvtps2pd %xmm0, %ymm0 -; NOVL-NEXT: vcmpltpd %ymm2, %ymm1, %ymm1 -; NOVL-NEXT: vandpd %ymm0, %ymm1, %ymm0 +; NOVL-NEXT: vcmpltpd %zmm2, %zmm1, %k1 +; NOVL-NEXT: vmovapd %zmm0, %zmm0 {%k1} {z} +; NOVL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; NOVL-NEXT: retq ; ; VL-LABEL: f32to4f64_mask: @@ -1591,12 +1595,15 @@ } define <4 x float> @sbto4f32(<4 x float> %a) { -; NOVL-LABEL: sbto4f32: -; NOVL: # %bb.0: -; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; NOVL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 -; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0 -; NOVL-NEXT: retq +; NOVLDQ-LABEL: sbto4f32: +; NOVLDQ: # %bb.0: +; NOVLDQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NOVLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; NOVLDQ-NEXT: vcmpltps %zmm0, %zmm1, %k1 +; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; NOVLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0 +; NOVLDQ-NEXT: vzeroupper +; 
NOVLDQ-NEXT: retq ; ; VLDQ-LABEL: sbto4f32: ; VLDQ: # %bb.0: @@ -1614,19 +1621,30 @@ ; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; VLNODQ-NEXT: vcvtdq2ps %xmm0, %xmm0 ; VLNODQ-NEXT: retq +; +; AVX512DQ-LABEL: sbto4f32: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm1, %k0 +; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 +; AVX512DQ-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq %cmpres = fcmp ogt <4 x float> %a, zeroinitializer %1 = sitofp <4 x i1> %cmpres to <4 x float> ret <4 x float> %1 } define <4 x double> @sbto4f64(<4 x double> %a) { -; NOVL-LABEL: sbto4f64: -; NOVL: # %bb.0: -; NOVL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; NOVL-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 -; NOVL-NEXT: vpmovqd %zmm0, %ymm0 -; NOVL-NEXT: vcvtdq2pd %xmm0, %ymm0 -; NOVL-NEXT: retq +; NOVLDQ-LABEL: sbto4f64: +; NOVLDQ: # %bb.0: +; NOVLDQ-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NOVLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; NOVLDQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1 +; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; NOVLDQ-NEXT: vcvtdq2pd %xmm0, %ymm0 +; NOVLDQ-NEXT: retq ; ; VLDQ-LABEL: sbto4f64: ; VLDQ: # %bb.0: @@ -1644,18 +1662,30 @@ ; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; VLNODQ-NEXT: vcvtdq2pd %xmm0, %ymm0 ; VLNODQ-NEXT: retq +; +; AVX512DQ-LABEL: sbto4f64: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; AVX512DQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm1, %k0 +; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 +; AVX512DQ-NEXT: vcvtdq2pd %xmm0, %ymm0 +; AVX512DQ-NEXT: retq %cmpres = fcmp ogt <4 x double> %a, zeroinitializer %1 = sitofp <4 x i1> %cmpres to <4 x double> ret <4 x double> %1 } define <2 x float> @sbto2f32(<2 x float> %a) { -; NOVL-LABEL: sbto2f32: -; NOVL: # %bb.0: -; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; NOVL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 -; 
NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0 -; NOVL-NEXT: retq +; NOVLDQ-LABEL: sbto2f32: +; NOVLDQ: # %bb.0: +; NOVLDQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NOVLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; NOVLDQ-NEXT: vcmpltps %zmm0, %zmm1, %k1 +; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; NOVLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0 +; NOVLDQ-NEXT: vzeroupper +; NOVLDQ-NEXT: retq ; ; VLDQ-LABEL: sbto2f32: ; VLDQ: # %bb.0: @@ -1673,19 +1703,31 @@ ; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; VLNODQ-NEXT: vcvtdq2ps %xmm0, %xmm0 ; VLNODQ-NEXT: retq +; +; AVX512DQ-LABEL: sbto2f32: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm1, %k0 +; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 +; AVX512DQ-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq %cmpres = fcmp ogt <2 x float> %a, zeroinitializer %1 = sitofp <2 x i1> %cmpres to <2 x float> ret <2 x float> %1 } define <2 x double> @sbto2f64(<2 x double> %a) { -; NOVL-LABEL: sbto2f64: -; NOVL: # %bb.0: -; NOVL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; NOVL-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 -; NOVL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] -; NOVL-NEXT: vcvtdq2pd %xmm0, %xmm0 -; NOVL-NEXT: retq +; NOVLDQ-LABEL: sbto2f64: +; NOVLDQ: # %bb.0: +; NOVLDQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NOVLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; NOVLDQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1 +; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; NOVLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0 +; NOVLDQ-NEXT: vzeroupper +; NOVLDQ-NEXT: retq ; ; VLDQ-LABEL: sbto2f64: ; VLDQ: # %bb.0: @@ -1703,6 +1745,16 @@ ; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; VLNODQ-NEXT: vcvtdq2pd %xmm0, %xmm0 ; VLNODQ-NEXT: retq +; +; AVX512DQ-LABEL: sbto2f64: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512DQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX512DQ-NEXT: vcmpltpd 
%zmm0, %zmm1, %k0 +; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 +; AVX512DQ-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq %cmpres = fcmp ogt <2 x double> %a, zeroinitializer %1 = sitofp <2 x i1> %cmpres to <2 x double> ret <2 x double> %1 @@ -1925,10 +1977,12 @@ define <4 x float> @ubto4f32(<4 x i32> %a) { ; NOVL-LABEL: ubto4f32: ; NOVL: # %bb.0: +; NOVL-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NOVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NOVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] -; NOVL-NEXT: vpand %xmm1, %xmm0, %xmm0 +; NOVL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 +; NOVL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} +; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0 +; NOVL-NEXT: vzeroupper ; NOVL-NEXT: retq ; ; VL-LABEL: ubto4f32: @@ -1946,9 +2000,10 @@ define <4 x double> @ubto4f64(<4 x i32> %a) { ; NOVL-LABEL: ubto4f64: ; NOVL: # %bb.0: +; NOVL-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NOVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NOVL-NEXT: vpsrld $31, %xmm0, %xmm0 +; NOVL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 +; NOVL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} ; NOVL-NEXT: vcvtdq2pd %xmm0, %ymm0 ; NOVL-NEXT: retq ; @@ -1969,14 +2024,10 @@ ; NOVL: # %bb.0: ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; NOVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NOVL-NEXT: vpextrb $8, %xmm0, %eax -; NOVL-NEXT: andl $1, %eax -; NOVL-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm1 -; NOVL-NEXT: vpextrb $0, %xmm0, %eax -; NOVL-NEXT: andl $1, %eax -; NOVL-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0 -; NOVL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] +; NOVL-NEXT: vpcmpltuq %zmm1, %zmm0, %k1 +; NOVL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} +; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0 +; NOVL-NEXT: vzeroupper ; NOVL-NEXT: retq ; ; VL-LABEL: ubto2f32: @@ -1997,10 +2048,8 @@ ; NOVL: # %bb.0: ; NOVL-NEXT: vpxor %xmm1, 
%xmm1, %xmm1 ; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; NOVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NOVL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; NOVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] -; NOVL-NEXT: vpand %xmm1, %xmm0, %xmm0 +; NOVL-NEXT: vpcmpltuq %zmm1, %zmm0, %k1 +; NOVL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} ; NOVL-NEXT: vcvtudq2pd %ymm0, %zmm0 ; NOVL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NOVL-NEXT: vzeroupper Index: test/CodeGen/X86/avx512-ext.ll =================================================================== --- test/CodeGen/X86/avx512-ext.ll +++ test/CodeGen/X86/avx512-ext.ll @@ -301,9 +301,10 @@ ; KNL-LABEL: zext_4x8mem_to_4x32: ; KNL: # %bb.0: ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 -; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 -; KNL-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero -; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero +; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: zext_4x8mem_to_4x32: @@ -322,9 +323,10 @@ ; KNL-LABEL: sext_4x8mem_to_4x32: ; KNL: # %bb.0: ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 -; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 -; KNL-NEXT: vpmovsxbd (%rdi), %xmm1 -; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovsxbd (%rdi), %xmm0 +; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: sext_4x8mem_to_4x32: @@ -489,9 +491,10 @@ ; KNL-LABEL: zext_2x8mem_to_2x64: ; KNL: # %bb.0: ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 -; KNL-NEXT: vpsraq $63, %zmm0, %zmm0 -; KNL-NEXT: vpmovzxbq {{.*#+}} xmm1 = 
mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero -; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 +; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero +; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: zext_2x8mem_to_2x64: @@ -509,9 +512,10 @@ ; KNL-LABEL: sext_2x8mem_to_2x64mask: ; KNL: # %bb.0: ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 -; KNL-NEXT: vpsraq $63, %zmm0, %zmm0 -; KNL-NEXT: vpmovsxbq (%rdi), %xmm1 -; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 +; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovsxbq (%rdi), %xmm0 +; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: sext_2x8mem_to_2x64mask: @@ -539,10 +543,10 @@ ; KNL-LABEL: zext_4x8mem_to_4x64: ; KNL: # %bb.0: ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 -; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 -; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; KNL-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero -; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero +; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: zext_4x8mem_to_4x64: @@ -561,10 +565,10 @@ ; KNL-LABEL: sext_4x8mem_to_4x64mask: ; KNL: # %bb.0: ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 -; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 -; KNL-NEXT: vpmovsxdq %xmm0, %ymm0 -; KNL-NEXT: 
vpmovsxbq (%rdi), %ymm1 -; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovsxbq (%rdi), %ymm0 +; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: sext_4x8mem_to_4x64mask: @@ -645,9 +649,10 @@ ; KNL-LABEL: zext_4x16mem_to_4x32: ; KNL: # %bb.0: ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 -; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 -; KNL-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero -; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: zext_4x16mem_to_4x32: @@ -666,9 +671,10 @@ ; KNL-LABEL: sext_4x16mem_to_4x32mask: ; KNL: # %bb.0: ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 -; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 -; KNL-NEXT: vpmovsxwd (%rdi), %xmm1 -; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovsxwd (%rdi), %xmm0 +; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: sext_4x16mem_to_4x32mask: @@ -865,9 +871,10 @@ ; KNL-LABEL: zext_2x16mem_to_2x64: ; KNL: # %bb.0: ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 -; KNL-NEXT: vpsraq $63, %zmm0, %zmm0 -; KNL-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero -; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 +; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero +; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: zext_2x16mem_to_2x64: @@ -886,9 +893,10 @@ ; KNL-LABEL: sext_2x16mem_to_2x64mask: ; KNL: # %bb.0: ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 -; KNL-NEXT: 
vpsraq $63, %zmm0, %zmm0 -; KNL-NEXT: vpmovsxwq (%rdi), %xmm1 -; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 +; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovsxwq (%rdi), %xmm0 +; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: sext_2x16mem_to_2x64mask: @@ -917,10 +925,10 @@ ; KNL-LABEL: zext_4x16mem_to_4x64: ; KNL: # %bb.0: ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 -; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 -; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; KNL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero -; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero +; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: zext_4x16mem_to_4x64: @@ -939,10 +947,10 @@ ; KNL-LABEL: sext_4x16mem_to_4x64mask: ; KNL: # %bb.0: ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 -; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 -; KNL-NEXT: vpmovsxdq %xmm0, %ymm0 -; KNL-NEXT: vpmovsxwq (%rdi), %ymm1 -; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovsxwq (%rdi), %ymm0 +; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: sext_4x16mem_to_4x64mask: @@ -1052,9 +1060,10 @@ ; KNL-LABEL: zext_2x32mem_to_2x64: ; KNL: # %bb.0: ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 -; KNL-NEXT: vpsraq $63, %zmm0, %zmm0 -; KNL-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero -; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 +; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero +; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def 
%xmm0 killed %xmm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: zext_2x32mem_to_2x64: @@ -1073,9 +1082,10 @@ ; KNL-LABEL: sext_2x32mem_to_2x64mask: ; KNL: # %bb.0: ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 -; KNL-NEXT: vpsraq $63, %zmm0, %zmm0 -; KNL-NEXT: vpmovsxdq (%rdi), %xmm1 -; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 +; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovsxdq (%rdi), %xmm0 +; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: sext_2x32mem_to_2x64mask: @@ -1104,10 +1114,10 @@ ; KNL-LABEL: zext_4x32mem_to_4x64: ; KNL: # %bb.0: ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 -; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 -; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; KNL-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero -; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: zext_4x32mem_to_4x64: @@ -1126,10 +1136,10 @@ ; KNL-LABEL: sext_4x32mem_to_4x64mask: ; KNL: # %bb.0: ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 -; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 -; KNL-NEXT: vpmovsxdq %xmm0, %ymm0 -; KNL-NEXT: vpmovsxdq (%rdi), %ymm1 -; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovsxdq (%rdi), %ymm0 +; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: sext_4x32mem_to_4x64mask: @@ -1167,10 +1177,10 @@ ; KNL-LABEL: zext_4x32_to_4x64mask: ; KNL: # %bb.0: ; KNL-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL-NEXT: vpsrad $31, %xmm1, %xmm1 -; KNL-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero +; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 ; 
KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0 +; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: zext_4x32_to_4x64mask: Index: test/CodeGen/X86/avx512-insert-extract.ll =================================================================== --- test/CodeGen/X86/avx512-insert-extract.ll +++ test/CodeGen/X86/avx512-insert-extract.ll @@ -845,40 +845,20 @@ define i8 @test_iinsertelement_v4i1(i32 %a, i32 %b, <4 x i32> %x , <4 x i32> %y) { ; KNL-LABEL: test_iinsertelement_v4i1: ; KNL: ## %bb.0: +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL-NEXT: cmpl %esi, %edi ; KNL-NEXT: setb %al -; KNL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; KNL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; KNL-NEXT: vpextrb $4, %xmm0, %ecx -; KNL-NEXT: kmovw %ecx, %k0 -; KNL-NEXT: vpextrb $0, %xmm0, %ecx -; KNL-NEXT: andl $1, %ecx -; KNL-NEXT: kmovw %ecx, %k1 -; KNL-NEXT: kshiftrw $1, %k0, %k2 -; KNL-NEXT: kshiftlw $1, %k2, %k2 -; KNL-NEXT: korw %k1, %k2, %k1 -; KNL-NEXT: kshiftrw $1, %k1, %k2 -; KNL-NEXT: kxorw %k0, %k2, %k0 -; KNL-NEXT: kshiftlw $15, %k0, %k0 -; KNL-NEXT: kshiftrw $14, %k0, %k0 -; KNL-NEXT: kxorw %k1, %k0, %k0 +; KNL-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; KNL-NEXT: kshiftrw $2, %k0, %k1 ; KNL-NEXT: kmovw %eax, %k2 ; KNL-NEXT: kxorw %k2, %k1, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $13, %k1, %k1 ; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $3, %k0, %k1 -; KNL-NEXT: vpextrb $12, %xmm0, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $12, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: ## kill: def %al killed 
%al killed %eax +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test_iinsertelement_v4i1: @@ -906,18 +886,11 @@ define i8 @test_iinsertelement_v2i1(i32 %a, i32 %b, <2 x i64> %x , <2 x i64> %y) { ; KNL-LABEL: test_iinsertelement_v2i1: ; KNL: ## %bb.0: +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL-NEXT: cmpl %esi, %edi ; KNL-NEXT: setb %al -; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; KNL-NEXT: vpextrb $0, %xmm0, %ecx -; KNL-NEXT: andl $1, %ecx -; KNL-NEXT: kmovw %ecx, %k0 -; KNL-NEXT: kshiftrw $1, %k0, %k1 -; KNL-NEXT: kshiftlw $1, %k1, %k1 -; KNL-NEXT: korw %k0, %k1, %k0 +; KNL-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ; KNL-NEXT: kshiftrw $1, %k0, %k1 ; KNL-NEXT: kmovw %eax, %k2 ; KNL-NEXT: kxorw %k2, %k1, %k1 @@ -926,6 +899,7 @@ ; KNL-NEXT: kxorw %k0, %k1, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: ## kill: def %al killed %al killed %eax +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test_iinsertelement_v2i1: @@ -953,15 +927,15 @@ define zeroext i8 @test_extractelement_v2i1(<2 x i64> %a, <2 x i64> %b) { ; KNL-LABEL: test_extractelement_v2i1: ; KNL: ## %bb.0: -; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; KNL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vpextrb $0, %xmm0, %eax +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; KNL-NEXT: vpcmpnleuq %zmm1, %zmm0, %k0 +; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: andb $1, %al ; KNL-NEXT: movb $4, %cl ; KNL-NEXT: subb %al, %cl ; KNL-NEXT: movzbl %cl, %eax +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test_extractelement_v2i1: @@ -982,15 +956,15 @@ define zeroext i8 @extractelement_v2i1_alt(<2 x i64> %a, <2 
x i64> %b) { ; KNL-LABEL: extractelement_v2i1_alt: ; KNL: ## %bb.0: -; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; KNL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vpextrb $0, %xmm0, %eax +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; KNL-NEXT: vpcmpnleuq %zmm1, %zmm0, %k0 +; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: andb $1, %al ; KNL-NEXT: movb $4, %cl ; KNL-NEXT: subb %al, %cl ; KNL-NEXT: movzbl %cl, %eax +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: extractelement_v2i1_alt: @@ -1012,12 +986,13 @@ define zeroext i8 @test_extractelement_v4i1(<4 x i32> %a, <4 x i32> %b) { ; KNL-LABEL: test_extractelement_v4i1: ; KNL: ## %bb.0: -; KNL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; KNL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vpextrd $3, %xmm0, %eax +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 +; KNL-NEXT: kshiftrw $3, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: andl $1, %eax +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test_extractelement_v4i1: @@ -1551,14 +1526,15 @@ ; KNL-LABEL: test_extractelement_varible_v2i1: ; KNL: ## %bb.0: ; KNL-NEXT: ## kill: def %edi killed %edi def %rdi -; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; KNL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; KNL-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 +; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; 
KNL-NEXT: vextracti32x4 $0, %zmm0, -{{[0-9]+}}(%rsp) ; KNL-NEXT: andl $1, %edi -; KNL-NEXT: movl -24(%rsp,%rdi,8), %eax +; KNL-NEXT: movzbl -24(%rsp,%rdi,8), %eax ; KNL-NEXT: andl $1, %eax +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test_extractelement_varible_v2i1: @@ -1581,14 +1557,15 @@ ; KNL-LABEL: test_extractelement_varible_v4i1: ; KNL: ## %bb.0: ; KNL-NEXT: ## kill: def %edi killed %edi def %rdi -; KNL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; KNL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k1 +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: vextracti32x4 $0, %zmm0, -{{[0-9]+}}(%rsp) ; KNL-NEXT: andl $3, %edi -; KNL-NEXT: movl -24(%rsp,%rdi,4), %eax +; KNL-NEXT: movzbl -24(%rsp,%rdi,4), %eax ; KNL-NEXT: andl $1, %eax +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test_extractelement_varible_v4i1: Index: test/CodeGen/X86/avx512-intrinsics-upgrade.ll =================================================================== --- test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -3004,20 +3004,8 @@ define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) { ; CHECK-LABEL: test_mask_vextractf32x4: ; CHECK: ## %bb.0: -; CHECK-NEXT: vmovd %edi, %xmm2 -; CHECK-NEXT: kmovw %edi, %k0 -; CHECK-NEXT: kshiftrw $3, %k0, %k1 -; CHECK-NEXT: kmovw %k1, %eax -; CHECK-NEXT: kshiftrw $2, %k0, %k1 -; CHECK-NEXT: kmovw %k1, %ecx -; CHECK-NEXT: kshiftrw $1, %k0, %k0 -; CHECK-NEXT: kmovw %k0, %edx -; CHECK-NEXT: vpinsrb $4, %edx, %xmm2, %xmm2 -; CHECK-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2 -; CHECK-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; CHECK-NEXT: vextractf32x4 $2, 
%zmm1, %xmm1 -; CHECK-NEXT: vpslld $31, %xmm2, %xmm2 -; CHECK-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i32 2, <4 x float> %b, i8 %mask) ret <4 x float> %res @@ -3028,21 +3016,8 @@ define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) { ; CHECK-LABEL: test_mask_vextracti64x4: ; CHECK: ## %bb.0: -; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm1 -; CHECK-NEXT: vmovd %edi, %xmm2 -; CHECK-NEXT: kmovw %edi, %k0 -; CHECK-NEXT: kshiftrw $3, %k0, %k1 -; CHECK-NEXT: kmovw %k1, %eax -; CHECK-NEXT: kshiftrw $2, %k0, %k1 -; CHECK-NEXT: kmovw %k1, %ecx -; CHECK-NEXT: kshiftrw $1, %k0, %k0 -; CHECK-NEXT: kmovw %k0, %edx -; CHECK-NEXT: vpinsrb $4, %edx, %xmm2, %xmm2 -; CHECK-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2 -; CHECK-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; CHECK-NEXT: vpslld $31, %xmm2, %xmm2 -; CHECK-NEXT: vpmovsxdq %xmm2, %ymm2 -; CHECK-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm0 {%k1} ; CHECK-NEXT: retq %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i32 1, <4 x i64> %b, i8 %mask) ret <4 x i64> %res @@ -3053,21 +3028,8 @@ define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) { ; CHECK-LABEL: test_maskz_vextracti32x4: ; CHECK: ## %bb.0: -; CHECK-NEXT: vmovd %edi, %xmm1 -; CHECK-NEXT: kmovw %edi, %k0 -; CHECK-NEXT: kshiftrw $3, %k0, %k1 -; CHECK-NEXT: kmovw %k1, %eax -; CHECK-NEXT: kshiftrw $2, %k0, %k1 -; CHECK-NEXT: kmovw %k1, %ecx -; CHECK-NEXT: kshiftrw $1, %k0, %k0 -; CHECK-NEXT: kmovw %k0, %edx -; CHECK-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; CHECK-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; CHECK-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm0 -; CHECK-NEXT: vpslld $31, %xmm1, %xmm1 -; CHECK-NEXT: vpsrad $31, %xmm1, 
%xmm1 -; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i32 2, <4 x i32> zeroinitializer, i8 %mask) ret <4 x i32> %res Index: test/CodeGen/X86/avx512-mask-op.ll =================================================================== --- test/CodeGen/X86/avx512-mask-op.ll +++ test/CodeGen/X86/avx512-mask-op.ll @@ -498,11 +498,15 @@ define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) { ; KNL-LABEL: test4: ; KNL: ## %bb.0: -; KNL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; KNL-NEXT: vpmovqd %zmm0, %ymm0 -; KNL-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm1 -; KNL-NEXT: vpmovqd %zmm1, %ymm1 -; KNL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 +; KNL-NEXT: ## kill: def %ymm3 killed %ymm3 def %zmm3 +; KNL-NEXT: ## kill: def %ymm2 killed %ymm2 def %zmm2 +; KNL-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1 +; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0 +; KNL-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; KNL-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 +; KNL-NEXT: kandnw %k0, %k1, %k1 +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; @@ -517,21 +521,29 @@ ; ; AVX512BW-LABEL: test4: ; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512BW-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm1 -; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: ## kill: def %ymm3 killed %ymm3 def %zmm3 +; AVX512BW-NEXT: ## kill: def %ymm2 killed %ymm2 def %zmm2 +; AVX512BW-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1 +; AVX512BW-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0 +; AVX512BW-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; AVX512BW-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 +; AVX512BW-NEXT: kandnw %k0, %k1, %k1 +; AVX512BW-NEXT: vpternlogd 
$255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test4: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512DQ-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm1 -; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512DQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 +; AVX512DQ-NEXT: ## kill: def %ymm3 killed %ymm3 def %zmm3 +; AVX512DQ-NEXT: ## kill: def %ymm2 killed %ymm2 def %zmm2 +; AVX512DQ-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1 +; AVX512DQ-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0 +; AVX512DQ-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; AVX512DQ-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 +; AVX512DQ-NEXT: kandnw %k0, %k1, %k0 +; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 +; AVX512DQ-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq %x_gt_y = icmp sgt <4 x i64> %x, %y @@ -544,9 +556,16 @@ define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) { ; KNL-LABEL: test5: ; KNL: ## %bb.0: -; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; KNL-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm1 -; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; KNL-NEXT: ## kill: def %xmm3 killed %xmm3 def %zmm3 +; KNL-NEXT: ## kill: def %xmm2 killed %xmm2 def %zmm2 +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 +; KNL-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 +; KNL-NEXT: kandnw %k1, %k0, %k1 +; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test5: @@ -559,16 +578,30 @@ ; ; AVX512BW-LABEL: test5: ; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; AVX512BW-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm1 -; AVX512BW-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; AVX512BW-NEXT: ## kill: def 
%xmm3 killed %xmm3 def %zmm3 +; AVX512BW-NEXT: ## kill: def %xmm2 killed %xmm2 def %zmm2 +; AVX512BW-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512BW-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 +; AVX512BW-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 +; AVX512BW-NEXT: kandnw %k1, %k0, %k1 +; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test5: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; AVX512DQ-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm1 -; AVX512DQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; AVX512DQ-NEXT: ## kill: def %xmm3 killed %xmm3 def %zmm3 +; AVX512DQ-NEXT: ## kill: def %xmm2 killed %xmm2 def %zmm2 +; AVX512DQ-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512DQ-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512DQ-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 +; AVX512DQ-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 +; AVX512DQ-NEXT: kandnw %k1, %k0, %k0 +; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0 +; AVX512DQ-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq %x_gt_y = icmp slt <2 x i64> %x, %y %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1 @@ -795,10 +828,17 @@ ; KNL-LABEL: test11: ; KNL: ## %bb.0: ; KNL-NEXT: cmpl %esi, %edi -; KNL-NEXT: jg LBB20_2 -; KNL-NEXT: ## %bb.1: -; KNL-NEXT: vmovaps %xmm1, %xmm0 -; KNL-NEXT: LBB20_2: +; KNL-NEXT: jg LBB20_1 +; KNL-NEXT: ## %bb.2: +; KNL-NEXT: vpslld $31, %xmm1, %xmm0 +; KNL-NEXT: jmp LBB20_3 +; KNL-NEXT: LBB20_1: +; KNL-NEXT: vpslld $31, %xmm0, %xmm0 +; KNL-NEXT: LBB20_3: +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test11: @@ -818,19 +858,33 @@ ; AVX512BW-LABEL: test11: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: 
cmpl %esi, %edi -; AVX512BW-NEXT: jg LBB20_2 -; AVX512BW-NEXT: ## %bb.1: -; AVX512BW-NEXT: vmovaps %xmm1, %xmm0 -; AVX512BW-NEXT: LBB20_2: +; AVX512BW-NEXT: jg LBB20_1 +; AVX512BW-NEXT: ## %bb.2: +; AVX512BW-NEXT: vpslld $31, %xmm1, %xmm0 +; AVX512BW-NEXT: jmp LBB20_3 +; AVX512BW-NEXT: LBB20_1: +; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0 +; AVX512BW-NEXT: LBB20_3: +; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k1 +; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test11: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: cmpl %esi, %edi -; AVX512DQ-NEXT: jg LBB20_2 -; AVX512DQ-NEXT: ## %bb.1: -; AVX512DQ-NEXT: vmovaps %xmm1, %xmm0 -; AVX512DQ-NEXT: LBB20_2: +; AVX512DQ-NEXT: jg LBB20_1 +; AVX512DQ-NEXT: ## %bb.2: +; AVX512DQ-NEXT: vpslld $31, %xmm1, %xmm0 +; AVX512DQ-NEXT: jmp LBB20_3 +; AVX512DQ-NEXT: LBB20_1: +; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 +; AVX512DQ-NEXT: LBB20_3: +; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 +; AVX512DQ-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq %mask = icmp sgt i32 %a1, %b1 %c = select i1 %mask, <4 x i1>%a, <4 x i1>%b @@ -1271,8 +1325,7 @@ define void @test22(<4 x i1> %a, <4 x i1>* %addr) { ; KNL-LABEL: test22: ; KNL: ## %bb.0: -; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %ymm0 -; KNL-NEXT: vpslld $31, %ymm0, %ymm0 +; KNL-NEXT: vpslld $31, %xmm0, %xmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: movb %al, (%rdi) @@ -1288,8 +1341,7 @@ ; ; AVX512BW-LABEL: test22: ; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 def %ymm0 -; AVX512BW-NEXT: vpslld $31, %ymm0, %ymm0 +; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: movb %al, (%rdi) @@ -1298,8 +1350,7 @@ ; ; 
AVX512DQ-LABEL: test22: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: ## kill: def %xmm0 killed %xmm0 def %ymm0 -; AVX512DQ-NEXT: vpslld $31, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 ; AVX512DQ-NEXT: kmovb %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper @@ -1311,8 +1362,7 @@ define void @test23(<2 x i1> %a, <2 x i1>* %addr) { ; KNL-LABEL: test23: ; KNL: ## %bb.0: -; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 -; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 +; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: movb %al, (%rdi) @@ -1328,8 +1378,7 @@ ; ; AVX512BW-LABEL: test23: ; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 -; AVX512BW-NEXT: vpsllq $63, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: movb %al, (%rdi) @@ -1338,8 +1387,7 @@ ; ; AVX512DQ-LABEL: test23: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 -; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0 ; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0 ; AVX512DQ-NEXT: kmovb %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper @@ -1390,10 +1438,9 @@ define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) { ; KNL-LABEL: store_v2i1: ; KNL: ## %bb.0: -; KNL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 +; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 +; KNL-NEXT: knotw %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: movb %al, (%rdi) ; KNL-NEXT: vzeroupper @@ -1409,10 +1456,9 @@ ; ; AVX512BW-LABEL: store_v2i1: ; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpsllq $63, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0 ; AVX512BW-NEXT: vptestmq 
%zmm0, %zmm0, %k0 +; AVX512BW-NEXT: knotw %k0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: movb %al, (%rdi) ; AVX512BW-NEXT: vzeroupper @@ -1420,10 +1466,9 @@ ; ; AVX512DQ-LABEL: store_v2i1: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0 ; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: knotw %k0, %k0 ; AVX512DQ-NEXT: kmovb %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq @@ -1435,10 +1480,9 @@ define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) { ; KNL-LABEL: store_v4i1: ; KNL: ## %bb.0: -; KNL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vpslld $31, %ymm0, %ymm0 +; KNL-NEXT: vpslld $31, %xmm0, %xmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: knotw %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: movb %al, (%rdi) ; KNL-NEXT: vzeroupper @@ -1454,10 +1498,9 @@ ; ; AVX512BW-LABEL: store_v4i1: ; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpslld $31, %ymm0, %ymm0 +; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: knotw %k0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: movb %al, (%rdi) ; AVX512BW-NEXT: vzeroupper @@ -1465,10 +1508,9 @@ ; ; AVX512DQ-LABEL: store_v4i1: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpslld $31, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: knotw %k0, %k0 ; AVX512DQ-NEXT: kmovb %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq Index: test/CodeGen/X86/avx512-vec-cmp.ll =================================================================== --- test/CodeGen/X86/avx512-vec-cmp.ll +++ 
test/CodeGen/X86/avx512-vec-cmp.ll @@ -72,9 +72,13 @@ define <4 x float> @test7(<4 x float> %a, <4 x float> %b) { ; KNL-LABEL: test7: ; KNL: ## %bb.0: +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; KNL-NEXT: vcmpltps %xmm2, %xmm0, %xmm2 -; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; KNL-NEXT: vcmpltps %zmm2, %zmm0, %k1 +; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test7: @@ -92,9 +96,13 @@ define <2 x double> @test8(<2 x double> %a, <2 x double> %b) { ; KNL-LABEL: test8: ; KNL: ## %bb.0: +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL-NEXT: vxorpd %xmm2, %xmm2, %xmm2 -; KNL-NEXT: vcmpltpd %xmm2, %xmm0, %xmm2 -; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; KNL-NEXT: vcmpltpd %zmm2, %zmm0, %k1 +; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test8: @@ -543,8 +551,11 @@ define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind { ; KNL-LABEL: test30: ; KNL: ## %bb.0: -; KNL-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm2 -; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; KNL-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1 +; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0 +; KNL-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 +; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} +; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test30: @@ -561,8 +572,13 @@ define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, <2 x double>* %yp) nounwind { ; KNL-LABEL: test31: ; KNL: ## %bb.0: -; KNL-NEXT: vcmpltpd (%rdi), %xmm0, %xmm2 -; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; 
KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; KNL-NEXT: vmovupd (%rdi), %xmm2 +; KNL-NEXT: vcmpltpd %zmm2, %zmm0, %k1 +; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test31: @@ -580,8 +596,12 @@ define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, <4 x double>* %yp) nounwind { ; KNL-LABEL: test32: ; KNL: ## %bb.0: -; KNL-NEXT: vcmpltpd (%rdi), %ymm0, %ymm2 -; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; KNL-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1 +; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0 +; KNL-NEXT: vmovupd (%rdi), %ymm2 +; KNL-NEXT: vcmpltpd %zmm2, %zmm0, %k1 +; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} +; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test32: @@ -611,8 +631,13 @@ define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) nounwind { ; KNL-LABEL: test34: ; KNL: ## %bb.0: -; KNL-NEXT: vcmpltps (%rdi), %xmm0, %xmm2 -; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; KNL-NEXT: vmovups (%rdi), %xmm2 +; KNL-NEXT: vcmpltps %zmm2, %zmm0, %k1 +; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test34: @@ -680,9 +705,12 @@ define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nounwind { ; KNL-LABEL: test38: ; KNL: ## %bb.0: +; KNL-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1 +; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0 ; KNL-NEXT: vbroadcastsd (%rdi), %ymm2 -; KNL-NEXT: vcmpltpd %ymm2, %ymm0, %ymm2 -; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; KNL-NEXT: vcmpltpd %zmm2, %zmm0, %k1 +; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} +; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 
killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test38: @@ -703,9 +731,13 @@ define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nounwind { ; KNL-LABEL: test39: ; KNL: ## %bb.0: +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0] -; KNL-NEXT: vcmpltpd %xmm2, %xmm0, %xmm2 -; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; KNL-NEXT: vcmpltpd %zmm2, %zmm0, %k1 +; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test39: @@ -769,9 +801,13 @@ define <4 x float> @test42(<4 x float> %x, <4 x float> %x1, float* %ptr) nounwind { ; KNL-LABEL: test42: ; KNL: ## %bb.0: +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL-NEXT: vbroadcastss (%rdi), %xmm2 -; KNL-NEXT: vcmpltps %xmm2, %xmm0, %xmm2 -; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; KNL-NEXT: vcmpltps %zmm2, %zmm0, %k1 +; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test42: Index: test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll =================================================================== --- test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll +++ test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll @@ -6,18 +6,12 @@ define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_512(<8 x double> %x0, <2 x double> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_vextractf64x2_512: ; CHECK: ## %bb.0: -; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 -; CHECK-NEXT: vmovd %edi, %xmm2 -; CHECK-NEXT: kmovw %edi, %k0 -; CHECK-NEXT: kshiftrb $1, %k0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; CHECK-NEXT: vpsllq $63, %xmm2, %xmm2 -; CHECK-NEXT: vpsraq $63, %zmm2, %zmm2 -; 
CHECK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 -; CHECK-NEXT: vandpd %xmm0, %xmm2, %xmm2 -; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 -; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2 +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm1 {%k1} +; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm0 {%k1} {z} +; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm1 +; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> %x2, i8 %x3) %res2 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 %x3) Index: test/CodeGen/X86/avx512vl-vec-cmp.ll =================================================================== --- test/CodeGen/X86/avx512vl-vec-cmp.ll +++ test/CodeGen/X86/avx512vl-vec-cmp.ll @@ -11,8 +11,11 @@ ; ; NoVLX-LABEL: test256_1: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm2 -; NoVLX-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k1 +; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; NoVLX-NEXT: retq %mask = icmp eq <4 x i64> %x, %y %max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %y @@ -28,8 +31,12 @@ ; ; NoVLX-LABEL: test256_2: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0 +; NoVLX-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k1 +; NoVLX-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; NoVLX-NEXT: retq %mask = icmp sgt <4 x i64> %x, %y %max = select <4 x i1> %mask, <4 
x i64> %x1, <4 x i64> %y @@ -66,11 +73,12 @@ ; ; NoVLX-LABEL: test256_4: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm3, %ymm1, %ymm4 -; NoVLX-NEXT: vpxor %ymm3, %ymm0, %ymm0 -; NoVLX-NEXT: vpcmpgtq %ymm4, %ymm0, %ymm0 -; NoVLX-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0 +; NoVLX-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 +; NoVLX-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; NoVLX-NEXT: retq %mask = icmp ugt <4 x i64> %x, %y %max = select <4 x i1> %mask, <4 x i64> %x1, <4 x i64> %y @@ -289,12 +297,14 @@ ; ; NoVLX-LABEL: test256_10: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm3 -; NoVLX-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4 -; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3 -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm1 -; NoVLX-NEXT: vpandn %ymm3, %ymm1, %ymm1 -; NoVLX-NEXT: vblendvpd %ymm1, %ymm0, %ymm2, %ymm0 +; NoVLX-NEXT: # kill: def %ymm3 killed %ymm3 def %zmm3 +; NoVLX-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpleq %zmm1, %zmm0, %k1 +; NoVLX-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1} +; NoVLX-NEXT: vpblendmq %zmm0, %zmm2, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; NoVLX-NEXT: retq %mask1 = icmp sge <4 x i64> %x1, %y1 %mask0 = icmp sle <4 x i64> %x, %y @@ -313,10 +323,14 @@ ; ; NoVLX-LABEL: test256_11: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm3 -; NoVLX-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm2 -; NoVLX-NEXT: vpand %ymm2, %ymm3, %ymm2 -; NoVLX-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; NoVLX-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 +; 
NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqu (%rdi), %ymm3 +; NoVLX-NEXT: vpcmpgtq %zmm3, %zmm0, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm2, %zmm1, %k1 {%k1} +; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; NoVLX-NEXT: retq %mask1 = icmp sgt <4 x i64> %x1, %y1 %y = load <4 x i64>, <4 x i64>* %y.ptr, align 4 @@ -362,9 +376,12 @@ ; ; NoVLX-LABEL: test256_13: ; NoVLX: # %bb.0: +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm2 -; NoVLX-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm2 -; NoVLX-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; NoVLX-NEXT: vpcmpeqq %zmm2, %zmm0, %k1 +; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; NoVLX-NEXT: retq %yb = load i64, i64* %yb.ptr, align 4 %y.0 = insertelement <4 x i64> undef, i64 %yb, i32 0 @@ -437,11 +454,14 @@ ; ; NoVLX-LABEL: test256_16: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm2 +; NoVLX-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm3 -; NoVLX-NEXT: vpcmpgtq %ymm3, %ymm0, %ymm3 -; NoVLX-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; NoVLX-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; NoVLX-NEXT: vpcmpgtq %zmm3, %zmm0, %k1 +; NoVLX-NEXT: vpcmpleq %zmm1, %zmm2, %k1 {%k1} +; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; NoVLX-NEXT: retq %mask1 = icmp sge <4 x i64> %x1, %y1 %yb = load i64, i64* %yb.ptr, align 4 @@ -550,8 +570,11 @@ ; ; NoVLX-LABEL: test128_1: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm2 -; NoVLX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def 
%zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k1 +; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %mask = icmp eq <2 x i64> %x, %y %max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %y @@ -567,8 +590,12 @@ ; ; NoVLX-LABEL: test128_2: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k1 +; NoVLX-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %mask = icmp sgt <2 x i64> %x, %y %max = select <2 x i1> %mask, <2 x i64> %x1, <2 x i64> %y @@ -584,10 +611,12 @@ ; ; NoVLX-LABEL: test128_3: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 -; NoVLX-NEXT: vpxor %xmm3, %xmm0, %xmm0 -; NoVLX-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k1 +; NoVLX-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %mask = icmp sge <4 x i32> %x, %y %max = select <4 x i1> %mask, <4 x i32> %x1, <4 x i32> %y @@ -603,11 +632,12 @@ ; ; NoVLX-LABEL: test128_4: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm3, %xmm1, %xmm4 -; NoVLX-NEXT: vpxor %xmm3, %xmm0, %xmm0 -; NoVLX-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm0 -; NoVLX-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 +; NoVLX-NEXT: # 
kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 +; NoVLX-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %mask = icmp ugt <2 x i64> %x, %y %max = select <2 x i1> %mask, <2 x i64> %x1, <2 x i64> %y @@ -623,8 +653,12 @@ ; ; NoVLX-LABEL: test128_5: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm2 -; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqu (%rdi), %xmm2 +; NoVLX-NEXT: vpcmpeqd %zmm2, %zmm0, %k1 +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %y = load <4 x i32>, <4 x i32>* %yp, align 4 %mask = icmp eq <4 x i32> %x, %y @@ -641,8 +675,12 @@ ; ; NoVLX-LABEL: test128_5b: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm2 -; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqu (%rdi), %xmm2 +; NoVLX-NEXT: vpcmpeqd %zmm0, %zmm2, %k1 +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %y = load <4 x i32>, <4 x i32>* %yp, align 4 %mask = icmp eq <4 x i32> %y, %x @@ -659,8 +697,12 @@ ; ; NoVLX-LABEL: test128_6: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm2 -; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqu (%rdi), %xmm2 +; NoVLX-NEXT: vpcmpgtd %zmm2, %zmm0, %k1 +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %y = load <4 x i32>, <4 x 
i32>* %y.ptr, align 4 %mask = icmp sgt <4 x i32> %x, %y @@ -677,8 +719,12 @@ ; ; NoVLX-LABEL: test128_6b: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm2 -; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqu (%rdi), %xmm2 +; NoVLX-NEXT: vpcmpgtd %zmm2, %zmm0, %k1 +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp slt <4 x i32> %y, %x @@ -695,10 +741,12 @@ ; ; NoVLX-LABEL: test128_7: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm2 -; NoVLX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 -; NoVLX-NEXT: vpxor %xmm3, %xmm2, %xmm2 -; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqu (%rdi), %xmm2 +; NoVLX-NEXT: vpcmpled %zmm2, %zmm0, %k1 +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp sle <4 x i32> %x, %y @@ -715,10 +763,12 @@ ; ; NoVLX-LABEL: test128_7b: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm2 -; NoVLX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 -; NoVLX-NEXT: vpxor %xmm3, %xmm2, %xmm2 -; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqu (%rdi), %xmm2 +; NoVLX-NEXT: vpcmpled %zmm2, %zmm0, %k1 +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp sge <4 x i32> %y, %x @@ -735,9 +785,12 @@ ; ; NoVLX-LABEL: test128_8: ; NoVLX: # %bb.0: -; 
NoVLX-NEXT: vpminud (%rdi), %xmm0, %xmm2 -; NoVLX-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm2 -; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqu (%rdi), %xmm2 +; NoVLX-NEXT: vpcmpleud %zmm2, %zmm0, %k1 +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp ule <4 x i32> %x, %y @@ -754,10 +807,12 @@ ; ; NoVLX-LABEL: test128_8b: ; NoVLX: # %bb.0: +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqu (%rdi), %xmm2 -; NoVLX-NEXT: vpmaxud %xmm0, %xmm2, %xmm3 -; NoVLX-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2 -; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: vpcmpnltud %zmm0, %zmm2, %k1 +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp uge <4 x i32> %y, %x @@ -775,10 +830,14 @@ ; ; NoVLX-LABEL: test128_9: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm3 -; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2 -; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm3 killed %xmm3 def %zmm3 +; NoVLX-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1} +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %mask1 = icmp eq <4 x i32> %x1, %y1 %mask0 = icmp eq <4 x i32> %x, %y @@ -797,12 +856,14 @@ ; ; NoVLX-LABEL: test128_10: ; NoVLX: # %bb.0: -; NoVLX-NEXT: 
vpcmpgtq %xmm2, %xmm3, %xmm3 -; NoVLX-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4 -; NoVLX-NEXT: vpxor %xmm4, %xmm3, %xmm3 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1 -; NoVLX-NEXT: vpandn %xmm3, %xmm1, %xmm1 -; NoVLX-NEXT: vblendvpd %xmm1, %xmm0, %xmm2, %xmm0 +; NoVLX-NEXT: # kill: def %xmm3 killed %xmm3 def %zmm3 +; NoVLX-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpleq %zmm1, %zmm0, %k1 +; NoVLX-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1} +; NoVLX-NEXT: vpblendmq %zmm0, %zmm2, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %mask1 = icmp sge <2 x i64> %x1, %y1 %mask0 = icmp sle <2 x i64> %x, %y @@ -821,10 +882,14 @@ ; ; NoVLX-LABEL: test128_11: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm3 -; NoVLX-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm2 -; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2 -; NoVLX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqu (%rdi), %xmm3 +; NoVLX-NEXT: vpcmpgtq %zmm3, %zmm0, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm2, %zmm1, %k1 {%k1} +; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %mask1 = icmp sgt <2 x i64> %x1, %y1 %y = load <2 x i64>, <2 x i64>* %y.ptr, align 4 @@ -844,11 +909,14 @@ ; ; NoVLX-LABEL: test128_12: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm2 -; NoVLX-NEXT: vpminud (%rdi), %xmm0, %xmm3 -; NoVLX-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm3 -; NoVLX-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; 
NoVLX-NEXT: vmovdqu (%rdi), %xmm3 +; NoVLX-NEXT: vpcmpleud %zmm3, %zmm0, %k1 +; NoVLX-NEXT: vpcmpled %zmm1, %zmm2, %k1 {%k1} +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %mask1 = icmp sge <4 x i32> %x1, %y1 %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 @@ -867,9 +935,12 @@ ; ; NoVLX-LABEL: test128_13: ; NoVLX: # %bb.0: +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm2 -; NoVLX-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm2 -; NoVLX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: vpcmpeqq %zmm2, %zmm0, %k1 +; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %yb = load i64, i64* %yb.ptr, align 4 %y.0 = insertelement <2 x i64> undef, i64 %yb, i32 0 @@ -888,11 +959,12 @@ ; ; NoVLX-LABEL: test128_14: ; NoVLX: # %bb.0: +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm2 -; NoVLX-NEXT: vpcmpgtd %xmm2, %xmm0, %xmm2 -; NoVLX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 -; NoVLX-NEXT: vpxor %xmm3, %xmm2, %xmm2 -; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: vpcmpled %zmm2, %zmm0, %k1 +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %yb = load i32, i32* %yb.ptr, align 4 %y.0 = insertelement <4 x i32> undef, i32 %yb, i32 0 @@ -912,11 +984,14 @@ ; ; NoVLX-LABEL: test128_15: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm2 +; NoVLX-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm3 -; NoVLX-NEXT: vpcmpgtd %xmm3, %xmm0, %xmm3 -; NoVLX-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; 
NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: vpcmpgtd %zmm3, %zmm0, %k1 +; NoVLX-NEXT: vpcmpled %zmm1, %zmm2, %k1 {%k1} +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %mask1 = icmp sge <4 x i32> %x1, %y1 %yb = load i32, i32* %yb.ptr, align 4 @@ -938,11 +1013,14 @@ ; ; NoVLX-LABEL: test128_16: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm2 +; NoVLX-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm3 -; NoVLX-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3 -; NoVLX-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; NoVLX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: vpcmpgtq %zmm3, %zmm0, %k1 +; NoVLX-NEXT: vpcmpleq %zmm1, %zmm2, %k1 {%k1} +; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %mask1 = icmp sge <2 x i64> %x1, %y1 %yb = load i64, i64* %yb.ptr, align 4 @@ -963,10 +1041,12 @@ ; ; NoVLX-LABEL: test128_17: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm2 -; NoVLX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 -; NoVLX-NEXT: vpxor %xmm3, %xmm2, %xmm2 -; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqu (%rdi), %xmm2 +; NoVLX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1 +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp ne <4 x i32> %x, %y @@ -983,10 +1063,12 @@ ; ; NoVLX-LABEL: test128_18: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm2 -; NoVLX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 -; NoVLX-NEXT: vpxor %xmm3, %xmm2, %xmm2 -; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, 
%xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqu (%rdi), %xmm2 +; NoVLX-NEXT: vpcmpneqd %zmm0, %zmm2, %k1 +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp ne <4 x i32> %y, %x @@ -1003,9 +1085,12 @@ ; ; NoVLX-LABEL: test128_19: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpmaxud (%rdi), %xmm0, %xmm2 -; NoVLX-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm2 -; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqu (%rdi), %xmm2 +; NoVLX-NEXT: vpcmpnltud %zmm2, %zmm0, %k1 +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp uge <4 x i32> %x, %y @@ -1022,10 +1107,12 @@ ; ; NoVLX-LABEL: test128_20: ; NoVLX: # %bb.0: +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqu (%rdi), %xmm2 -; NoVLX-NEXT: vpmaxud %xmm0, %xmm2, %xmm3 -; NoVLX-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2 -; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: vpcmpnltud %zmm0, %zmm2, %k1 +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp uge <4 x i32> %y, %x Index: test/CodeGen/X86/avx512vl-vec-masked-cmp.ll =================================================================== --- test/CodeGen/X86/avx512vl-vec-masked-cmp.ll +++ test/CodeGen/X86/avx512vl-vec-masked-cmp.ll @@ -2373,37 +2373,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; 
NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -2424,37 +2401,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw 
$14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -2477,50 +2431,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; 
NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -2545,50 +2464,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, 
%k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -2614,38 +2498,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, 
%k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -2669,51 +2529,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; 
NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -2740,37 +2564,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; 
NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -2791,37 +2592,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -2844,50 +2622,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, 
%k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -2912,50 +2655,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqd 
(%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -2981,38 +2689,14 @@ ; ; NoVLX-LABEL: 
test_vpcmpeqd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -3036,51 +2720,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; 
NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -3113,13 +2761,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: 
vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -3154,13 +2812,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb 
$1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -3197,8 +2865,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -3206,17 +2876,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -3255,8 +2921,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, 
%xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -3264,17 +2932,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -3314,14 +2978,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, 
%zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -3359,9 +3032,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -3369,17 +3043,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; 
NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -3420,16 +3090,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -3467,16 +3146,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: 
vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -3516,8 +3204,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -3525,20 +3215,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: 
kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -3580,8 +3265,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -3589,20 +3276,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, 
%ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -3645,17 +3327,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -3696,9 +3386,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: 
andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -3706,20 +3397,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -5338,13 +5024,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; 
NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5365,13 +5052,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5394,20 +5082,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), 
%eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5432,20 +5115,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5471,14 +5149,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5502,21 +5180,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; 
NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5544,23 +5216,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -5581,23 +5244,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem: ; 
NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -5620,30 +5274,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 
-; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -5668,30 +5307,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -5717,24 +5341,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: 
kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -5758,31 +5372,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -5809,23 +5407,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; 
NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -5846,23 +5435,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -5885,30 +5465,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 
-; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -5933,30 +5498,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), 
%xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -5982,24 +5532,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -6023,31 +5563,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl 
$1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -6080,13 +5604,17 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6121,13 +5649,17 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6164,20 +5696,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb 
$0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6216,20 +5746,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6269,14 +5797,17 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd 
%xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6314,21 +5845,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6369,16 +5897,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqq %xmm1, 
%xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6416,16 +5947,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, 
%xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6465,23 +5999,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6523,23 +6054,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw 
$1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6582,17 +6110,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: 
vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6633,24 +6163,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6689,36 +6215,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, 
%k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -6743,36 +6244,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 
-; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -6799,49 +6275,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: 
vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -6870,49 +6309,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; 
NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -6942,37 +6344,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, 
%k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -7000,50 +6376,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, 
%k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -7074,36 +6412,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -7128,36 +6441,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; 
NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -7184,49 +6472,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: 
kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -7255,49 +6506,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, 
%eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -7327,37 +6541,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; 
NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -7385,50 +6573,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; 
NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -7465,14 +6615,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, 
%xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -7508,14 +6667,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -7553,9 +6721,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; 
NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -7563,17 +6732,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -7613,9 +6778,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -7623,17 +6789,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand 
%xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -7674,15 +6836,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; 
NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -7721,10 +6891,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -7732,17 +6902,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -7784,17 +6950,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; 
NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -7833,17 +7007,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -7884,9 +7066,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -7894,20 +7077,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -7950,9 +7128,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -7960,20 +7139,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, 
%xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -8017,18 +7191,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -8070,10 +7251,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: 
kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -8081,20 +7262,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -11477,37 +10653,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax 
-; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -11528,37 +10681,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; 
NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -11581,50 +10711,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: 
kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -11649,50 +10744,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; 
NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -11718,38 +10778,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: 
vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -11773,51 +10809,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax 
; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -11844,37 +10844,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -11895,37 +10872,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; 
NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -11948,50 +10902,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; 
NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -12016,50 +10935,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, 
%xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -12085,38 +10969,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; 
NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -12140,51 +11000,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; 
NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -12217,13 +11041,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, 
{{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -12258,13 +11092,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -12301,8 +11145,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; 
NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -12310,17 +11156,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -12359,8 +11201,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -12368,17 +11212,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, 
%xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -12418,14 +11258,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; 
NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -12463,9 +11312,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -12473,17 +11323,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -12524,16 +11370,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 
killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -12571,16 +11426,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -12620,8 +11484,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -12629,20 +11495,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, 
%zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -12684,8 +11545,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -12693,20 +11556,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: 
vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -12749,17 +11607,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -12800,9 +11666,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -12810,20 +11677,15 @@ ; NoVLX-NEXT: kshiftrw 
$1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -14442,13 +13304,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14469,13 +13332,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry 
-; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14498,20 +13362,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14536,20 +13395,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; 
NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14575,14 +13429,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14606,21 +13460,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = 
xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14648,23 +13496,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -14685,23 +13524,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; 
NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -14724,30 +13554,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -14772,30 +13587,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: 
kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -14821,24 +13621,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; 
NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -14862,31 +13652,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -14913,23 +13687,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def 
%zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -14950,23 +13715,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -14989,30 +13745,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; 
NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -15037,30 +13778,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -15086,24 
+13812,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -15127,31 +13843,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; 
NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -15184,13 +13884,17 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -15225,13 +13929,17 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; 
NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -15268,20 +13976,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -15320,20 +14026,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, 
%k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -15373,14 +14077,17 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, 
%ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -15418,21 +14125,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -15473,16 +14177,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, 
{{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -15520,16 +14227,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -15569,23 +14279,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: 
vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -15627,23 +14334,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -15686,17 +14390,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -15737,24 +14443,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: 
vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -15793,36 +14495,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; 
NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -15847,36 +14524,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: 
vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -15903,49 +14555,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; 
NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -15974,49 +14589,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; 
NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -16046,37 +14624,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -16104,50 +14656,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: 
vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -16178,36 +14692,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; 
NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -16232,36 +14721,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: 
kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -16288,49 +14752,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw 
$14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -16359,49 +14786,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; 
NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -16431,37 +14821,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, 
%k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -16489,50 +14853,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, 
%k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -16569,14 +14895,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -16612,14 +14947,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; 
NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -16657,9 +15001,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -16667,17 +15012,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; 
NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -16717,9 +15058,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -16727,17 +15069,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; 
NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -16778,15 +15116,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -16825,10 +15171,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; 
NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -16836,17 +15182,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -16888,17 +15230,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; 
NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -16937,17 +15287,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -16988,9 +15346,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -16998,20 +15357,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; 
NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -17054,9 +15408,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -17064,20 +15419,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -17121,18 +15471,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; 
NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -17174,10 +15531,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -17185,20 +15542,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: 
vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -20677,39 +19029,14 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, 
%k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -20730,40 +19057,14 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; 
NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -20786,50 +19087,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # 
kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -20854,51 +19120,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; 
NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -20924,40 +19154,14 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: 
vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -20981,51 +19185,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax 
; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -21052,39 +19220,14 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -21105,40 +19248,14 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 -; NoVLX-NEXT: 
vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -21161,50 +19278,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, 
%xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -21229,51 +19311,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw 
%k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -21299,40 +19345,14 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax 
-; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -21356,51 +19376,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; 
NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -21433,15 +19417,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; 
NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -21476,16 +19468,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: 
vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -21522,8 +19521,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -21531,17 +19532,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -21580,9 +19577,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, 
%k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -21590,17 +19588,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -21640,16 +19634,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, 
%zmm1, %k0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -21687,9 +19688,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -21697,17 +19699,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 
+; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -21748,18 +19746,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -21797,19 +19802,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; 
NoVLX-NEXT: vmovdqa (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -21849,8 +19860,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -21858,20 +19871,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; 
NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -21913,9 +19921,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -21923,20 +19932,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -21979,19 +19983,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: 
vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -22032,9 +20042,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -22042,20 +20053,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -23674,15 +21680,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq 
%xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23703,16 +21708,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23735,20 +21738,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; 
NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23773,21 +21771,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23813,16 +21805,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; 
NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23846,21 +21836,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23888,25 +21872,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, 
%k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -23927,26 +21900,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -23969,30 +21930,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; 
NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -24017,31 +21963,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw 
$14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -24067,26 +21997,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -24110,31 +22028,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: 
kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -24161,25 +22063,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -24200,26 +22091,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: 
vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -24242,30 +22121,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; 
NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -24290,31 +22154,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -24340,26 +22188,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; 
NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -24383,31 +22219,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -24440,15 +22260,17 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor 
%xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24483,16 +22305,17 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: 
vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24529,20 +22352,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24581,21 +22402,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; 
NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24635,16 +22453,17 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24682,21 +22501,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq 
$-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24737,18 +22553,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24786,19 +22603,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24838,23 +22655,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, 
%rsp -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24896,24 +22710,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb 
{{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24956,19 +22766,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -25009,24 +22819,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; 
NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -25065,38 +22871,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; 
NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -25121,39 +22900,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; 
NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -25180,51 +22931,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: 
kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -25253,52 +22965,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; 
NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -25328,39 +23000,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; 
NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -25388,52 +23032,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb 
$12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -25464,38 +23068,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: 
# kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -25520,39 +23097,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -25579,51 +23128,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; 
NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -25652,52 +23162,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 -; 
NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -25727,39 +23197,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b: ; NoVLX: # 
%bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -25787,52 +23229,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: 
kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -25869,16 +23271,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -25914,17 +23323,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: 
kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -25962,11 +23377,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -25974,17 +23388,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 
+; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -26024,12 +23434,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -26037,17 +23445,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, 
%zmm0, %k0 @@ -26088,17 +23492,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -26137,12 +23547,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; 
NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -26150,17 +23558,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -26202,19 +23606,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb 
{{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -26253,20 +23663,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; 
NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -26307,11 +23722,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -26319,20 +23733,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: 
vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -26375,12 +23784,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -26388,20 +23795,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -26445,20 +23847,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # 
kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -26500,12 +23907,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -26513,20 
+23918,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -30025,40 +27425,14 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; 
NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30079,40 +27453,14 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: 
kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30135,53 +27483,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, 
%eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30206,53 +27516,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 
-; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30278,41 +27550,14 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; 
NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30336,54 +27581,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: 
kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30410,40 +27616,14 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 
-; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30464,40 +27644,14 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; 
NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30520,53 +27674,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw 
$15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30591,53 +27707,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: 
kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30663,41 +27741,14 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: 
vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30721,54 +27772,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, 
%k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30801,16 +27813,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; 
NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -30845,16 +27864,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, 
%zmm0, %k0 @@ -30891,11 +27917,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -30903,17 +27928,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -30952,11 +27973,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = 
[2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -30964,17 +27984,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -31014,17 +28030,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; 
NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -31062,12 +28084,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -31075,17 +28095,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; 
NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -31126,19 +28142,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; 
NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -31176,19 +28198,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; 
NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -31228,11 +28256,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -31240,20 +28267,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -31295,11 +28317,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp 
; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -31307,20 +28328,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -31363,20 +28379,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpbroadcastd 
{{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -31417,12 +28438,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -31430,20 
+28449,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -33062,16 +30076,14 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: 
andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -33092,16 +30104,14 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -33124,23 +30134,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax 
-; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -33165,23 +30167,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -33207,17 +30201,14 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; 
NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -33241,24 +30232,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -33286,26 +30268,14 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw 
%k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33326,26 +30296,14 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33368,33 +30326,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = 
[9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33419,33 +30359,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; 
NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33471,27 +30393,14 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33515,34 +30424,15 @@ ; ; NoVLX-LABEL: 
test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33569,26 +30459,14 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 
-; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33609,26 +30487,14 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33651,33 +30517,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor 
%xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33702,33 +30550,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; 
NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33754,27 +30584,14 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33798,34 +30615,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 
def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33858,16 +30656,17 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; 
NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -33902,16 +30701,17 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -33948,23 +30748,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vmovdqa {{.*#+}} 
xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -34003,23 +30798,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, 
%eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -34059,17 +30849,17 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd 
%zmm0, %zmm0, %k0 @@ -34107,24 +30897,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -34165,19 +30949,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; 
NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -34215,19 +30999,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, 
%xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -34267,26 +31051,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -34328,26 +31106,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rsi), 
%xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -34390,20 +31162,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -34444,27 +31215,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: 
vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -34503,39 +31267,11 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -34560,39 +31296,11 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: 
vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -34619,52 +31327,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; 
NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -34693,52 +31361,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastq 
{{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; 
NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -34768,40 +31396,11 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -34829,53 +31428,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; 
NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; 
NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -34906,39 +31464,11 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -34963,39 +31493,11 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = 
[9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -35022,52 +31524,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; 
NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -35096,52 +31558,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = 
[9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, 
%eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -35171,40 +31593,11 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -35232,53 +31625,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: 
vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # 
kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -35315,17 +31667,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35361,17 +31719,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1 -; NoVLX-NEXT: 
vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35409,12 +31773,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -35422,17 +31784,13 @@ ; NoVLX-NEXT: 
kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35472,12 +31830,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -35485,17 +31841,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; 
NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35536,18 +31888,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; 
NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35586,13 +31943,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -35600,17 +31954,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor 
%xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35652,20 +32002,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35704,20 
+32059,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35758,12 +32118,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, 
%ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -35771,20 +32129,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35827,12 +32180,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: 
kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -35840,20 +32191,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35897,21 +32243,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 
+; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35953,13 +32303,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -35967,20 +32314,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: 
vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -37003,37 +33345,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; 
NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> @@ -37054,37 +33373,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovaps (%rdi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: 
kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> @@ -37106,38 +33402,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1 -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> @@ -37161,38 +33433,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, 
%zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -37219,38 +33465,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, 
%k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vmovaps (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -37278,39 +33498,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vbroadcastss (%rsi), %xmm2 -; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; 
NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -37340,37 +33533,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw 
$12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> @@ -37391,37 +33561,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovaps (%rdi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> @@ -37443,38 +33590,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1 -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: 
vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> @@ -37498,38 +33621,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; 
NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -37556,38 +33653,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: 
kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vmovaps (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -37615,39 +33686,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vbroadcastss (%rsi), %xmm2 -; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1 +; NoVLX-NEXT: 
vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -37683,13 +33727,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -37724,13 +33778,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, 
%zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovaps (%rdi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -37766,14 +33830,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1 -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; 
NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -37811,16 +33884,24 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -37858,16 +33939,24 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, 
%xmm0 -; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vmovaps (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -37906,17 +33995,24 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vbroadcastss (%rsi), %xmm2 -; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 
+; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -37957,16 +34053,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; 
NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -38004,16 +34109,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovaps (%rdi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -38052,17 +34166,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1 -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; 
NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -38103,19 +34225,26 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: 
vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -38156,19 +34285,26 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vmovaps (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ 
-38210,20 +34346,26 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vbroadcastss (%rsi), %xmm2 -; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -39936,13 +36078,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; 
NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -39963,13 +36106,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovapd (%rdi), %xmm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -39991,14 +36135,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -40022,16 +36166,15 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; 
NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -40055,16 +36198,15 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: vmovapd (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -40089,17 +36231,15 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm1, %xmm0, 
%xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -40127,23 +36267,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> @@ -40164,23 +36295,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; 
NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovapd (%rdi), %xmm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> @@ -40202,24 +36324,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> @@ -40243,24 +36355,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, 
%xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -40287,24 +36387,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vmovapd (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -40332,25 +36420,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; 
NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -40380,23 +36455,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> @@ -40417,23 +36483,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; 
NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovapd (%rdi), %xmm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> @@ -40455,24 +36512,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> @@ -40496,24 +36543,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # 
%entry +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -40540,24 +36575,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vmovapd (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # 
kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -40585,25 +36608,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -40639,13 +36649,17 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: 
kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -40680,13 +36694,17 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovapd (%rdi), %xmm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -40722,14 +36740,17 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, 
%zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -40767,16 +36788,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -40814,16 +36837,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: 
vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vmovapd (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -40862,17 +36887,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw 
%k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -40913,16 +36939,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -40960,16 +36989,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovapd (%rdi), %xmm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: 
kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -41008,17 +37040,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -41059,19 +37093,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, 
%rsp +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -41112,19 +37147,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vmovapd (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -41166,20 +37202,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 
@@ -41218,36 +37254,11 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -41272,36 +37283,11 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: 
korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovapd (%rdi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -41327,37 +37313,11 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1 -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; 
NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -41385,39 +37345,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: 
vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -41445,39 +37378,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vmovapd (%rsi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -41506,40 +37412,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: 
# kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm2 -; NoVLX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -41570,36 +37448,11 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; 
NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -41624,36 +37477,11 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; 
NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovapd (%rdi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -41679,37 +37507,11 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1 -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw 
$12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -41737,39 +37539,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -41797,39 +37572,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; 
NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vmovapd (%rsi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -41858,40 +37606,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm2 -; NoVLX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; 
NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -41928,14 +37648,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; 
NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -41971,14 +37700,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovapd (%rdi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -42015,15 +37753,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp 
; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1 -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -42062,17 +37808,24 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; 
NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -42111,17 +37864,24 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vmovapd (%rsi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb 
$0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -42161,18 +37921,24 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm2 -; NoVLX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -42214,17 +37980,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; 
NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -42263,17 +38037,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovapd (%rdi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: 
vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -42313,18 +38095,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1 -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, 
%xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -42366,20 +38155,26 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -42421,20 +38216,26 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: 
vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vmovapd (%rsi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -42477,21 +38278,26 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm2 -; NoVLX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: 
kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -43710,40 +39516,14 @@ ; ; NoVLX-LABEL: mask_zero_lower: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $4, %k2, %k3 -; NoVLX-NEXT: kxorw %k1, %k3, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $11, %k1, %k1 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftrw $5, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $10, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $9, %k1, %k1 -; 
NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $8, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq %cmp = icmp ult <4 x i32> %a, zeroinitializer %concat = shufflevector <4 x i1> %cmp, <4 x i1> zeroinitializer, <8 x i32> Index: test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll =================================================================== --- test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll +++ test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll @@ -48,7 +48,6 @@ ; ; AVX512F-LABEL: ext_i2_2i64: ; AVX512F: # %bb.0: -; AVX512F-NEXT: andb $3, %dil ; AVX512F-NEXT: kmovw %edi, %k1 ; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} ; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 @@ -98,7 +97,6 @@ ; ; AVX512F-LABEL: ext_i4_4i32: ; AVX512F: # %bb.0: -; AVX512F-NEXT: andb $15, %dil ; AVX512F-NEXT: kmovw %edi, %k1 ; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} ; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 @@ -289,7 +287,6 @@ ; ; AVX512F-LABEL: ext_i4_4i64: ; AVX512F: # %bb.0: -; AVX512F-NEXT: andb $15, %dil ; AVX512F-NEXT: kmovw %edi, %k1 ; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} ; AVX512F-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 Index: test/CodeGen/X86/compress_expand.ll =================================================================== --- test/CodeGen/X86/compress_expand.ll +++ test/CodeGen/X86/compress_expand.ll @@ -200,11 +200,9 @@ ; KNL: # %bb.0: ; KNL-NEXT: # kill: def 
%ymm0 killed %ymm0 def %zmm0 ; KNL-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL-NEXT: vpsrad $31, %xmm1, %xmm1 -; KNL-NEXT: vpmovsxdq %xmm1, %ymm1 -; KNL-NEXT: vmovdqa %ymm1, %ymm1 -; KNL-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1 +; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL-NEXT: kshiftlw $12, %k0, %k0 +; KNL-NEXT: kshiftrw $12, %k0, %k1 ; KNL-NEXT: vpcompressq %zmm0, (%rdi) {%k1} ; KNL-NEXT: retq call void @llvm.masked.compressstore.v4i64(<4 x i64> %V, i64* %base, <4 x i1> %mask) @@ -223,10 +221,9 @@ ; KNL: # %bb.0: ; KNL-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL-NEXT: vpsllq $63, %xmm1, %xmm1 -; KNL-NEXT: vpsraq $63, %zmm1, %zmm1 -; KNL-NEXT: vmovdqa %xmm1, %xmm1 -; KNL-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1 +; KNL-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL-NEXT: kshiftlw $14, %k0, %k0 +; KNL-NEXT: kshiftrw $14, %k0, %k1 ; KNL-NEXT: vpcompressq %zmm0, (%rdi) {%k1} ; KNL-NEXT: retq call void @llvm.masked.compressstore.v2i64(<2 x i64> %V, i64* %base, <2 x i1> %mask) @@ -245,10 +242,9 @@ ; KNL: # %bb.0: ; KNL-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL-NEXT: vpsrad $31, %xmm1, %xmm1 -; KNL-NEXT: vmovdqa %xmm1, %xmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 +; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL-NEXT: kshiftlw $12, %k0, %k0 +; KNL-NEXT: kshiftrw $12, %k0, %k1 ; KNL-NEXT: vcompressps %zmm0, (%rdi) {%k1} ; KNL-NEXT: retq call void @llvm.masked.compressstore.v4f32(<4 x float> %V, float* %base, <4 x i1> %mask) @@ -269,11 +265,9 @@ ; KNL-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; KNL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] -; KNL-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 -; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL-NEXT: vmovaps %xmm1, %xmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 +; KNL-NEXT: vpcmpeqq 
%zmm2, %zmm1, %k0 +; KNL-NEXT: kshiftlw $14, %k0, %k0 +; KNL-NEXT: kshiftrw $14, %k0, %k1 ; KNL-NEXT: vexpandps (%rdi), %zmm0 {%k1} ; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; KNL-NEXT: retq @@ -296,11 +290,9 @@ ; KNL-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; KNL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] -; KNL-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 -; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL-NEXT: vmovaps %xmm1, %xmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 +; KNL-NEXT: vpcmpeqq %zmm2, %zmm1, %k0 +; KNL-NEXT: kshiftlw $14, %k0, %k0 +; KNL-NEXT: kshiftrw $14, %k0, %k1 ; KNL-NEXT: vcompressps %zmm0, (%rdi) {%k1} ; KNL-NEXT: retq %mask = icmp eq <2 x i32> %trigger, zeroinitializer Index: test/CodeGen/X86/masked_gather_scatter.ll =================================================================== --- test/CodeGen/X86/masked_gather_scatter.ll +++ test/CodeGen/X86/masked_gather_scatter.ll @@ -812,11 +812,12 @@ ; KNL_64-LABEL: test15: ; KNL_64: # %bb.0: ; KNL_64-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0 -; KNL_64-NEXT: vmovdqa %xmm1, %xmm1 -; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm2 -; KNL_64-NEXT: vpslld $31, %ymm1, %ymm0 -; KNL_64-NEXT: vptestmd %zmm0, %zmm0, %k1 -; KNL_64-NEXT: vgatherqps (%rdi,%zmm2,4), %ymm0 {%k1} +; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1 +; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL_64-NEXT: kshiftlw $12, %k0, %k0 +; KNL_64-NEXT: kshiftrw $12, %k0, %k1 +; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm1 +; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1} ; KNL_64-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0 ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq @@ -824,12 +825,13 @@ ; KNL_32-LABEL: test15: ; KNL_32: # %bb.0: ; KNL_32-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0 -; KNL_32-NEXT: vmovdqa %xmm1, %xmm1 +; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1 +; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL_32-NEXT: kshiftlw 
$12, %k0, %k0 +; KNL_32-NEXT: kshiftrw $12, %k0, %k1 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm2 -; KNL_32-NEXT: vpslld $31, %ymm1, %ymm0 -; KNL_32-NEXT: vptestmd %zmm0, %zmm0, %k1 -; KNL_32-NEXT: vgatherqps (%eax,%zmm2,4), %ymm0 {%k1} +; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1 +; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1} ; KNL_32-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0 ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl @@ -864,12 +866,10 @@ ; KNL_64-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 ; KNL_64-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0 ; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL_64-NEXT: vpsrad $31, %xmm1, %xmm1 -; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1 -; KNL_64-NEXT: vmovdqa %ymm1, %ymm1 +; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL_64-NEXT: kshiftlw $12, %k0, %k0 +; KNL_64-NEXT: kshiftrw $12, %k0, %k1 ; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0 -; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1 ; KNL_64-NEXT: vgatherqpd (%rdi,%zmm0,8), %zmm2 {%k1} ; KNL_64-NEXT: vmovapd %ymm2, %ymm0 ; KNL_64-NEXT: retq @@ -879,13 +879,11 @@ ; KNL_32-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 ; KNL_32-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0 ; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL_32-NEXT: vpsrad $31, %xmm1, %xmm1 -; KNL_32-NEXT: vpmovsxdq %xmm1, %ymm1 -; KNL_32-NEXT: vmovdqa %ymm1, %ymm1 +; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL_32-NEXT: kshiftlw $12, %k0, %k0 +; KNL_32-NEXT: kshiftrw $12, %k0, %k1 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax ; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0 -; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1 ; KNL_32-NEXT: vgatherqpd (%eax,%zmm0,8), %zmm2 {%k1} ; KNL_32-NEXT: vmovapd %ymm2, %ymm0 ; KNL_32-NEXT: retl @@ -919,9 +917,10 @@ ; KNL_64-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 ; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0 ; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm0 -; KNL_64-NEXT: vmovdqa %xmm1, %xmm1 
-; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1 +; KNL_64-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL_64-NEXT: kshiftlw $14, %k0, %k0 +; KNL_64-NEXT: kshiftrw $14, %k0, %k1 ; KNL_64-NEXT: vgatherqpd (%rdi,%zmm0,8), %zmm2 {%k1} ; KNL_64-NEXT: vmovapd %xmm2, %xmm0 ; KNL_64-NEXT: vzeroupper @@ -932,10 +931,11 @@ ; KNL_32-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 ; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0 ; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm0 -; KNL_32-NEXT: vmovdqa %xmm1, %xmm1 +; KNL_32-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL_32-NEXT: kshiftlw $14, %k0, %k0 +; KNL_32-NEXT: kshiftrw $14, %k0, %k1 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1 ; KNL_32-NEXT: vgatherqpd (%eax,%zmm0,8), %zmm2 {%k1} ; KNL_32-NEXT: vmovapd %xmm2, %xmm0 ; KNL_32-NEXT: vzeroupper @@ -979,9 +979,10 @@ ; KNL_64: # %bb.0: ; KNL_64-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; KNL_64-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0 -; KNL_64-NEXT: vmovdqa %xmm2, %xmm2 -; KNL_64-NEXT: vpslld $31, %ymm2, %ymm2 -; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 +; KNL_64-NEXT: vpslld $31, %xmm2, %xmm2 +; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k0 +; KNL_64-NEXT: kshiftlw $12, %k0, %k0 +; KNL_64-NEXT: kshiftrw $12, %k0, %k1 ; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq @@ -990,10 +991,11 @@ ; KNL_32: # %bb.0: ; KNL_32-NEXT: # kill: def %xmm1 killed %xmm1 def %ymm1 ; KNL_32-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0 -; KNL_32-NEXT: vmovdqa %xmm2, %xmm2 +; KNL_32-NEXT: vpslld $31, %xmm2, %xmm2 +; KNL_32-NEXT: vptestmd %zmm2, %zmm2, %k0 +; KNL_32-NEXT: kshiftlw $12, %k0, %k0 +; KNL_32-NEXT: kshiftrw $12, %k0, %k1 ; KNL_32-NEXT: vpmovsxdq %ymm1, %zmm1 -; KNL_32-NEXT: vpslld $31, %ymm2, %ymm2 -; KNL_32-NEXT: vptestmd %zmm2, %zmm2, %k1 ; KNL_32-NEXT: vpscatterqd %ymm0, 
(,%zmm1) {%k1} ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl @@ -1022,11 +1024,9 @@ ; KNL_64-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 ; KNL_64-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL_64-NEXT: vpsrad $31, %xmm1, %xmm1 -; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1 -; KNL_64-NEXT: vmovdqa %ymm1, %ymm1 -; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1 +; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL_64-NEXT: kshiftlw $12, %k0, %k0 +; KNL_64-NEXT: kshiftrw $12, %k0, %k1 ; KNL_64-NEXT: vscatterqpd %zmm0, (%rdi,%zmm2,8) {%k1} ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq @@ -1036,12 +1036,10 @@ ; KNL_32-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 ; KNL_32-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL_32-NEXT: vpsrad $31, %xmm1, %xmm1 -; KNL_32-NEXT: vpmovsxdq %xmm1, %ymm1 -; KNL_32-NEXT: vmovdqa %ymm1, %ymm1 +; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL_32-NEXT: kshiftlw $12, %k0, %k0 +; KNL_32-NEXT: kshiftrw $12, %k0, %k1 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1 ; KNL_32-NEXT: vscatterqpd %zmm0, (%eax,%zmm2,8) {%k1} ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl @@ -1073,10 +1071,10 @@ ; KNL_64: # %bb.0: ; KNL_64-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; KNL_64-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0 -; KNL_64-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,2],zero,zero -; KNL_64-NEXT: vmovaps %xmm2, %xmm2 -; KNL_64-NEXT: vpslld $31, %ymm2, %ymm2 -; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 +; KNL_64-NEXT: vpsllq $63, %xmm2, %xmm2 +; KNL_64-NEXT: vptestmq %zmm2, %zmm2, %k0 +; KNL_64-NEXT: kshiftlw $14, %k0, %k0 +; KNL_64-NEXT: kshiftrw $14, %k0, %k1 ; KNL_64-NEXT: vscatterqps %ymm0, (,%zmm1) {%k1} ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq @@ -1084,12 +1082,12 @@ ; KNL_32-LABEL: test20: ; KNL_32: # %bb.0: ; KNL_32-NEXT: # kill: def %xmm0 
killed %xmm0 def %ymm0 +; KNL_32-NEXT: vpsllq $63, %xmm2, %xmm2 +; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k0 +; KNL_32-NEXT: kshiftlw $14, %k0, %k0 +; KNL_32-NEXT: kshiftrw $14, %k0, %k1 ; KNL_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] -; KNL_32-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,2],zero,zero -; KNL_32-NEXT: vmovaps %xmm2, %xmm2 ; KNL_32-NEXT: vpmovsxdq %ymm1, %zmm1 -; KNL_32-NEXT: vpslld $31, %ymm2, %ymm2 -; KNL_32-NEXT: vptestmd %zmm2, %zmm2, %k1 ; KNL_32-NEXT: vscatterqps %ymm0, (,%zmm1) {%k1} ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl @@ -1119,10 +1117,11 @@ ; KNL_64-LABEL: test21: ; KNL_64: # %bb.0: ; KNL_64-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 -; KNL_64-NEXT: vmovdqa %xmm2, %xmm2 +; KNL_64-NEXT: vpsllq $63, %xmm2, %xmm2 +; KNL_64-NEXT: vptestmq %zmm2, %zmm2, %k0 +; KNL_64-NEXT: kshiftlw $14, %k0, %k0 +; KNL_64-NEXT: kshiftrw $14, %k0, %k1 ; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; KNL_64-NEXT: vpsllq $63, %zmm2, %zmm2 -; KNL_64-NEXT: vptestmq %zmm2, %zmm2, %k1 ; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq @@ -1131,10 +1130,11 @@ ; KNL_32: # %bb.0: ; KNL_32-NEXT: vpsllq $32, %xmm1, %xmm1 ; KNL_32-NEXT: vpsraq $32, %zmm1, %zmm1 -; KNL_32-NEXT: vmovdqa %xmm2, %xmm2 +; KNL_32-NEXT: vpsllq $63, %xmm2, %xmm2 +; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k0 +; KNL_32-NEXT: kshiftlw $14, %k0, %k0 +; KNL_32-NEXT: kshiftrw $14, %k0, %k1 ; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; KNL_32-NEXT: vpsllq $63, %zmm2, %zmm2 -; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1 ; KNL_32-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl @@ -1170,12 +1170,12 @@ ; KNL_64-LABEL: test22: ; KNL_64: # %bb.0: ; KNL_64-NEXT: # kill: def %xmm2 killed %xmm2 def %ymm2 +; KNL_64-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL_64-NEXT: kshiftlw $14, %k0, %k0 +; KNL_64-NEXT: kshiftrw $14, %k0, %k1 ; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 
-; KNL_64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL_64-NEXT: vmovaps %xmm1, %xmm1 ; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0 -; KNL_64-NEXT: vpslld $31, %ymm1, %ymm1 -; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k1 ; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k1} ; KNL_64-NEXT: vmovaps %xmm2, %xmm0 ; KNL_64-NEXT: vzeroupper @@ -1184,13 +1184,13 @@ ; KNL_32-LABEL: test22: ; KNL_32: # %bb.0: ; KNL_32-NEXT: # kill: def %xmm2 killed %xmm2 def %ymm2 +; KNL_32-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL_32-NEXT: kshiftlw $14, %k0, %k0 +; KNL_32-NEXT: kshiftrw $14, %k0, %k1 ; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; KNL_32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL_32-NEXT: vmovaps %xmm1, %xmm1 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax ; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0 -; KNL_32-NEXT: vpslld $31, %ymm1, %ymm1 -; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 ; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k1} ; KNL_32-NEXT: vmovaps %xmm2, %xmm0 ; KNL_32-NEXT: vzeroupper @@ -1225,10 +1225,10 @@ ; KNL_64: # %bb.0: ; KNL_64-NEXT: # kill: def %xmm2 killed %xmm2 def %ymm2 ; KNL_64-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 -; KNL_64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL_64-NEXT: vmovaps %xmm1, %xmm1 -; KNL_64-NEXT: vpslld $31, %ymm1, %ymm1 -; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k1 +; KNL_64-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL_64-NEXT: kshiftlw $14, %k0, %k0 +; KNL_64-NEXT: kshiftrw $14, %k0, %k1 ; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k1} ; KNL_64-NEXT: vmovaps %xmm2, %xmm0 ; KNL_64-NEXT: vzeroupper @@ -1238,11 +1238,11 @@ ; KNL_32: # %bb.0: ; KNL_32-NEXT: # kill: def %xmm2 killed %xmm2 def %ymm2 ; KNL_32-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 -; KNL_32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL_32-NEXT: vmovaps %xmm1, %xmm1 +; KNL_32-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_32-NEXT: vptestmq %zmm1, 
%zmm1, %k0 +; KNL_32-NEXT: kshiftlw $14, %k0, %k0 +; KNL_32-NEXT: kshiftrw $14, %k0, %k1 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpslld $31, %ymm1, %ymm1 -; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 ; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k1} ; KNL_32-NEXT: vmovaps %xmm2, %xmm0 ; KNL_32-NEXT: vzeroupper @@ -1275,30 +1275,30 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %src0) { ; KNL_64-LABEL: test23: ; KNL_64: # %bb.0: -; KNL_64-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] +; KNL_64-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL_64-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] ; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0 -; KNL_64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL_64-NEXT: vmovaps %xmm1, %xmm1 -; KNL_64-NEXT: vpslld $31, %ymm1, %ymm1 -; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k1 -; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm2 {%k1} -; KNL_64-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero +; KNL_64-NEXT: kshiftlw $14, %k0, %k0 +; KNL_64-NEXT: kshiftrw $14, %k0, %k1 +; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm1 {%k1} +; KNL_64-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq ; ; KNL_32-LABEL: test23: ; KNL_32: # %bb.0: +; KNL_32-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k0 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] +; KNL_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] ; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0 -; KNL_32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL_32-NEXT: vmovaps %xmm1, %xmm1 -; KNL_32-NEXT: vpslld $31, %ymm1, %ymm1 -; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 -; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm2 {%k1} -; KNL_32-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero 
+; KNL_32-NEXT: kshiftlw $14, %k0, %k0 +; KNL_32-NEXT: kshiftrw $14, %k0, %k1 +; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm1 {%k1} +; KNL_32-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl ; @@ -1332,27 +1332,27 @@ ; KNL_64-LABEL: test23b: ; KNL_64: # %bb.0: ; KNL_64-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 -; KNL_64-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] -; KNL_64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL_64-NEXT: vmovaps %xmm1, %xmm1 -; KNL_64-NEXT: vpslld $31, %ymm1, %ymm1 -; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k1 -; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm2 {%k1} -; KNL_64-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero +; KNL_64-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL_64-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] +; KNL_64-NEXT: kshiftlw $14, %k0, %k0 +; KNL_64-NEXT: kshiftrw $14, %k0, %k1 +; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm1 {%k1} +; KNL_64-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq ; ; KNL_32-LABEL: test23b: ; KNL_32: # %bb.0: ; KNL_32-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; KNL_32-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k0 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] -; KNL_32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL_32-NEXT: vmovaps %xmm1, %xmm1 -; KNL_32-NEXT: vpslld $31, %ymm1, %ymm1 -; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 -; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm2 {%k1} -; KNL_32-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero +; KNL_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] +; KNL_32-NEXT: kshiftlw $14, %k0, %k0 +; KNL_32-NEXT: kshiftrw $14, %k0, %k1 +; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm1 {%k1} +; KNL_32-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero ; KNL_32-NEXT: vzeroupper ; 
KNL_32-NEXT: retl ; @@ -1433,9 +1433,10 @@ ; KNL_64-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 ; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0 ; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm0 -; KNL_64-NEXT: vmovdqa %xmm1, %xmm1 -; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1 +; KNL_64-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL_64-NEXT: kshiftlw $14, %k0, %k0 +; KNL_64-NEXT: kshiftrw $14, %k0, %k1 ; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm2 {%k1} ; KNL_64-NEXT: vmovdqa %xmm2, %xmm0 ; KNL_64-NEXT: vzeroupper @@ -1446,10 +1447,11 @@ ; KNL_32-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 ; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0 ; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm0 -; KNL_32-NEXT: vmovdqa %xmm1, %xmm1 +; KNL_32-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL_32-NEXT: kshiftlw $14, %k0, %k0 +; KNL_32-NEXT: kshiftrw $14, %k0, %k1 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1 ; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm2 {%k1} ; KNL_32-NEXT: vmovdqa %xmm2, %xmm0 ; KNL_32-NEXT: vzeroupper @@ -1500,10 +1502,8 @@ ; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0 ; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm0 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; KNL_32-NEXT: vmovdqa %xmm2, %xmm2 -; KNL_32-NEXT: vpsllq $63, %zmm2, %zmm2 -; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1 +; KNL_32-NEXT: movb $3, %cl +; KNL_32-NEXT: kmovw %ecx, %k1 ; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm1 {%k1} ; KNL_32-NEXT: vmovdqa %xmm1, %xmm0 ; KNL_32-NEXT: vzeroupper @@ -1597,10 +1597,8 @@ ; KNL_32-NEXT: vpsllq $32, %xmm1, %xmm1 ; KNL_32-NEXT: vpsraq $32, %zmm1, %zmm1 ; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; KNL_32-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; KNL_32-NEXT: vmovdqa %xmm2, %xmm2 -; KNL_32-NEXT: vpsllq $63, %zmm2, %zmm2 -; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1 +; 
KNL_32-NEXT: movb $3, %al +; KNL_32-NEXT: kmovw %eax, %k1 ; KNL_32-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl @@ -1686,83 +1684,80 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32> %src0) { ; KNL_64-LABEL: test30: ; KNL_64: # %bb.0: +; KNL_64-NEXT: # kill: def %xmm3 killed %xmm3 def %zmm3 +; KNL_64-NEXT: vpslld $31, %xmm2, %xmm2 +; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 +; KNL_64-NEXT: kmovw %k1, %eax ; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1 ; KNL_64-NEXT: vpsllq $2, %ymm1, %ymm1 ; KNL_64-NEXT: vpaddq %ymm1, %ymm0, %ymm1 -; KNL_64-NEXT: testb $1, %dil +; KNL_64-NEXT: testb $1, %al ; KNL_64-NEXT: # implicit-def: %xmm0 -; KNL_64-NEXT: jne .LBB31_1 -; KNL_64-NEXT: # %bb.2: # %else -; KNL_64-NEXT: testb $1, %sil -; KNL_64-NEXT: jne .LBB31_3 -; KNL_64-NEXT: .LBB31_4: # %else2 -; KNL_64-NEXT: testb $1, %dl -; KNL_64-NEXT: jne .LBB31_5 -; KNL_64-NEXT: .LBB31_6: # %else5 -; KNL_64-NEXT: vmovd %edi, %xmm1 -; KNL_64-NEXT: vpinsrb $4, %esi, %xmm1, %xmm1 -; KNL_64-NEXT: vpinsrb $8, %edx, %xmm1, %xmm1 -; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL_64-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0 -; KNL_64-NEXT: vzeroupper -; KNL_64-NEXT: retq -; KNL_64-NEXT: .LBB31_1: # %cond.load +; KNL_64-NEXT: je .LBB31_2 +; KNL_64-NEXT: # %bb.1: # %cond.load ; KNL_64-NEXT: vmovq %xmm1, %rax ; KNL_64-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; KNL_64-NEXT: testb $1, %sil +; KNL_64-NEXT: .LBB31_2: # %else +; KNL_64-NEXT: kshiftrw $1, %k1, %k0 +; KNL_64-NEXT: kmovw %k0, %eax +; KNL_64-NEXT: testb $1, %al ; KNL_64-NEXT: je .LBB31_4 -; KNL_64-NEXT: .LBB31_3: # %cond.load1 +; KNL_64-NEXT: # %bb.3: # %cond.load1 ; KNL_64-NEXT: vpextrq $1, %xmm1, %rax ; KNL_64-NEXT: vpinsrd $1, (%rax), %xmm0, %xmm0 -; KNL_64-NEXT: testb $1, %dl +; KNL_64-NEXT: .LBB31_4: # %else2 +; KNL_64-NEXT: kshiftrw $2, %k1, %k0 +; KNL_64-NEXT: kmovw %k0, %eax +; KNL_64-NEXT: testb $1, %al ; KNL_64-NEXT: je .LBB31_6 -; KNL_64-NEXT: .LBB31_5: # 
%cond.load4 +; KNL_64-NEXT: # %bb.5: # %cond.load4 ; KNL_64-NEXT: vextracti128 $1, %ymm1, %xmm1 ; KNL_64-NEXT: vmovq %xmm1, %rax ; KNL_64-NEXT: vpinsrd $2, (%rax), %xmm0, %xmm0 -; KNL_64-NEXT: jmp .LBB31_6 +; KNL_64-NEXT: .LBB31_6: # %else5 +; KNL_64-NEXT: vmovdqa32 %zmm0, %zmm3 {%k1} +; KNL_64-NEXT: vmovdqa %xmm3, %xmm0 +; KNL_64-NEXT: vzeroupper +; KNL_64-NEXT: retq ; ; KNL_32-LABEL: test30: ; KNL_32: # %bb.0: -; KNL_32-NEXT: pushl %esi -; KNL_32-NEXT: .cfi_def_cfa_offset 8 -; KNL_32-NEXT: .cfi_offset %esi, -8 -; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %edx +; KNL_32-NEXT: subl $12, %esp +; KNL_32-NEXT: .cfi_def_cfa_offset 16 +; KNL_32-NEXT: vpslld $31, %xmm2, %xmm2 +; KNL_32-NEXT: vptestmd %zmm2, %zmm2, %k1 +; KNL_32-NEXT: kmovw %k1, %eax ; KNL_32-NEXT: vpslld $2, %xmm1, %xmm1 -; KNL_32-NEXT: vpaddd %xmm1, %xmm0, %xmm1 -; KNL_32-NEXT: testb $1, %dl -; KNL_32-NEXT: # implicit-def: %xmm0 -; KNL_32-NEXT: jne .LBB31_1 -; KNL_32-NEXT: # %bb.2: # %else -; KNL_32-NEXT: testb $1, %cl -; KNL_32-NEXT: jne .LBB31_3 -; KNL_32-NEXT: .LBB31_4: # %else2 +; KNL_32-NEXT: vpaddd %xmm1, %xmm0, %xmm2 +; KNL_32-NEXT: testb $1, %al +; KNL_32-NEXT: # implicit-def: %xmm1 +; KNL_32-NEXT: je .LBB31_2 +; KNL_32-NEXT: # %bb.1: # %cond.load +; KNL_32-NEXT: vmovd %xmm2, %eax +; KNL_32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; KNL_32-NEXT: .LBB31_2: # %else +; KNL_32-NEXT: kshiftrw $1, %k1, %k0 +; KNL_32-NEXT: kmovw %k0, %eax ; KNL_32-NEXT: testb $1, %al -; KNL_32-NEXT: jne .LBB31_5 -; KNL_32-NEXT: .LBB31_6: # %else5 -; KNL_32-NEXT: vmovd %edx, %xmm1 -; KNL_32-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1 -; KNL_32-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL_32-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0 -; KNL_32-NEXT: popl %esi -; KNL_32-NEXT: retl -; KNL_32-NEXT: .LBB31_1: # %cond.load -; KNL_32-NEXT: vmovd %xmm1, %esi -; KNL_32-NEXT: vmovd {{.*#+}} xmm0 = 
mem[0],zero,zero,zero -; KNL_32-NEXT: testb $1, %cl ; KNL_32-NEXT: je .LBB31_4 -; KNL_32-NEXT: .LBB31_3: # %cond.load1 -; KNL_32-NEXT: vpextrd $1, %xmm1, %esi -; KNL_32-NEXT: vpinsrd $1, (%esi), %xmm0, %xmm0 +; KNL_32-NEXT: # %bb.3: # %cond.load1 +; KNL_32-NEXT: vpextrd $1, %xmm2, %eax +; KNL_32-NEXT: vpinsrd $1, (%eax), %xmm1, %xmm1 +; KNL_32-NEXT: .LBB31_4: # %else2 +; KNL_32-NEXT: vmovdqa {{[0-9]+}}(%esp), %xmm0 +; KNL_32-NEXT: kshiftrw $2, %k1, %k0 +; KNL_32-NEXT: kmovw %k0, %eax ; KNL_32-NEXT: testb $1, %al ; KNL_32-NEXT: je .LBB31_6 -; KNL_32-NEXT: .LBB31_5: # %cond.load4 -; KNL_32-NEXT: vpextrd $2, %xmm1, %esi -; KNL_32-NEXT: vpinsrd $2, (%esi), %xmm0, %xmm0 -; KNL_32-NEXT: jmp .LBB31_6 +; KNL_32-NEXT: # %bb.5: # %cond.load4 +; KNL_32-NEXT: vpextrd $2, %xmm2, %eax +; KNL_32-NEXT: vpinsrd $2, (%eax), %xmm1, %xmm1 +; KNL_32-NEXT: .LBB31_6: # %else5 +; KNL_32-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} +; KNL_32-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 +; KNL_32-NEXT: addl $12, %esp +; KNL_32-NEXT: vzeroupper +; KNL_32-NEXT: retl ; ; SKX-LABEL: test30: ; SKX: # %bb.0: @@ -2355,11 +2350,9 @@ ; KNL_64: # %bb.0: ; KNL_64-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL_64-NEXT: vpsrad $31, %xmm1, %xmm1 -; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1 -; KNL_64-NEXT: vmovdqa %ymm1, %ymm1 -; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1 +; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL_64-NEXT: kshiftlw $12, %k0, %k0 +; KNL_64-NEXT: kshiftrw $12, %k0, %k1 ; KNL_64-NEXT: vpgatherqq (,%zmm0), %zmm1 {%k1} ; KNL_64-NEXT: vpaddq %ymm1, %ymm1, %ymm0 ; KNL_64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 @@ -2376,12 +2369,10 @@ ; KNL_32-NEXT: subl $32, %esp ; KNL_32-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0 ; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL_32-NEXT: vpsrad $31, %xmm1, %xmm1 -; KNL_32-NEXT: vpmovsxdq %xmm1, %ymm1 -; KNL_32-NEXT: vmovdqa %ymm1, %ymm1 +; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k0 +; 
KNL_32-NEXT: kshiftlw $12, %k0, %k0 +; KNL_32-NEXT: kshiftrw $12, %k0, %k1 ; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0 -; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1 ; KNL_32-NEXT: vpgatherqq (,%zmm0), %zmm1 {%k1} ; KNL_32-NEXT: vpaddq %ymm1, %ymm1, %ymm0 ; KNL_32-NEXT: vpaddq %ymm0, %ymm1, %ymm0 @@ -2547,14 +2538,14 @@ ; KNL_64-LABEL: large_index: ; KNL_64: # %bb.0: ; KNL_64-NEXT: # kill: def %xmm1 killed %xmm1 def %ymm1 -; KNL_64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; KNL_64-NEXT: vmovaps %xmm0, %xmm0 -; KNL_64-NEXT: vmovq %rcx, %xmm2 -; KNL_64-NEXT: vmovq %rsi, %xmm3 -; KNL_64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] -; KNL_64-NEXT: vpslld $31, %ymm0, %ymm0 -; KNL_64-NEXT: vptestmd %zmm0, %zmm0, %k1 -; KNL_64-NEXT: vgatherqps (%rdi,%zmm2,4), %ymm1 {%k1} +; KNL_64-NEXT: vpsllq $63, %xmm0, %xmm0 +; KNL_64-NEXT: vptestmq %zmm0, %zmm0, %k0 +; KNL_64-NEXT: kshiftlw $14, %k0, %k0 +; KNL_64-NEXT: kshiftrw $14, %k0, %k1 +; KNL_64-NEXT: vmovq %rcx, %xmm0 +; KNL_64-NEXT: vmovq %rsi, %xmm2 +; KNL_64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm1 {%k1} ; KNL_64-NEXT: vmovaps %xmm1, %xmm0 ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq @@ -2562,16 +2553,16 @@ ; KNL_32-LABEL: large_index: ; KNL_32: # %bb.0: ; KNL_32-NEXT: # kill: def %xmm1 killed %xmm1 def %ymm1 -; KNL_32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; KNL_32-NEXT: vmovaps %xmm0, %xmm0 +; KNL_32-NEXT: vpsllq $63, %xmm0, %xmm0 +; KNL_32-NEXT: vptestmq %zmm0, %zmm0, %k0 +; KNL_32-NEXT: kshiftlw $14, %k0, %k0 +; KNL_32-NEXT: kshiftrw $14, %k0, %k1 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; KNL_32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm2, %xmm2 -; KNL_32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm2, %xmm2 -; KNL_32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm2, %xmm2 -; KNL_32-NEXT: vpslld $31, %ymm0, %ymm0 -; KNL_32-NEXT: vptestmd %zmm0, %zmm0, 
%k1 -; KNL_32-NEXT: vgatherqps (%eax,%zmm2,4), %ymm1 {%k1} +; KNL_32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; KNL_32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm1 {%k1} ; KNL_32-NEXT: vmovaps %xmm1, %xmm0 ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl @@ -2700,9 +2691,10 @@ ; KNL_64-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL_64-NEXT: vpsllq $32, %xmm1, %xmm1 ; KNL_64-NEXT: vpsraq $32, %zmm1, %zmm1 -; KNL_64-NEXT: vmovdqa %xmm2, %xmm2 -; KNL_64-NEXT: vpsllq $63, %zmm2, %zmm2 -; KNL_64-NEXT: vptestmq %zmm2, %zmm2, %k1 +; KNL_64-NEXT: vpsllq $63, %xmm2, %xmm2 +; KNL_64-NEXT: vptestmq %zmm2, %zmm2, %k0 +; KNL_64-NEXT: kshiftlw $14, %k0, %k0 +; KNL_64-NEXT: kshiftrw $14, %k0, %k1 ; KNL_64-NEXT: vscatterqpd %zmm0, (%rdi,%zmm1,8) {%k1} ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq @@ -2712,10 +2704,11 @@ ; KNL_32-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL_32-NEXT: vpsllq $32, %xmm1, %xmm1 ; KNL_32-NEXT: vpsraq $32, %zmm1, %zmm1 -; KNL_32-NEXT: vmovdqa %xmm2, %xmm2 +; KNL_32-NEXT: vpsllq $63, %xmm2, %xmm2 +; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k0 +; KNL_32-NEXT: kshiftlw $14, %k0, %k0 +; KNL_32-NEXT: kshiftrw $14, %k0, %k1 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpsllq $63, %zmm2, %zmm2 -; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1 ; KNL_32-NEXT: vscatterqpd %zmm0, (%eax,%zmm1,8) {%k1} ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl Index: test/CodeGen/X86/masked_memop.ll =================================================================== --- test/CodeGen/X86/masked_memop.ll +++ test/CodeGen/X86/masked_memop.ll @@ -99,10 +99,15 @@ ; ; AVX512F-LABEL: test6: ; AVX512F: ## %bb.0: +; AVX512F-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpcmpeqq 
%xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 -; AVX512F-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 +; AVX512F-NEXT: vpcmpeqq %zmm2, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $14, %k0, %k0 +; AVX512F-NEXT: kshiftrw $14, %k0, %k1 +; AVX512F-NEXT: vblendmpd (%rdi), %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: test6: @@ -127,10 +132,15 @@ ; ; AVX512F-LABEL: test7: ; AVX512F: ## %bb.0: +; AVX512F-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 -; AVX512F-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 +; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $12, %k0, %k0 +; AVX512F-NEXT: kshiftrw $12, %k0, %k1 +; AVX512F-NEXT: vblendmps (%rdi), %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: test7: @@ -163,10 +173,15 @@ ; ; AVX512F-LABEL: test8: ; AVX512F: ## %bb.0: +; AVX512F-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 -; AVX512F-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 +; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $12, %k0, %k0 +; AVX512F-NEXT: kshiftrw $12, %k0, %k1 +; AVX512F-NEXT: vpblendmd (%rdi), %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: test8: @@ -197,9 +212,14 @@ ; ; AVX512F-LABEL: test9: ; AVX512F: ## %bb.0: +; AVX512F-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; 
AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) +; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $12, %k0, %k0 +; AVX512F-NEXT: kshiftrw $12, %k0, %k1 +; AVX512F-NEXT: vmovdqu32 %zmm1, (%rdi) {%k1} +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: test9: @@ -237,11 +257,14 @@ ; ; AVX512F-LABEL: test10: ; AVX512F: ## %bb.0: +; AVX512F-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1 +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vpmovsxdq %xmm0, %ymm0 -; AVX512F-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 -; AVX512F-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0 +; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $12, %k0, %k0 +; AVX512F-NEXT: kshiftrw $12, %k0, %k1 +; AVX512F-NEXT: vblendmpd (%rdi), %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0 ; AVX512F-NEXT: retq ; ; SKX-LABEL: test10: @@ -277,10 +300,13 @@ ; ; AVX512F-LABEL: test10b: ; AVX512F: ## %bb.0: +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vpmovsxdq %xmm0, %ymm0 -; AVX512F-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm0 +; AVX512F-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $12, %k0, %k0 +; AVX512F-NEXT: kshiftrw $12, %k0, %k1 +; AVX512F-NEXT: vmovupd (%rdi), %zmm0 {%k1} {z} +; AVX512F-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0 ; AVX512F-NEXT: retq ; ; SKX-LABEL: test10b: @@ -525,11 +551,14 @@ ; ; AVX512F-LABEL: test14: ; AVX512F: ## %bb.0: +; AVX512F-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] -; AVX512F-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = 
xmm0[0,2],zero,zero -; AVX512F-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) +; AVX512F-NEXT: vpcmpeqq %zmm2, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $14, %k0, %k0 +; AVX512F-NEXT: kshiftrw $14, %k0, %k1 +; AVX512F-NEXT: vmovups %zmm1, (%rdi) {%k1} +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: test14: @@ -569,10 +598,12 @@ ; AVX512F: ## %bb.0: ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] -; AVX512F-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] -; AVX512F-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) +; AVX512F-NEXT: vpcmpeqq %zmm2, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $14, %k0, %k0 +; AVX512F-NEXT: kshiftrw $14, %k0, %k1 +; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] +; AVX512F-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1} +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: test15: @@ -610,12 +641,15 @@ ; ; AVX512F-LABEL: test16: ; AVX512F: ## %bb.0: +; AVX512F-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] -; AVX512F-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; AVX512F-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 -; AVX512F-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 +; AVX512F-NEXT: vpcmpeqq %zmm2, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $14, %k0, %k0 +; AVX512F-NEXT: kshiftrw $14, %k0, %k1 +; AVX512F-NEXT: vblendmps (%rdi), %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: test16: @@ -659,12 +693,13 @@ ; AVX512F: ## %bb.0: ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] -; AVX512F-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = 
xmm0[0,2],zero,zero -; AVX512F-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 -; AVX512F-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,3] -; AVX512F-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 +; AVX512F-NEXT: vpcmpeqq %zmm2, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $14, %k0, %k0 +; AVX512F-NEXT: kshiftrw $14, %k0, %k1 +; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] +; AVX512F-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} ; AVX512F-NEXT: vpmovsxdq %xmm0, %xmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: test17: @@ -704,9 +739,12 @@ ; AVX512F: ## %bb.0: ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; AVX512F-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; AVX512F-NEXT: vmaskmovps (%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $14, %k0, %k0 +; AVX512F-NEXT: kshiftrw $14, %k0, %k1 +; AVX512F-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: test18: @@ -729,8 +767,11 @@ ; ; AVX512F-LABEL: load_all: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vmaskmovps (%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: movw $15, %ax +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: load_all: @@ -755,9 +796,12 @@ ; ; AVX512F-LABEL: mload_constmask_v4f32: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vmovaps {{.*#+}} xmm1 = [4294967295,0,4294967295,4294967295] -; AVX512F-NEXT: vmaskmovps (%rdi), %xmm1, %xmm2 -; AVX512F-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0 +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512F-NEXT: movw $13, %ax +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vmovups (%rdi), %zmm0 {%k1} +; 
AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: mload_constmask_v4f32: @@ -789,9 +833,12 @@ ; ; AVX512F-LABEL: mload_constmask_v4i32: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [0,4294967295,4294967295,4294967295] -; AVX512F-NEXT: vpmaskmovd (%rdi), %xmm1, %xmm2 -; AVX512F-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0 +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512F-NEXT: movw $14, %ax +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: mload_constmask_v4i32: @@ -843,9 +890,11 @@ ; ; AVX512F-LABEL: mload_constmask_v4f64: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vmovapd {{.*#+}} ymm1 = [18446744073709551615,18446744073709551615,18446744073709551615,0] -; AVX512F-NEXT: vmaskmovpd (%rdi), %ymm1, %ymm2 -; AVX512F-NEXT: vblendvpd %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0 +; AVX512F-NEXT: movb $7, %al +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vmovupd (%rdi), %zmm0 {%k1} +; AVX512F-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0 ; AVX512F-NEXT: retq ; ; SKX-LABEL: mload_constmask_v4f64: @@ -898,9 +947,11 @@ ; ; AVX512F-LABEL: mload_constmask_v4i64: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [18446744073709551615,0,0,18446744073709551615] -; AVX512F-NEXT: vpmaskmovq (%rdi), %ymm1, %ymm2 -; AVX512F-NEXT: vblendvpd %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0 +; AVX512F-NEXT: movb $9, %al +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} +; AVX512F-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0 ; AVX512F-NEXT: retq ; ; SKX-LABEL: mload_constmask_v4i64: @@ -950,8 +1001,10 @@ ; ; AVX512F-LABEL: mload_constmask_v4f64_undef_passthrough: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: 
vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,18446744073709551615,0] -; AVX512F-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm0 +; AVX512F-NEXT: movb $7, %al +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vmovupd (%rdi), %zmm0 {%k1} {z} +; AVX512F-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0 ; AVX512F-NEXT: retq ; ; SKX-LABEL: mload_constmask_v4f64_undef_passthrough: @@ -979,8 +1032,10 @@ ; ; AVX512F-LABEL: mload_constmask_v4i64_undef_passthrough: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm0 = [0,18446744073709551615,18446744073709551615,0] -; AVX512F-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm0 +; AVX512F-NEXT: movb $6, %al +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z} +; AVX512F-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0 ; AVX512F-NEXT: retq ; ; SKX-LABEL: mload_constmask_v4i64_undef_passthrough: @@ -1008,8 +1063,11 @@ ; ; AVX512F-LABEL: test21: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) +; AVX512F-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512F-NEXT: movw $15, %ax +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vmovdqu32 %zmm1, (%rdi) {%k1} +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: test21: @@ -1225,7 +1283,14 @@ ; ; AVX512F-LABEL: trunc_mask: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vmaskmovps %xmm0, %xmm2, (%rdi) +; AVX512F-NEXT: ## kill: def %xmm2 killed %xmm2 def %zmm2 +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm1, %k0 +; AVX512F-NEXT: kshiftlw $12, %k0, %k0 +; AVX512F-NEXT: kshiftrw $12, %k0, %k1 +; AVX512F-NEXT: vmovups %zmm0, (%rdi) {%k1} +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: trunc_mask: Index: test/CodeGen/X86/pr33349.ll =================================================================== --- test/CodeGen/X86/pr33349.ll +++ test/CodeGen/X86/pr33349.ll @@ -8,32 
+8,38 @@ define void @test(<4 x i1> %m, <4 x x86_fp80> %v, <4 x x86_fp80>*%p) local_unnamed_addr { ; KNL-LABEL: test: ; KNL: # %bb.0: # %bb -; KNL-NEXT: vpextrb $0, %xmm0, %eax +; KNL-NEXT: vpslld $31, %xmm0, %xmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: kshiftrw $1, %k0, %k1 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kshiftrw $2, %k0, %k1 +; KNL-NEXT: kshiftrw $1, %k1, %k2 +; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: testb $1, %al ; KNL-NEXT: fld1 ; KNL-NEXT: fldz ; KNL-NEXT: fld %st(0) ; KNL-NEXT: fcmovne %st(2), %st(0) -; KNL-NEXT: vpextrb $4, %xmm0, %eax -; KNL-NEXT: testb $1, %al +; KNL-NEXT: testb $1, %cl ; KNL-NEXT: fld %st(1) ; KNL-NEXT: fcmovne %st(3), %st(0) -; KNL-NEXT: vpextrb $8, %xmm0, %eax +; KNL-NEXT: kmovw %k2, %eax ; KNL-NEXT: testb $1, %al ; KNL-NEXT: fld %st(2) ; KNL-NEXT: fcmovne %st(4), %st(0) -; KNL-NEXT: vpextrb $12, %xmm0, %eax +; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: testb $1, %al ; KNL-NEXT: fxch %st(3) ; KNL-NEXT: fcmovne %st(4), %st(0) ; KNL-NEXT: fstp %st(4) ; KNL-NEXT: fxch %st(3) +; KNL-NEXT: fstpt (%rdi) +; KNL-NEXT: fxch %st(1) ; KNL-NEXT: fstpt 30(%rdi) ; KNL-NEXT: fxch %st(1) ; KNL-NEXT: fstpt 20(%rdi) -; KNL-NEXT: fxch %st(1) ; KNL-NEXT: fstpt 10(%rdi) -; KNL-NEXT: fstpt (%rdi) +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test: Index: test/CodeGen/X86/sse-fsignum.ll =================================================================== --- test/CodeGen/X86/sse-fsignum.ll +++ test/CodeGen/X86/sse-fsignum.ll @@ -10,17 +10,44 @@ ; define void @signum32a(<4 x float>*) { -; AVX-LABEL: signum32a: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovaps (%rdi), %xmm0 -; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vcmpltps %xmm1, %xmm0, %xmm2 -; AVX-NEXT: vcvtdq2ps %xmm2, %xmm2 -; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 -; AVX-NEXT: vsubps %xmm0, %xmm2, %xmm0 -; AVX-NEXT: vmovaps %xmm0, (%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: signum32a: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vmovaps 
(%rdi), %xmm0 +; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vcmpltps %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vcvtdq2ps %xmm2, %xmm2 +; AVX1-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX1-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: vmovaps %xmm0, (%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: signum32a: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vmovaps (%rdi), %xmm0 +; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vcmpltps %xmm1, %xmm0, %xmm2 +; AVX2-NEXT: vcvtdq2ps %xmm2, %xmm2 +; AVX2-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX2-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; AVX2-NEXT: vmovaps %xmm0, (%rdi) +; AVX2-NEXT: retq +; +; AVX512F-LABEL: signum32a: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vmovaps (%rdi), %xmm0 +; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vcmpltps %zmm1, %zmm0, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} +; AVX512F-NEXT: vcvtdq2ps %xmm2, %xmm2 +; AVX512F-NEXT: vcmpltps %zmm0, %zmm1, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX512F-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; AVX512F-NEXT: vmovaps %xmm0, (%rdi) +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq entry: %1 = load <4 x float>, <4 x float>* %0 %2 = fcmp olt <4 x float> %1, zeroinitializer @@ -33,19 +60,48 @@ } define void @signum64a(<2 x double>*) { -; AVX-LABEL: signum64a: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovapd (%rdi), %xmm0 -; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vcmpltpd %xmm1, %xmm0, %xmm2 -; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,2,2,3] -; AVX-NEXT: vcvtdq2pd %xmm2, %xmm2 -; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 -; AVX-NEXT: vsubpd %xmm0, %xmm2, %xmm0 -; AVX-NEXT: vmovapd %xmm0, (%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: signum64a: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vmovapd (%rdi), %xmm0 
+; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vcmpltpd %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,2,2,3] +; AVX1-NEXT: vcvtdq2pd %xmm2, %xmm2 +; AVX1-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX1-NEXT: vsubpd %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: vmovapd %xmm0, (%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: signum64a: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vmovapd (%rdi), %xmm0 +; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vcmpltpd %xmm1, %xmm0, %xmm2 +; AVX2-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,2,2,3] +; AVX2-NEXT: vcvtdq2pd %xmm2, %xmm2 +; AVX2-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX2-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX2-NEXT: vsubpd %xmm0, %xmm2, %xmm0 +; AVX2-NEXT: vmovapd %xmm0, (%rdi) +; AVX2-NEXT: retq +; +; AVX512F-LABEL: signum64a: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vmovapd (%rdi), %xmm0 +; AVX512F-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vcmpltpd %zmm1, %zmm0, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} +; AVX512F-NEXT: vcvtdq2pd %xmm2, %xmm2 +; AVX512F-NEXT: vcmpltpd %zmm0, %zmm1, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX512F-NEXT: vsubpd %xmm0, %xmm2, %xmm0 +; AVX512F-NEXT: vmovapd %xmm0, (%rdi) +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq entry: %1 = load <2 x double>, <2 x double>* %0 %2 = fcmp olt <2 x double> %1, zeroinitializer @@ -152,11 +208,11 @@ ; AVX512F: # %bb.0: # %entry ; AVX512F-NEXT: vmovapd (%rdi), %ymm0 ; AVX512F-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2 -; AVX512F-NEXT: vpmovqd %zmm2, %ymm2 +; AVX512F-NEXT: vcmpltpd %zmm1, %zmm0, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; AVX512F-NEXT: vcvtdq2pd %xmm2, %ymm2 -; AVX512F-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 -; AVX512F-NEXT: 
vpmovqd %zmm0, %ymm0 +; AVX512F-NEXT: vcmpltpd %zmm0, %zmm1, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: vcvtdq2pd %xmm0, %ymm0 ; AVX512F-NEXT: vsubpd %ymm0, %ymm2, %ymm0 ; AVX512F-NEXT: vmovapd %ymm0, (%rdi) Index: test/CodeGen/X86/vector-shuffle-v1.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-v1.ll +++ test/CodeGen/X86/vector-shuffle-v1.ll @@ -6,7 +6,15 @@ define <2 x i1> @shuf2i1_1_0(<2 x i1> %a) { ; AVX512F-LABEL: shuf2i1_1_0: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1] +; AVX512F-NEXT: vpsllq $63, %xmm0, %xmm0 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 +; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; AVX512F-NEXT: vpsllq $63, %xmm0, %xmm0 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 +; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: shuf2i1_1_0: @@ -37,9 +45,17 @@ define <2 x i1> @shuf2i1_1_2(<2 x i1> %a) { ; AVX512F-LABEL: shuf2i1_1_2: ; AVX512F: # %bb.0: -; AVX512F-NEXT: movl $1, %eax +; AVX512F-NEXT: vpsllq $63, %xmm0, %xmm0 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 +; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: movq $-1, %rax ; AVX512F-NEXT: vmovq %rax, %xmm1 ; AVX512F-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] +; AVX512F-NEXT: vpsllq $63, %xmm0, %xmm0 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 +; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: shuf2i1_1_2: @@ -75,7 +91,15 @@ define <4 x i1> @shuf4i1_3_2_10(<4 x i1> %a) { ; AVX512F-LABEL: shuf4i1_3_2_10: ; AVX512F: # %bb.0: -; AVX512F-NEXT: 
vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] +; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0] +; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: shuf4i1_3_2_10: Index: test/CodeGen/X86/vselect-pcmp.ll =================================================================== --- test/CodeGen/X86/vselect-pcmp.ll +++ test/CodeGen/X86/vselect-pcmp.ll @@ -43,10 +43,22 @@ } define <4 x i32> @signbit_sel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) { -; AVX12F-LABEL: signbit_sel_v4i32: -; AVX12F: # %bb.0: -; AVX12F-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 -; AVX12F-NEXT: retq +; AVX12-LABEL: signbit_sel_v4i32: +; AVX12: # %bb.0: +; AVX12-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; AVX12-NEXT: retq +; +; AVX512F-LABEL: signbit_sel_v4i32: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 +; AVX512F-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm3, %k1 +; AVX512F-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: signbit_sel_v4i32: ; AVX512VL: # %bb.0: @@ -60,10 +72,22 @@ } define <2 x i64> @signbit_sel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) { -; AVX12F-LABEL: signbit_sel_v2i64: -; AVX12F: # %bb.0: -; AVX12F-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX12F-NEXT: retq +; AVX12-LABEL: signbit_sel_v2i64: +; AVX12: # %bb.0: +; AVX12-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX12-NEXT: retq +; +; 
AVX512F-LABEL: signbit_sel_v2i64: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 +; AVX512F-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpgtq %zmm2, %zmm3, %k1 +; AVX512F-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: signbit_sel_v2i64: ; AVX512VL: # %bb.0: @@ -77,10 +101,22 @@ } define <4 x float> @signbit_sel_v4f32(<4 x float> %x, <4 x float> %y, <4 x i32> %mask) { -; AVX12F-LABEL: signbit_sel_v4f32: -; AVX12F: # %bb.0: -; AVX12F-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 -; AVX12F-NEXT: retq +; AVX12-LABEL: signbit_sel_v4f32: +; AVX12: # %bb.0: +; AVX12-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; AVX12-NEXT: retq +; +; AVX512F-LABEL: signbit_sel_v4f32: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 +; AVX512F-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm3, %k1 +; AVX512F-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: signbit_sel_v4f32: ; AVX512VL: # %bb.0: @@ -94,10 +130,22 @@ } define <2 x double> @signbit_sel_v2f64(<2 x double> %x, <2 x double> %y, <2 x i64> %mask) { -; AVX12F-LABEL: signbit_sel_v2f64: -; AVX12F: # %bb.0: -; AVX12F-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX12F-NEXT: retq +; AVX12-LABEL: signbit_sel_v2f64: +; AVX12: # %bb.0: +; AVX12-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX12-NEXT: retq +; +; AVX512F-LABEL: signbit_sel_v2f64: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 +; AVX512F-NEXT: # kill: def %xmm1 killed %xmm1 def 
%zmm1 +; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpgtq %zmm2, %zmm3, %k1 +; AVX512F-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: signbit_sel_v2f64: ; AVX512VL: # %bb.0: @@ -203,10 +251,21 @@ } define <4 x i64> @signbit_sel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %mask) { -; AVX12F-LABEL: signbit_sel_v4i64: -; AVX12F: # %bb.0: -; AVX12F-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 -; AVX12F-NEXT: retq +; AVX12-LABEL: signbit_sel_v4i64: +; AVX12: # %bb.0: +; AVX12-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX12-NEXT: retq +; +; AVX512F-LABEL: signbit_sel_v4i64: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 +; AVX512F-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; AVX512F-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpgtq %zmm2, %zmm3, %k1 +; AVX512F-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 +; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: signbit_sel_v4i64: ; AVX512VL: # %bb.0: @@ -220,10 +279,21 @@ } define <4 x double> @signbit_sel_v4f64(<4 x double> %x, <4 x double> %y, <4 x i64> %mask) { -; AVX12F-LABEL: signbit_sel_v4f64: -; AVX12F: # %bb.0: -; AVX12F-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 -; AVX12F-NEXT: retq +; AVX12-LABEL: signbit_sel_v4f64: +; AVX12: # %bb.0: +; AVX12-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX12-NEXT: retq +; +; AVX512F-LABEL: signbit_sel_v4f64: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 +; AVX512F-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; AVX512F-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpgtq %zmm2, %zmm3, %k1 +; AVX512F-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 
{%k1} +; AVX512F-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 +; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: signbit_sel_v4f64: ; AVX512VL: # %bb.0: @@ -256,8 +326,13 @@ ; ; AVX512F-LABEL: signbit_sel_v4f64_small_mask: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpmovsxdq %xmm2, %ymm2 -; AVX512F-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 +; AVX512F-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; AVX512F-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm3, %k1 +; AVX512F-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: signbit_sel_v4f64_small_mask: @@ -296,12 +371,23 @@ ; (2) FIXME: If we don't care about signed-zero (and NaN?), the compare should still get folded. define <4 x float> @signbit_sel_v4f32_fcmp(<4 x float> %x, <4 x float> %y, <4 x float> %mask) #0 { -; AVX12F-LABEL: signbit_sel_v4f32_fcmp: -; AVX12F: # %bb.0: -; AVX12F-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; AVX12F-NEXT: vcmpltps %xmm2, %xmm0, %xmm2 -; AVX12F-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 -; AVX12F-NEXT: retq +; AVX12-LABEL: signbit_sel_v4f32_fcmp: +; AVX12: # %bb.0: +; AVX12-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; AVX12-NEXT: vcmpltps %xmm2, %xmm0, %xmm2 +; AVX12-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; AVX12-NEXT: retq +; +; AVX512F-LABEL: signbit_sel_v4f32_fcmp: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512F-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; AVX512F-NEXT: vcmpltps %zmm2, %zmm0, %k1 +; AVX512F-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: signbit_sel_v4f32_fcmp: ; AVX512VL: # %bb.0: