Index: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -3616,6 +3616,7 @@ SDValue InOp0 = GetWidenedVector(N->getOperand(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(1)); SDLoc dl(N); + EVT VT = N->getValueType(0); // WARNING: In this code we widen the compare instruction with garbage. // This garbage may contain denormal floats which may be slow. Is this a real @@ -3625,18 +3626,23 @@ // Only some of the compared elements are legal. EVT SVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), InOp0.getValueType()); + // The result type is legal, if it's vXi1, keep vXi1 for the new SETCC. + if (VT.getScalarType() == MVT::i1) + SVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, + SVT.getVectorNumElements()); + SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N), - SVT, InOp0, InOp1, N->getOperand(2)); + SVT, InOp0, InOp1, N->getOperand(2)); // Extract the needed results from the result vector. EVT ResVT = EVT::getVectorVT(*DAG.getContext(), SVT.getVectorElementType(), - N->getValueType(0).getVectorNumElements()); + VT.getVectorNumElements()); SDValue CC = DAG.getNode( ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC, DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - return PromoteTargetBoolean(CC, N->getValueType(0)); + return PromoteTargetBoolean(CC, VT); } Index: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp +++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -460,7 +460,7 @@ // this happens we will use 512-bit operations and the mask will not be // zero extended. 
EVT OpVT = N->getOperand(0).getValueType(); - if (OpVT == MVT::v8i32 || OpVT == MVT::v8f32) + if (OpVT.is256BitVector() || OpVT.is128BitVector()) return Subtarget->hasVLX(); return true; Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -1144,6 +1144,8 @@ addRegisterClass(MVT::v8f64, &X86::VR512RegClass); addRegisterClass(MVT::v1i1, &X86::VK1RegClass); + addRegisterClass(MVT::v2i1, &X86::VK2RegClass); + addRegisterClass(MVT::v4i1, &X86::VK4RegClass); addRegisterClass(MVT::v8i1, &X86::VK8RegClass); addRegisterClass(MVT::v16i1, &X86::VK16RegClass); @@ -1171,15 +1173,14 @@ setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom); } - // Extends of v16i1/v8i1 to 128-bit vectors. - setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v16i8, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::v16i8, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v8i16, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::v8i16, Custom); + // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors. 
+ for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { + setOperationAction(ISD::SIGN_EXTEND, VT, Custom); + setOperationAction(ISD::ZERO_EXTEND, VT, Custom); + setOperationAction(ISD::ANY_EXTEND, VT, Custom); + } - for (auto VT : { MVT::v8i1, MVT::v16i1 }) { + for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) { setOperationAction(ISD::ADD, VT, Custom); setOperationAction(ISD::SUB, VT, Custom); setOperationAction(ISD::MUL, VT, Custom); @@ -1195,9 +1196,12 @@ } setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom); - for (auto VT : { MVT::v1i1, MVT::v8i1 }) + for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 }) setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); for (MVT VT : MVT::fp_vector_valuetypes()) @@ -1528,41 +1532,6 @@ } if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) { - addRegisterClass(MVT::v4i1, &X86::VK4RegClass); - addRegisterClass(MVT::v2i1, &X86::VK2RegClass); - - for (auto VT : { MVT::v2i1, MVT::v4i1 }) { - setOperationAction(ISD::ADD, VT, Custom); - setOperationAction(ISD::SUB, VT, Custom); - setOperationAction(ISD::MUL, VT, Custom); - setOperationAction(ISD::VSELECT, VT, Expand); - - setOperationAction(ISD::TRUNCATE, VT, Custom); - setOperationAction(ISD::SETCC, VT, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); - setOperationAction(ISD::SELECT, VT, Custom); - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); - } - - // TODO: v8i1 concat should be legal without VLX to support concats of - // v1i1, but we won't legalize it correctly currently without 
introducing - // a v4i1 concat in the middle. - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom); - for (auto VT : { MVT::v2i1, MVT::v4i1 }) - setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); - - // Extends from v2i1/v4i1 masks to 128-bit vectors. - setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Custom); - setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Custom); - setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal); setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal); setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal); @@ -4945,8 +4914,6 @@ } else if (VT.getVectorElementType() == MVT::i1) { assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && "Unexpected vector type"); - assert((Subtarget.hasVLX() || VT.getVectorNumElements() >= 8) && - "Unexpected vector type"); Vec = DAG.getConstant(0, dl, VT); } else { unsigned Num32BitElts = VT.getSizeInBits() / 32; @@ -17779,6 +17746,19 @@ assert(EltVT == MVT::f32 || EltVT == MVT::f64); #endif + // Custom widen MVT::v2f32 to prevent the default widening + // from getting a result type of v4i32, extracting it to v2i32 and then + // trying to sign extend that to v2i1. 
+ if (VT == MVT::v2i1 && Op1.getValueType() == MVT::v2f32) { + Op0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Op0, + DAG.getUNDEF(MVT::v2f32)); + Op1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Op1, + DAG.getUNDEF(MVT::v2f32)); + SDValue NewOp = DAG.getNode(ISD::SETCC, dl, MVT::v4i1, Op0, Op1, CC); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i1, NewOp, + DAG.getIntPtrConstant(0, dl)); + } + unsigned Opc; if (Subtarget.hasAVX512() && VT.getVectorElementType() == MVT::i1) { assert(VT.getVectorNumElements() <= 16); @@ -24417,8 +24397,8 @@ // Mask // At this point we have promoted mask operand - assert(MaskVT.getScalarSizeInBits() >= 32 && "unexpected mask type"); - MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), NumElts); + assert(MaskVT.getScalarType() == MVT::i1 && "unexpected mask type"); + MVT ExtMaskVT = MVT::getVectorVT(MVT::i1, NumElts); // Use the original mask here, do not modify the mask twice Mask = ExtendToType(N->getMask(), ExtMaskVT, DAG, true); @@ -24427,12 +24407,9 @@ Src = ExtendToType(Src, NewVT, DAG); } } - // If the mask is "wide" at this point - truncate it to i1 vector - MVT BitMaskVT = MVT::getVectorVT(MVT::i1, NumElts); - Mask = DAG.getNode(ISD::TRUNCATE, dl, BitMaskVT, Mask); // The mask is killed by scatter, add it to the values - SDVTList VTs = DAG.getVTList(BitMaskVT, MVT::Other); + SDVTList VTs = DAG.getVTList(Mask.getValueType(), MVT::Other); SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index}; SDValue NewScatter = DAG.getTargetMemSDNode( VTs, Ops, dl, N->getMemoryVT(), N->getMemOperand()); @@ -24455,11 +24432,6 @@ assert((!N->isExpandingLoad() || ScalarVT.getSizeInBits() >= 32) && "Expanding masked load is supported for 32 and 64-bit types only!"); - // 4x32, 4x64 and 2x64 vectors of non-expanding loads are legal regardless of - // VLX. These types for exp-loads are handled here. 
- if (!N->isExpandingLoad() && VT.getVectorNumElements() <= 4) - return Op; - assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() && "Cannot lower masked load op."); @@ -24476,16 +24448,12 @@ Src0 = ExtendToType(Src0, WideDataVT, DAG); // Mask element has to be i1. - MVT MaskEltTy = Mask.getSimpleValueType().getScalarType(); - assert((MaskEltTy == MVT::i1 || VT.getVectorNumElements() <= 4) && - "We handle 4x32, 4x64 and 2x64 vectors only in this case"); + assert(Mask.getSimpleValueType().getScalarType() == MVT::i1 && + "Unexpected mask type"); - MVT WideMaskVT = MVT::getVectorVT(MaskEltTy, NumEltsInWideVec); + MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec); Mask = ExtendToType(Mask, WideMaskVT, DAG, true); - if (MaskEltTy != MVT::i1) - Mask = DAG.getNode(ISD::TRUNCATE, dl, - MVT::getVectorVT(MVT::i1, NumEltsInWideVec), Mask); SDValue NewLoad = DAG.getMaskedLoad(WideDataVT, dl, N->getChain(), N->getBasePtr(), Mask, Src0, N->getMemoryVT(), N->getMemOperand(), @@ -24514,10 +24482,6 @@ assert((!N->isCompressingStore() || ScalarVT.getSizeInBits() >= 32) && "Expanding masked load is supported for 32 and 64-bit types only!"); - // 4x32 and 2x64 vectors of non-compressing stores are legal regardless to VLX. - if (!N->isCompressingStore() && VT.getVectorNumElements() <= 4) - return Op; - assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() && "Cannot lower masked store op."); @@ -24532,17 +24496,13 @@ MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec); // Mask element has to be i1. 
- MVT MaskEltTy = Mask.getSimpleValueType().getScalarType(); - assert((MaskEltTy == MVT::i1 || VT.getVectorNumElements() <= 4) && - "We handle 4x32, 4x64 and 2x64 vectors only in this case"); + assert(Mask.getSimpleValueType().getScalarType() == MVT::i1 && + "Unexpected mask type"); - MVT WideMaskVT = MVT::getVectorVT(MaskEltTy, NumEltsInWideVec); + MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec); DataToStore = ExtendToType(DataToStore, WideDataVT, DAG); Mask = ExtendToType(Mask, WideMaskVT, DAG, true); - if (MaskEltTy != MVT::i1) - Mask = DAG.getNode(ISD::TRUNCATE, dl, - MVT::getVectorVT(MVT::i1, NumEltsInWideVec), Mask); return DAG.getMaskedStore(N->getChain(), dl, DataToStore, N->getBasePtr(), Mask, N->getMemoryVT(), N->getMemOperand(), N->isTruncatingStore(), N->isCompressingStore()); @@ -24592,12 +24552,9 @@ Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index); // Mask - MVT MaskBitVT = MVT::getVectorVT(MVT::i1, NumElts); - // At this point we have promoted mask operand - assert(MaskVT.getScalarSizeInBits() >= 32 && "unexpected mask type"); - MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), NumElts); - Mask = ExtendToType(Mask, ExtMaskVT, DAG, true); - Mask = DAG.getNode(ISD::TRUNCATE, dl, MaskBitVT, Mask); + assert(MaskVT.getScalarType() == MVT::i1 && "unexpected mask type"); + MaskVT = MVT::getVectorVT(MVT::i1, NumElts); + Mask = ExtendToType(Mask, MaskVT, DAG, true); // The pass-through value MVT NewVT = MVT::getVectorVT(VT.getScalarType(), NumElts); @@ -24605,7 +24562,7 @@ SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index }; SDValue NewGather = DAG.getTargetMemSDNode( - DAG.getVTList(NewVT, MaskBitVT, MVT::Other), Ops, dl, N->getMemoryVT(), + DAG.getVTList(NewVT, MaskVT, MVT::Other), Ops, dl, N->getMemoryVT(), N->getMemOperand()); SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, NewGather.getValue(0), @@ -30447,7 +30404,7 @@ // If this is a bitcast between a MVT::v4i1/v2i1 and an illegal integer 
// type, widen both sides to avoid a trip through memory. if ((VT == MVT::v4i1 || VT == MVT::v2i1) && SrcVT.isScalarInteger() && - Subtarget.hasVLX()) { + Subtarget.hasAVX512()) { SDLoc dl(N); N0 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i8, N0); N0 = DAG.getBitcast(MVT::v8i1, N0); @@ -30458,7 +30415,7 @@ // If this is a bitcast between a MVT::v4i1/v2i1 and an illegal integer // type, widen both sides to avoid a trip through memory. if ((SrcVT == MVT::v4i1 || SrcVT == MVT::v2i1) && VT.isScalarInteger() && - Subtarget.hasVLX()) { + Subtarget.hasAVX512()) { SDLoc dl(N); unsigned NumConcats = 8 / SrcVT.getVectorNumElements(); SmallVector Ops(NumConcats, DAG.getUNDEF(SrcVT)); Index: llvm/trunk/lib/Target/X86/X86InstrAVX512.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrAVX512.td +++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td @@ -2962,46 +2962,77 @@ defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, SSE_PSHUF>; defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, SSE_PSHUF>; -multiclass axv512_icmp_packed_no_vlx_lowering { -def : Pat<(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), - (COPY_TO_REGCLASS (!cast(InstStr##Zrr) - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>; - -def : Pat<(v8i1 (and VK8:$mask, - (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2)))), +multiclass axv512_icmp_packed_no_vlx_lowering { +def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1), + (Narrow.VT Narrow.RC:$src2))), + (COPY_TO_REGCLASS + (!cast(InstStr##Zrr) + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))), + Narrow.KRC)>; + +def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (OpNode (Narrow.VT Narrow.RC:$src1), + (Narrow.VT Narrow.RC:$src2)))), (COPY_TO_REGCLASS (!cast(InstStr##Zrrk) 
- (COPY_TO_REGCLASS VK8:$mask, VK16), - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), - VK8)>; + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))), + Narrow.KRC)>; } multiclass axv512_icmp_packed_cc_no_vlx_lowering { -def : Pat<(v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)), - (COPY_TO_REGCLASS (!cast(InstStr##Zrri) - (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), - imm:$cc), VK8)>; - -def : Pat<(v8i1 (and VK8:$mask, (OpNode (_.info256.VT VR256X:$src1), - (_.info256.VT VR256X:$src2), imm:$cc))), - (COPY_TO_REGCLASS (!cast(InstStr##Zrrik) - (COPY_TO_REGCLASS VK8:$mask, VK16), - (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), - (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), - imm:$cc), VK8)>; + X86VectorVTInfo Narrow, + X86VectorVTInfo Wide> { +def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1), + (Narrow.VT Narrow.RC:$src2), imm:$cc)), + (COPY_TO_REGCLASS + (!cast(InstStr##Zrri) + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), + imm:$cc), Narrow.KRC)>; + +def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (OpNode (Narrow.VT Narrow.RC:$src1), + (Narrow.VT Narrow.RC:$src2), imm:$cc))), + (COPY_TO_REGCLASS (!cast(InstStr##Zrrik) + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), + imm:$cc), Narrow.KRC)>; } let Predicates = [HasAVX512, NoVLX] in { - defm : axv512_icmp_packed_no_vlx_lowering; - defm : 
axv512_icmp_packed_no_vlx_lowering; + defm : axv512_icmp_packed_no_vlx_lowering; + defm : axv512_icmp_packed_no_vlx_lowering; + + defm : axv512_icmp_packed_no_vlx_lowering; + defm : axv512_icmp_packed_no_vlx_lowering; + + defm : axv512_icmp_packed_no_vlx_lowering; + defm : axv512_icmp_packed_no_vlx_lowering; - defm : axv512_icmp_packed_cc_no_vlx_lowering; - defm : axv512_icmp_packed_cc_no_vlx_lowering; - defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_no_vlx_lowering; + defm : axv512_icmp_packed_no_vlx_lowering; + + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_no_vlx_lowering; } // Mask setting all 0s or 1s @@ -3376,8 +3407,15 @@ // Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't // available. Use a 512-bit operation and extract. 
let Predicates = [HasAVX512, NoVLX] in { + defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>; + defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>; defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>; defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>; + + defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>; + defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>; + defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>; + defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>; } let Predicates = [HasAVX512] in { Index: llvm/trunk/lib/Target/X86/X86InstrVecCompiler.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrVecCompiler.td +++ llvm/trunk/lib/Target/X86/X86InstrVecCompiler.td @@ -495,6 +495,18 @@ // If the bits are not zero we have to fall back to explicitly zeroing by // using shifts. +let Predicates = [HasAVX512] in { + def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV), + (v2i1 VK2:$mask), (iPTR 0))), + (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK2:$mask, VK16), + (i8 14)), (i8 14))>; + + def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV), + (v4i1 VK4:$mask), (iPTR 0))), + (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK4:$mask, VK16), + (i8 12)), (i8 12))>; +} + let Predicates = [HasAVX512, NoDQI] in { def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV), (v8i1 VK8:$mask), (iPTR 0))), @@ -506,9 +518,7 @@ def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV), (v8i1 VK8:$mask), (iPTR 0))), (COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK16)>; -} -let Predicates = [HasVLX, HasDQI] in { def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV), (v2i1 VK2:$mask), (iPTR 0))), (KSHIFTRBri (KSHIFTLBri (COPY_TO_REGCLASS VK2:$mask, VK8), @@ -519,17 +529,6 @@ (i8 4)), (i8 4))>; } -let Predicates = [HasVLX] in { - def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV), - (v2i1 VK2:$mask), (iPTR 0))), - (KSHIFTRWri 
(KSHIFTLWri (COPY_TO_REGCLASS VK2:$mask, VK16), - (i8 14)), (i8 14))>; - def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV), - (v4i1 VK4:$mask), (iPTR 0))), - (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK4:$mask, VK16), - (i8 12)), (i8 12))>; -} - let Predicates = [HasBWI] in { def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV), (v16i1 VK16:$mask), (iPTR 0))), Index: llvm/trunk/test/Analysis/CostModel/X86/cast.ll =================================================================== --- llvm/trunk/test/Analysis/CostModel/X86/cast.ll +++ llvm/trunk/test/Analysis/CostModel/X86/cast.ll @@ -8,11 +8,17 @@ define i32 @add(i32 %arg) { ; CHECK-LABEL: for function 'add' ; -- Same size registeres -- - ;CHECK: cost of 1 {{.*}} zext + ;CHECK-AVX512: cost of 12 {{.*}} zext + ;CHECK-AVX2: cost of 1 {{.*}} zext + ;CHECK-AVX: cost of 1 {{.*}} zext %A = zext <4 x i1> undef to <4 x i32> - ;CHECK: cost of 2 {{.*}} sext + ;CHECK-AVX512: cost of 12 {{.*}} sext + ;CHECK-AVX2: cost of 2 {{.*}} sext + ;CHECK-AVX: cost of 2 {{.*}} sext %B = sext <4 x i1> undef to <4 x i32> - ;CHECK: cost of 0 {{.*}} trunc + ;CHECK-AVX512: cost of 0 {{.*}} trunc + ;CHECK-AVX2: cost of 0 {{.*}} trunc + ;CHECK-AVX: cost of 0 {{.*}} trunc %C = trunc <4 x i32> undef to <4 x i1> ; -- Different size registers -- Index: llvm/trunk/test/CodeGen/X86/avx512-cvt.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-cvt.ll +++ llvm/trunk/test/CodeGen/X86/avx512-cvt.ll @@ -702,9 +702,10 @@ ; NOVL-LABEL: f64to4f32_mask: ; NOVL: # %bb.0: ; NOVL-NEXT: vpslld $31, %xmm1, %xmm1 -; NOVL-NEXT: vpsrad $31, %xmm1, %xmm1 +; NOVL-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NOVL-NEXT: vcvtpd2ps %ymm0, %xmm0 -; NOVL-NEXT: vpand %xmm0, %xmm1, %xmm0 +; NOVL-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z} +; NOVL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NOVL-NEXT: vzeroupper ; NOVL-NEXT: retq ; @@ -743,9 +744,12 @@ define <4 x double> @f32to4f64_mask(<4 x float> %b, <4 x double> %b1, 
<4 x double> %a1) { ; NOVL-LABEL: f32to4f64_mask: ; NOVL: # %bb.0: +; NOVL-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 +; NOVL-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NOVL-NEXT: vcvtps2pd %xmm0, %ymm0 -; NOVL-NEXT: vcmpltpd %ymm2, %ymm1, %ymm1 -; NOVL-NEXT: vandpd %ymm0, %ymm1, %ymm0 +; NOVL-NEXT: vcmpltpd %zmm2, %zmm1, %k1 +; NOVL-NEXT: vmovapd %zmm0, %zmm0 {%k1} {z} +; NOVL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; NOVL-NEXT: retq ; ; VL-LABEL: f32to4f64_mask: @@ -1591,12 +1595,15 @@ } define <4 x float> @sbto4f32(<4 x float> %a) { -; NOVL-LABEL: sbto4f32: -; NOVL: # %bb.0: -; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; NOVL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 -; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0 -; NOVL-NEXT: retq +; NOVLDQ-LABEL: sbto4f32: +; NOVLDQ: # %bb.0: +; NOVLDQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NOVLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; NOVLDQ-NEXT: vcmpltps %zmm0, %zmm1, %k1 +; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; NOVLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0 +; NOVLDQ-NEXT: vzeroupper +; NOVLDQ-NEXT: retq ; ; VLDQ-LABEL: sbto4f32: ; VLDQ: # %bb.0: @@ -1614,19 +1621,30 @@ ; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; VLNODQ-NEXT: vcvtdq2ps %xmm0, %xmm0 ; VLNODQ-NEXT: retq +; +; AVX512DQ-LABEL: sbto4f32: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm1, %k0 +; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 +; AVX512DQ-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq %cmpres = fcmp ogt <4 x float> %a, zeroinitializer %1 = sitofp <4 x i1> %cmpres to <4 x float> ret <4 x float> %1 } define <4 x double> @sbto4f64(<4 x double> %a) { -; NOVL-LABEL: sbto4f64: -; NOVL: # %bb.0: -; NOVL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; NOVL-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 -; NOVL-NEXT: vpmovqd %zmm0, %ymm0 -; NOVL-NEXT: vcvtdq2pd %xmm0, %ymm0 -; NOVL-NEXT: retq +; NOVLDQ-LABEL: 
sbto4f64: +; NOVLDQ: # %bb.0: +; NOVLDQ-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NOVLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; NOVLDQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1 +; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; NOVLDQ-NEXT: vcvtdq2pd %xmm0, %ymm0 +; NOVLDQ-NEXT: retq ; ; VLDQ-LABEL: sbto4f64: ; VLDQ: # %bb.0: @@ -1644,18 +1662,30 @@ ; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; VLNODQ-NEXT: vcvtdq2pd %xmm0, %ymm0 ; VLNODQ-NEXT: retq +; +; AVX512DQ-LABEL: sbto4f64: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; AVX512DQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm1, %k0 +; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 +; AVX512DQ-NEXT: vcvtdq2pd %xmm0, %ymm0 +; AVX512DQ-NEXT: retq %cmpres = fcmp ogt <4 x double> %a, zeroinitializer %1 = sitofp <4 x i1> %cmpres to <4 x double> ret <4 x double> %1 } define <2 x float> @sbto2f32(<2 x float> %a) { -; NOVL-LABEL: sbto2f32: -; NOVL: # %bb.0: -; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; NOVL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 -; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0 -; NOVL-NEXT: retq +; NOVLDQ-LABEL: sbto2f32: +; NOVLDQ: # %bb.0: +; NOVLDQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NOVLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; NOVLDQ-NEXT: vcmpltps %zmm0, %zmm1, %k1 +; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; NOVLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0 +; NOVLDQ-NEXT: vzeroupper +; NOVLDQ-NEXT: retq ; ; VLDQ-LABEL: sbto2f32: ; VLDQ: # %bb.0: @@ -1673,19 +1703,31 @@ ; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; VLNODQ-NEXT: vcvtdq2ps %xmm0, %xmm0 ; VLNODQ-NEXT: retq +; +; AVX512DQ-LABEL: sbto2f32: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm1, %k0 +; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 +; AVX512DQ-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq %cmpres = fcmp ogt <2 x 
float> %a, zeroinitializer %1 = sitofp <2 x i1> %cmpres to <2 x float> ret <2 x float> %1 } define <2 x double> @sbto2f64(<2 x double> %a) { -; NOVL-LABEL: sbto2f64: -; NOVL: # %bb.0: -; NOVL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; NOVL-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 -; NOVL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] -; NOVL-NEXT: vcvtdq2pd %xmm0, %xmm0 -; NOVL-NEXT: retq +; NOVLDQ-LABEL: sbto2f64: +; NOVLDQ: # %bb.0: +; NOVLDQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NOVLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; NOVLDQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1 +; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; NOVLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0 +; NOVLDQ-NEXT: vzeroupper +; NOVLDQ-NEXT: retq ; ; VLDQ-LABEL: sbto2f64: ; VLDQ: # %bb.0: @@ -1703,6 +1745,16 @@ ; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; VLNODQ-NEXT: vcvtdq2pd %xmm0, %xmm0 ; VLNODQ-NEXT: retq +; +; AVX512DQ-LABEL: sbto2f64: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512DQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm1, %k0 +; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 +; AVX512DQ-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq %cmpres = fcmp ogt <2 x double> %a, zeroinitializer %1 = sitofp <2 x i1> %cmpres to <2 x double> ret <2 x double> %1 @@ -1925,10 +1977,12 @@ define <4 x float> @ubto4f32(<4 x i32> %a) { ; NOVL-LABEL: ubto4f32: ; NOVL: # %bb.0: +; NOVL-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NOVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NOVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] -; NOVL-NEXT: vpand %xmm1, %xmm0, %xmm0 +; NOVL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 +; NOVL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} +; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0 +; NOVL-NEXT: vzeroupper ; NOVL-NEXT: retq ; ; VL-LABEL: ubto4f32: @@ -1946,9 +2000,10 @@ define <4 x double> @ubto4f64(<4 x i32> %a) { ; NOVL-LABEL: ubto4f64: ; NOVL: # %bb.0: +; 
NOVL-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NOVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NOVL-NEXT: vpsrld $31, %xmm0, %xmm0 +; NOVL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 +; NOVL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} ; NOVL-NEXT: vcvtdq2pd %xmm0, %ymm0 ; NOVL-NEXT: retq ; @@ -1969,14 +2024,10 @@ ; NOVL: # %bb.0: ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; NOVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NOVL-NEXT: vpextrb $8, %xmm0, %eax -; NOVL-NEXT: andl $1, %eax -; NOVL-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm1 -; NOVL-NEXT: vpextrb $0, %xmm0, %eax -; NOVL-NEXT: andl $1, %eax -; NOVL-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0 -; NOVL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] +; NOVL-NEXT: vpcmpltuq %zmm1, %zmm0, %k1 +; NOVL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} +; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0 +; NOVL-NEXT: vzeroupper ; NOVL-NEXT: retq ; ; VL-LABEL: ubto2f32: @@ -1997,10 +2048,8 @@ ; NOVL: # %bb.0: ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; NOVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NOVL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; NOVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] -; NOVL-NEXT: vpand %xmm1, %xmm0, %xmm0 +; NOVL-NEXT: vpcmpltuq %zmm1, %zmm0, %k1 +; NOVL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} ; NOVL-NEXT: vcvtudq2pd %ymm0, %zmm0 ; NOVL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NOVL-NEXT: vzeroupper @@ -2020,19 +2069,27 @@ } define <2 x i64> @test_2f64toub(<2 x double> %a, <2 x i64> %passthru) { -; NOVLDQ-LABEL: test_2f64toub: -; NOVLDQ: # %bb.0: -; NOVLDQ-NEXT: vcvttsd2usi %xmm0, %rax -; NOVLDQ-NEXT: vmovq %rax, %xmm2 -; NOVLDQ-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; NOVLDQ-NEXT: vcvttsd2usi %xmm0, %rax -; NOVLDQ-NEXT: vmovq %rax, %xmm0 -; NOVLDQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] -; NOVLDQ-NEXT: vpsllq 
$63, %xmm0, %xmm0 -; NOVLDQ-NEXT: vpsraq $63, %zmm0, %zmm0 -; NOVLDQ-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NOVLDQ-NEXT: vzeroupper -; NOVLDQ-NEXT: retq +; KNL-LABEL: test_2f64toub: +; KNL: # %bb.0: +; KNL-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; KNL-NEXT: vcvttsd2si %xmm2, %eax +; KNL-NEXT: kmovw %eax, %k0 +; KNL-NEXT: vcvttsd2si %xmm0, %eax +; KNL-NEXT: andl $1, %eax +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kshiftrw $1, %k0, %k2 +; KNL-NEXT: kshiftlw $1, %k2, %k2 +; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kshiftrw $1, %k1, %k2 +; KNL-NEXT: kxorw %k0, %k2, %k0 +; KNL-NEXT: kshiftlw $15, %k0, %k0 +; KNL-NEXT: kshiftrw $14, %k0, %k0 +; KNL-NEXT: kxorw %k1, %k0, %k1 +; KNL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 +; KNL-NEXT: vzeroupper +; KNL-NEXT: retq ; ; VL-LABEL: test_2f64toub: ; VL: # %bb.0: @@ -2044,13 +2101,47 @@ ; ; AVX512DQ-LABEL: test_2f64toub: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 -; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0 -; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpsraq $63, %zmm0, %zmm0 -; AVX512DQ-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX512DQ-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX512DQ-NEXT: vcvttsd2si %xmm2, %eax +; AVX512DQ-NEXT: kmovw %eax, %k0 +; AVX512DQ-NEXT: vcvttsd2si %xmm0, %eax +; AVX512DQ-NEXT: andl $1, %eax +; AVX512DQ-NEXT: kmovw %eax, %k1 +; AVX512DQ-NEXT: kshiftrw $1, %k0, %k2 +; AVX512DQ-NEXT: kshiftlw $1, %k2, %k2 +; AVX512DQ-NEXT: korw %k1, %k2, %k1 +; AVX512DQ-NEXT: kshiftrw $1, %k1, %k2 +; AVX512DQ-NEXT: kxorw %k0, %k2, %k0 +; AVX512DQ-NEXT: kshiftlw $15, %k0, %k0 +; AVX512DQ-NEXT: kshiftrw $14, %k0, %k0 +; AVX512DQ-NEXT: kxorw %k1, %k0, %k1 +; AVX512DQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: 
retq +; +; AVX512BW-LABEL: test_2f64toub: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX512BW-NEXT: vcvttsd2si %xmm2, %eax +; AVX512BW-NEXT: kmovd %eax, %k0 +; AVX512BW-NEXT: vcvttsd2si %xmm0, %eax +; AVX512BW-NEXT: andl $1, %eax +; AVX512BW-NEXT: kmovw %eax, %k1 +; AVX512BW-NEXT: kshiftrw $1, %k0, %k2 +; AVX512BW-NEXT: kshiftlw $1, %k2, %k2 +; AVX512BW-NEXT: korw %k1, %k2, %k1 +; AVX512BW-NEXT: kshiftrw $1, %k1, %k2 +; AVX512BW-NEXT: kxorw %k0, %k2, %k0 +; AVX512BW-NEXT: kshiftlw $15, %k0, %k0 +; AVX512BW-NEXT: kshiftrw $14, %k0, %k0 +; AVX512BW-NEXT: kxorw %k1, %k0, %k1 +; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; AVX512BW-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq %mask = fptoui <2 x double> %a to <2 x i1> %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer ret <2 x i64> %select @@ -2059,12 +2150,12 @@ define <4 x i64> @test_4f64toub(<4 x double> %a, <4 x i64> %passthru) { ; NOVL-LABEL: test_4f64toub: ; NOVL: # %bb.0: -; NOVL-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 -; NOVL-NEXT: vcvttpd2udq %zmm0, %ymm0 +; NOVL-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NOVL-NEXT: vcvttpd2dq %ymm0, %xmm0 ; NOVL-NEXT: vpslld $31, %xmm0, %xmm0 -; NOVL-NEXT: vpsrad $31, %xmm0, %xmm0 -; NOVL-NEXT: vpmovsxdq %xmm0, %ymm0 -; NOVL-NEXT: vpand %ymm1, %ymm0, %ymm0 +; NOVL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; NOVL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; NOVL-NEXT: retq ; ; VL-LABEL: test_4f64toub: @@ -2101,19 +2192,16 @@ } define <2 x i64> @test_2f32toub(<2 x float> %a, <2 x i64> %passthru) { -; NOVLDQ-LABEL: test_2f32toub: -; NOVLDQ: # %bb.0: -; NOVLDQ-NEXT: vcvttss2usi %xmm0, %rax -; NOVLDQ-NEXT: vmovq %rax, %xmm2 -; NOVLDQ-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] -; NOVLDQ-NEXT: vcvttss2usi %xmm0, %rax -; NOVLDQ-NEXT: 
vmovq %rax, %xmm0 -; NOVLDQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] -; NOVLDQ-NEXT: vpsllq $63, %xmm0, %xmm0 -; NOVLDQ-NEXT: vpsraq $63, %zmm0, %zmm0 -; NOVLDQ-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NOVLDQ-NEXT: vzeroupper -; NOVLDQ-NEXT: retq +; NOVL-LABEL: test_2f32toub: +; NOVL: # %bb.0: +; NOVL-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NOVL-NEXT: vcvttps2dq %xmm0, %xmm0 +; NOVL-NEXT: vpslld $31, %xmm0, %xmm0 +; NOVL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; NOVL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 +; NOVL-NEXT: vzeroupper +; NOVL-NEXT: retq ; ; VL-LABEL: test_2f32toub: ; VL: # %bb.0: @@ -2122,16 +2210,6 @@ ; VL-NEXT: vptestmd %xmm0, %xmm0, %k1 ; VL-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z} ; VL-NEXT: retq -; -; AVX512DQ-LABEL: test_2f32toub: -; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0 -; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0 -; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpsraq $63, %zmm0, %zmm0 -; AVX512DQ-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX512DQ-NEXT: vzeroupper -; AVX512DQ-NEXT: retq %mask = fptoui <2 x float> %a to <2 x i1> %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer ret <2 x i64> %select @@ -2140,12 +2218,12 @@ define <4 x i64> @test_4f32toub(<4 x float> %a, <4 x i64> %passthru) { ; NOVL-LABEL: test_4f32toub: ; NOVL: # %bb.0: -; NOVL-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 -; NOVL-NEXT: vcvttps2udq %zmm0, %zmm0 +; NOVL-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NOVL-NEXT: vcvttps2dq %xmm0, %xmm0 ; NOVL-NEXT: vpslld $31, %xmm0, %xmm0 -; NOVL-NEXT: vpsrad $31, %xmm0, %xmm0 -; NOVL-NEXT: vpmovsxdq %xmm0, %ymm0 -; NOVL-NEXT: vpand %ymm1, %ymm0, %ymm0 +; NOVL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; NOVL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; NOVL-NEXT: retq ; ; VL-LABEL: test_4f32toub: @@ -2195,16 +2273,27 @@ } define <2 x i64> 
@test_2f64tosb(<2 x double> %a, <2 x i64> %passthru) { -; NOVLDQ-LABEL: test_2f64tosb: -; NOVLDQ: # %bb.0: -; NOVLDQ-NEXT: vcvttsd2si %xmm0, %rax -; NOVLDQ-NEXT: vmovq %rax, %xmm2 -; NOVLDQ-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; NOVLDQ-NEXT: vcvttsd2si %xmm0, %rax -; NOVLDQ-NEXT: vmovq %rax, %xmm0 -; NOVLDQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] -; NOVLDQ-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NOVLDQ-NEXT: retq +; KNL-LABEL: test_2f64tosb: +; KNL: # %bb.0: +; KNL-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; KNL-NEXT: vcvttsd2si %xmm2, %eax +; KNL-NEXT: kmovw %eax, %k0 +; KNL-NEXT: vcvttsd2si %xmm0, %eax +; KNL-NEXT: andl $1, %eax +; KNL-NEXT: kmovw %eax, %k1 +; KNL-NEXT: kshiftrw $1, %k0, %k2 +; KNL-NEXT: kshiftlw $1, %k2, %k2 +; KNL-NEXT: korw %k1, %k2, %k1 +; KNL-NEXT: kshiftrw $1, %k1, %k2 +; KNL-NEXT: kxorw %k0, %k2, %k0 +; KNL-NEXT: kshiftlw $15, %k0, %k0 +; KNL-NEXT: kshiftrw $14, %k0, %k0 +; KNL-NEXT: kxorw %k1, %k0, %k1 +; KNL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 +; KNL-NEXT: vzeroupper +; KNL-NEXT: retq ; ; VL-LABEL: test_2f64tosb: ; VL: # %bb.0: @@ -2216,11 +2305,47 @@ ; ; AVX512DQ-LABEL: test_2f64tosb: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 -; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0 -; AVX512DQ-NEXT: vandps %xmm1, %xmm0, %xmm0 +; AVX512DQ-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX512DQ-NEXT: vcvttsd2si %xmm2, %eax +; AVX512DQ-NEXT: kmovw %eax, %k0 +; AVX512DQ-NEXT: vcvttsd2si %xmm0, %eax +; AVX512DQ-NEXT: andl $1, %eax +; AVX512DQ-NEXT: kmovw %eax, %k1 +; AVX512DQ-NEXT: kshiftrw $1, %k0, %k2 +; AVX512DQ-NEXT: kshiftlw $1, %k2, %k2 +; AVX512DQ-NEXT: korw %k1, %k2, %k1 +; AVX512DQ-NEXT: kshiftrw $1, %k1, %k2 +; AVX512DQ-NEXT: kxorw %k0, %k2, %k0 +; AVX512DQ-NEXT: kshiftlw $15, %k0, %k0 +; AVX512DQ-NEXT: kshiftrw $14, %k0, %k0 +; 
AVX512DQ-NEXT: kxorw %k1, %k0, %k1 +; AVX512DQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq +; +; AVX512BW-LABEL: test_2f64tosb: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] +; AVX512BW-NEXT: vcvttsd2si %xmm2, %eax +; AVX512BW-NEXT: kmovd %eax, %k0 +; AVX512BW-NEXT: vcvttsd2si %xmm0, %eax +; AVX512BW-NEXT: andl $1, %eax +; AVX512BW-NEXT: kmovw %eax, %k1 +; AVX512BW-NEXT: kshiftrw $1, %k0, %k2 +; AVX512BW-NEXT: kshiftlw $1, %k2, %k2 +; AVX512BW-NEXT: korw %k1, %k2, %k1 +; AVX512BW-NEXT: kshiftrw $1, %k1, %k2 +; AVX512BW-NEXT: kxorw %k0, %k2, %k0 +; AVX512BW-NEXT: kshiftlw $15, %k0, %k0 +; AVX512BW-NEXT: kshiftrw $14, %k0, %k0 +; AVX512BW-NEXT: kxorw %k1, %k0, %k1 +; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; AVX512BW-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq %mask = fptosi <2 x double> %a to <2 x i1> %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer ret <2 x i64> %select @@ -2229,9 +2354,11 @@ define <4 x i64> @test_4f64tosb(<4 x double> %a, <4 x i64> %passthru) { ; NOVL-LABEL: test_4f64tosb: ; NOVL: # %bb.0: +; NOVL-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NOVL-NEXT: vcvttpd2dq %ymm0, %xmm0 -; NOVL-NEXT: vpmovsxdq %xmm0, %ymm0 -; NOVL-NEXT: vpand %ymm1, %ymm0, %ymm0 +; NOVL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; NOVL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; NOVL-NEXT: retq ; ; VL-LABEL: test_4f64tosb: @@ -2265,16 +2392,15 @@ } define <2 x i64> @test_2f32tosb(<2 x float> %a, <2 x i64> %passthru) { -; NOVLDQ-LABEL: test_2f32tosb: -; NOVLDQ: # %bb.0: -; NOVLDQ-NEXT: vcvttss2si %xmm0, %rax -; NOVLDQ-NEXT: vmovq %rax, %xmm2 -; NOVLDQ-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] -; NOVLDQ-NEXT: vcvttss2si %xmm0, %rax -; 
NOVLDQ-NEXT: vmovq %rax, %xmm0 -; NOVLDQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] -; NOVLDQ-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NOVLDQ-NEXT: retq +; NOVL-LABEL: test_2f32tosb: +; NOVL: # %bb.0: +; NOVL-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NOVL-NEXT: vcvttps2dq %xmm0, %xmm0 +; NOVL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; NOVL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 +; NOVL-NEXT: vzeroupper +; NOVL-NEXT: retq ; ; VL-LABEL: test_2f32tosb: ; VL: # %bb.0: @@ -2282,14 +2408,6 @@ ; VL-NEXT: vptestmd %xmm0, %xmm0, %k1 ; VL-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z} ; VL-NEXT: retq -; -; AVX512DQ-LABEL: test_2f32tosb: -; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0 -; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0 -; AVX512DQ-NEXT: vandps %xmm1, %xmm0, %xmm0 -; AVX512DQ-NEXT: vzeroupper -; AVX512DQ-NEXT: retq %mask = fptosi <2 x float> %a to <2 x i1> %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer ret <2 x i64> %select @@ -2298,9 +2416,11 @@ define <4 x i64> @test_4f32tosb(<4 x float> %a, <4 x i64> %passthru) { ; NOVL-LABEL: test_4f32tosb: ; NOVL: # %bb.0: +; NOVL-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NOVL-NEXT: vcvttps2dq %xmm0, %xmm0 -; NOVL-NEXT: vpmovsxdq %xmm0, %ymm0 -; NOVL-NEXT: vpand %ymm1, %ymm0, %ymm0 +; NOVL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; NOVL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; NOVL-NEXT: retq ; ; VL-LABEL: test_4f32tosb: Index: llvm/trunk/test/CodeGen/X86/avx512-ext.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-ext.ll +++ llvm/trunk/test/CodeGen/X86/avx512-ext.ll @@ -301,9 +301,10 @@ ; KNL-LABEL: zext_4x8mem_to_4x32: ; KNL: # %bb.0: ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 -; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 -; KNL-NEXT: vpmovzxbd {{.*#+}} xmm1 = 
mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero -; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero +; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: zext_4x8mem_to_4x32: @@ -322,9 +323,10 @@ ; KNL-LABEL: sext_4x8mem_to_4x32: ; KNL: # %bb.0: ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 -; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 -; KNL-NEXT: vpmovsxbd (%rdi), %xmm1 -; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovsxbd (%rdi), %xmm0 +; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: sext_4x8mem_to_4x32: @@ -489,9 +491,10 @@ ; KNL-LABEL: zext_2x8mem_to_2x64: ; KNL: # %bb.0: ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 -; KNL-NEXT: vpsraq $63, %zmm0, %zmm0 -; KNL-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero -; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 +; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero +; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: zext_2x8mem_to_2x64: @@ -509,9 +512,10 @@ ; KNL-LABEL: sext_2x8mem_to_2x64mask: ; KNL: # %bb.0: ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 -; KNL-NEXT: vpsraq $63, %zmm0, %zmm0 -; KNL-NEXT: vpmovsxbq (%rdi), %xmm1 -; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 +; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovsxbq (%rdi), %xmm0 +; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: sext_2x8mem_to_2x64mask: @@ 
-539,10 +543,10 @@ ; KNL-LABEL: zext_4x8mem_to_4x64: ; KNL: # %bb.0: ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 -; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 -; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; KNL-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero -; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero +; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: zext_4x8mem_to_4x64: @@ -561,10 +565,10 @@ ; KNL-LABEL: sext_4x8mem_to_4x64mask: ; KNL: # %bb.0: ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 -; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 -; KNL-NEXT: vpmovsxdq %xmm0, %ymm0 -; KNL-NEXT: vpmovsxbq (%rdi), %ymm1 -; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovsxbq (%rdi), %ymm0 +; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: sext_4x8mem_to_4x64mask: @@ -645,9 +649,10 @@ ; KNL-LABEL: zext_4x16mem_to_4x32: ; KNL: # %bb.0: ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 -; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 -; KNL-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero -; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: zext_4x16mem_to_4x32: @@ -666,9 +671,10 @@ ; KNL-LABEL: 
sext_4x16mem_to_4x32mask: ; KNL: # %bb.0: ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 -; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 -; KNL-NEXT: vpmovsxwd (%rdi), %xmm1 -; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovsxwd (%rdi), %xmm0 +; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: sext_4x16mem_to_4x32mask: @@ -865,9 +871,10 @@ ; KNL-LABEL: zext_2x16mem_to_2x64: ; KNL: # %bb.0: ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 -; KNL-NEXT: vpsraq $63, %zmm0, %zmm0 -; KNL-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero -; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 +; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero +; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: zext_2x16mem_to_2x64: @@ -886,9 +893,10 @@ ; KNL-LABEL: sext_2x16mem_to_2x64mask: ; KNL: # %bb.0: ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 -; KNL-NEXT: vpsraq $63, %zmm0, %zmm0 -; KNL-NEXT: vpmovsxwq (%rdi), %xmm1 -; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 +; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovsxwq (%rdi), %xmm0 +; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: sext_2x16mem_to_2x64mask: @@ -917,10 +925,10 @@ ; KNL-LABEL: zext_4x16mem_to_4x64: ; KNL: # %bb.0: ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 -; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 -; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; KNL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero -; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovzxwq {{.*#+}} ymm0 = 
mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero +; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: zext_4x16mem_to_4x64: @@ -939,10 +947,10 @@ ; KNL-LABEL: sext_4x16mem_to_4x64mask: ; KNL: # %bb.0: ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 -; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 -; KNL-NEXT: vpmovsxdq %xmm0, %ymm0 -; KNL-NEXT: vpmovsxwq (%rdi), %ymm1 -; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovsxwq (%rdi), %ymm0 +; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: sext_4x16mem_to_4x64mask: @@ -1052,9 +1060,10 @@ ; KNL-LABEL: zext_2x32mem_to_2x64: ; KNL: # %bb.0: ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 -; KNL-NEXT: vpsraq $63, %zmm0, %zmm0 -; KNL-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero -; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 +; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero +; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: zext_2x32mem_to_2x64: @@ -1073,9 +1082,10 @@ ; KNL-LABEL: sext_2x32mem_to_2x64mask: ; KNL: # %bb.0: ; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 -; KNL-NEXT: vpsraq $63, %zmm0, %zmm0 -; KNL-NEXT: vpmovsxdq (%rdi), %xmm1 -; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 +; KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovsxdq (%rdi), %xmm0 +; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: sext_2x32mem_to_2x64mask: @@ -1104,10 +1114,10 @@ ; KNL-LABEL: zext_4x32mem_to_4x64: ; KNL: # %bb.0: ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 -; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 -; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; KNL-NEXT: vpmovzxdq {{.*#+}} 
ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero -; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: zext_4x32mem_to_4x64: @@ -1126,10 +1136,10 @@ ; KNL-LABEL: sext_4x32mem_to_4x64mask: ; KNL: # %bb.0: ; KNL-NEXT: vpslld $31, %xmm0, %xmm0 -; KNL-NEXT: vpsrad $31, %xmm0, %xmm0 -; KNL-NEXT: vpmovsxdq %xmm0, %ymm0 -; KNL-NEXT: vpmovsxdq (%rdi), %ymm1 -; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovsxdq (%rdi), %ymm0 +; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: sext_4x32mem_to_4x64mask: @@ -1167,10 +1177,10 @@ ; KNL-LABEL: zext_4x32_to_4x64mask: ; KNL: # %bb.0: ; KNL-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL-NEXT: vpsrad $31, %xmm1, %xmm1 -; KNL-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero +; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 ; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0 +; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: zext_4x32_to_4x64mask: Index: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll +++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll @@ -844,40 +844,20 @@ define i8 @test_iinsertelement_v4i1(i32 %a, i32 %b, <4 x i32> %x , <4 x i32> %y) { ; KNL-LABEL: test_iinsertelement_v4i1: ; KNL: ## %bb.0: +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL-NEXT: cmpl %esi, %edi ; 
KNL-NEXT: setb %al -; KNL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; KNL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; KNL-NEXT: vpextrb $4, %xmm0, %ecx -; KNL-NEXT: kmovw %ecx, %k0 -; KNL-NEXT: vpextrb $0, %xmm0, %ecx -; KNL-NEXT: andl $1, %ecx -; KNL-NEXT: kmovw %ecx, %k1 -; KNL-NEXT: kshiftrw $1, %k0, %k2 -; KNL-NEXT: kshiftlw $1, %k2, %k2 -; KNL-NEXT: korw %k1, %k2, %k1 -; KNL-NEXT: kshiftrw $1, %k1, %k2 -; KNL-NEXT: kxorw %k0, %k2, %k0 -; KNL-NEXT: kshiftlw $15, %k0, %k0 -; KNL-NEXT: kshiftrw $14, %k0, %k0 -; KNL-NEXT: kxorw %k1, %k0, %k0 +; KNL-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; KNL-NEXT: kshiftrw $2, %k0, %k1 ; KNL-NEXT: kmovw %eax, %k2 ; KNL-NEXT: kxorw %k2, %k1, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $13, %k1, %k1 ; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $3, %k0, %k1 -; KNL-NEXT: vpextrb $12, %xmm0, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $12, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: ## kill: def %al killed %al killed %eax +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test_iinsertelement_v4i1: @@ -905,18 +885,11 @@ define i8 @test_iinsertelement_v2i1(i32 %a, i32 %b, <2 x i64> %x , <2 x i64> %y) { ; KNL-LABEL: test_iinsertelement_v2i1: ; KNL: ## %bb.0: +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL-NEXT: cmpl %esi, %edi ; KNL-NEXT: setb %al -; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; KNL-NEXT: vpextrb $0, %xmm0, %ecx -; KNL-NEXT: andl $1, %ecx -; KNL-NEXT: kmovw %ecx, %k0 -; KNL-NEXT: kshiftrw $1, %k0, %k1 -; KNL-NEXT: kshiftlw $1, %k1, %k1 -; KNL-NEXT: korw 
%k0, %k1, %k0 +; KNL-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ; KNL-NEXT: kshiftrw $1, %k0, %k1 ; KNL-NEXT: kmovw %eax, %k2 ; KNL-NEXT: kxorw %k2, %k1, %k1 @@ -925,6 +898,7 @@ ; KNL-NEXT: kxorw %k0, %k1, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: ## kill: def %al killed %al killed %eax +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test_iinsertelement_v2i1: @@ -952,15 +926,15 @@ define zeroext i8 @test_extractelement_v2i1(<2 x i64> %a, <2 x i64> %b) { ; KNL-LABEL: test_extractelement_v2i1: ; KNL: ## %bb.0: -; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; KNL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vpextrb $0, %xmm0, %eax +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; KNL-NEXT: vpcmpnleuq %zmm1, %zmm0, %k0 +; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: andb $1, %al ; KNL-NEXT: movb $4, %cl ; KNL-NEXT: subb %al, %cl ; KNL-NEXT: movzbl %cl, %eax +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test_extractelement_v2i1: @@ -981,15 +955,15 @@ define zeroext i8 @extractelement_v2i1_alt(<2 x i64> %a, <2 x i64> %b) { ; KNL-LABEL: extractelement_v2i1_alt: ; KNL: ## %bb.0: -; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; KNL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vpextrb $0, %xmm0, %eax +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; KNL-NEXT: vpcmpnleuq %zmm1, %zmm0, %k0 +; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: andb $1, %al ; KNL-NEXT: movb $4, %cl ; KNL-NEXT: subb %al, %cl ; KNL-NEXT: movzbl %cl, %eax +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: extractelement_v2i1_alt: @@ -1011,12 +985,13 @@ define zeroext i8 @test_extractelement_v4i1(<4 x i32> %a, <4 x i32> %b) { ; KNL-LABEL: 
test_extractelement_v4i1: ; KNL: ## %bb.0: -; KNL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; KNL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vpextrd $3, %xmm0, %eax +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 +; KNL-NEXT: kshiftrw $3, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: andl $1, %eax +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test_extractelement_v4i1: @@ -1550,14 +1525,15 @@ ; KNL-LABEL: test_extractelement_varible_v2i1: ; KNL: ## %bb.0: ; KNL-NEXT: ## kill: def %edi killed %edi def %rdi -; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; KNL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; KNL-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 +; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: vextracti32x4 $0, %zmm0, -{{[0-9]+}}(%rsp) ; KNL-NEXT: andl $1, %edi -; KNL-NEXT: movl -24(%rsp,%rdi,8), %eax +; KNL-NEXT: movzbl -24(%rsp,%rdi,8), %eax ; KNL-NEXT: andl $1, %eax +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test_extractelement_varible_v2i1: @@ -1580,14 +1556,15 @@ ; KNL-LABEL: test_extractelement_varible_v4i1: ; KNL: ## %bb.0: ; KNL-NEXT: ## kill: def %edi killed %edi def %rdi -; KNL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; KNL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; 
KNL-NEXT: vpcmpnleud %zmm1, %zmm0, %k1 +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: vextracti32x4 $0, %zmm0, -{{[0-9]+}}(%rsp) ; KNL-NEXT: andl $3, %edi -; KNL-NEXT: movl -24(%rsp,%rdi,4), %eax +; KNL-NEXT: movzbl -24(%rsp,%rdi,4), %eax ; KNL-NEXT: andl $1, %eax +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test_extractelement_varible_v4i1: Index: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -3004,20 +3004,8 @@ define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) { ; CHECK-LABEL: test_mask_vextractf32x4: ; CHECK: ## %bb.0: -; CHECK-NEXT: vmovd %edi, %xmm2 -; CHECK-NEXT: kmovw %edi, %k0 -; CHECK-NEXT: kshiftrw $3, %k0, %k1 -; CHECK-NEXT: kmovw %k1, %eax -; CHECK-NEXT: kshiftrw $2, %k0, %k1 -; CHECK-NEXT: kmovw %k1, %ecx -; CHECK-NEXT: kshiftrw $1, %k0, %k0 -; CHECK-NEXT: kmovw %k0, %edx -; CHECK-NEXT: vpinsrb $4, %edx, %xmm2, %xmm2 -; CHECK-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2 -; CHECK-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm1 -; CHECK-NEXT: vpslld $31, %xmm2, %xmm2 -; CHECK-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm0 {%k1} ; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i32 2, <4 x float> %b, i8 %mask) ret <4 x float> %res @@ -3028,21 +3016,8 @@ define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) { ; CHECK-LABEL: test_mask_vextracti64x4: ; CHECK: ## %bb.0: -; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm1 -; CHECK-NEXT: vmovd %edi, %xmm2 -; CHECK-NEXT: kmovw %edi, %k0 -; CHECK-NEXT: kshiftrw $3, %k0, %k1 -; CHECK-NEXT: kmovw %k1, %eax -; CHECK-NEXT: kshiftrw $2, %k0, %k1 -; CHECK-NEXT: kmovw %k1, %ecx 
-; CHECK-NEXT: kshiftrw $1, %k0, %k0 -; CHECK-NEXT: kmovw %k0, %edx -; CHECK-NEXT: vpinsrb $4, %edx, %xmm2, %xmm2 -; CHECK-NEXT: vpinsrb $8, %ecx, %xmm2, %xmm2 -; CHECK-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; CHECK-NEXT: vpslld $31, %xmm2, %xmm2 -; CHECK-NEXT: vpmovsxdq %xmm2, %ymm2 -; CHECK-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm0 {%k1} ; CHECK-NEXT: retq %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i32 1, <4 x i64> %b, i8 %mask) ret <4 x i64> %res @@ -3053,21 +3028,8 @@ define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) { ; CHECK-LABEL: test_maskz_vextracti32x4: ; CHECK: ## %bb.0: -; CHECK-NEXT: vmovd %edi, %xmm1 -; CHECK-NEXT: kmovw %edi, %k0 -; CHECK-NEXT: kshiftrw $3, %k0, %k1 -; CHECK-NEXT: kmovw %k1, %eax -; CHECK-NEXT: kshiftrw $2, %k0, %k1 -; CHECK-NEXT: kmovw %k1, %ecx -; CHECK-NEXT: kshiftrw $1, %k0, %k0 -; CHECK-NEXT: kmovw %k0, %edx -; CHECK-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; CHECK-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; CHECK-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm0 -; CHECK-NEXT: vpslld $31, %xmm1, %xmm1 -; CHECK-NEXT: vpsrad $31, %xmm1, %xmm1 -; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i32 2, <4 x i32> zeroinitializer, i8 %mask) ret <4 x i32> %res Index: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll +++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll @@ -498,11 +498,15 @@ define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) { ; KNL-LABEL: test4: ; KNL: ## %bb.0: -; KNL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; KNL-NEXT: vpmovqd %zmm0, %ymm0 -; KNL-NEXT: vpcmpgtq 
%ymm3, %ymm2, %ymm1 -; KNL-NEXT: vpmovqd %zmm1, %ymm1 -; KNL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 +; KNL-NEXT: ## kill: def %ymm3 killed %ymm3 def %zmm3 +; KNL-NEXT: ## kill: def %ymm2 killed %ymm2 def %zmm2 +; KNL-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1 +; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0 +; KNL-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; KNL-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 +; KNL-NEXT: kandnw %k0, %k1, %k1 +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; @@ -517,21 +521,29 @@ ; ; AVX512BW-LABEL: test4: ; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512BW-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm1 -; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512BW-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: ## kill: def %ymm3 killed %ymm3 def %zmm3 +; AVX512BW-NEXT: ## kill: def %ymm2 killed %ymm2 def %zmm2 +; AVX512BW-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1 +; AVX512BW-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0 +; AVX512BW-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; AVX512BW-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 +; AVX512BW-NEXT: kandnw %k0, %k1, %k1 +; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test4: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512DQ-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm1 -; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512DQ-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 +; AVX512DQ-NEXT: ## kill: def %ymm3 killed %ymm3 def %zmm3 +; AVX512DQ-NEXT: ## kill: def %ymm2 killed %ymm2 def %zmm2 +; AVX512DQ-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1 +; AVX512DQ-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0 +; AVX512DQ-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; AVX512DQ-NEXT: 
vpcmpgtq %zmm3, %zmm2, %k1 +; AVX512DQ-NEXT: kandnw %k0, %k1, %k0 +; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 +; AVX512DQ-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq %x_gt_y = icmp sgt <4 x i64> %x, %y @@ -544,9 +556,16 @@ define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) { ; KNL-LABEL: test5: ; KNL: ## %bb.0: -; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; KNL-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm1 -; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; KNL-NEXT: ## kill: def %xmm3 killed %xmm3 def %zmm3 +; KNL-NEXT: ## kill: def %xmm2 killed %xmm2 def %zmm2 +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 +; KNL-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 +; KNL-NEXT: kandnw %k1, %k0, %k1 +; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test5: @@ -559,16 +578,30 @@ ; ; AVX512BW-LABEL: test5: ; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; AVX512BW-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm1 -; AVX512BW-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; AVX512BW-NEXT: ## kill: def %xmm3 killed %xmm3 def %zmm3 +; AVX512BW-NEXT: ## kill: def %xmm2 killed %xmm2 def %zmm2 +; AVX512BW-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512BW-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 +; AVX512BW-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 +; AVX512BW-NEXT: kandnw %k1, %k0, %k1 +; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test5: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; AVX512DQ-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm1 -; AVX512DQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; 
AVX512DQ-NEXT: ## kill: def %xmm3 killed %xmm3 def %zmm3 +; AVX512DQ-NEXT: ## kill: def %xmm2 killed %xmm2 def %zmm2 +; AVX512DQ-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512DQ-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512DQ-NEXT: vpcmpgtq %zmm0, %zmm1, %k0 +; AVX512DQ-NEXT: vpcmpgtq %zmm3, %zmm2, %k1 +; AVX512DQ-NEXT: kandnw %k1, %k0, %k0 +; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0 +; AVX512DQ-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq %x_gt_y = icmp slt <2 x i64> %x, %y %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1 @@ -795,10 +828,17 @@ ; KNL-LABEL: test11: ; KNL: ## %bb.0: ; KNL-NEXT: cmpl %esi, %edi -; KNL-NEXT: jg LBB20_2 -; KNL-NEXT: ## %bb.1: -; KNL-NEXT: vmovaps %xmm1, %xmm0 -; KNL-NEXT: LBB20_2: +; KNL-NEXT: jg LBB20_1 +; KNL-NEXT: ## %bb.2: +; KNL-NEXT: vpslld $31, %xmm1, %xmm0 +; KNL-NEXT: jmp LBB20_3 +; KNL-NEXT: LBB20_1: +; KNL-NEXT: vpslld $31, %xmm0, %xmm0 +; KNL-NEXT: LBB20_3: +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test11: @@ -818,19 +858,33 @@ ; AVX512BW-LABEL: test11: ; AVX512BW: ## %bb.0: ; AVX512BW-NEXT: cmpl %esi, %edi -; AVX512BW-NEXT: jg LBB20_2 -; AVX512BW-NEXT: ## %bb.1: -; AVX512BW-NEXT: vmovaps %xmm1, %xmm0 -; AVX512BW-NEXT: LBB20_2: +; AVX512BW-NEXT: jg LBB20_1 +; AVX512BW-NEXT: ## %bb.2: +; AVX512BW-NEXT: vpslld $31, %xmm1, %xmm0 +; AVX512BW-NEXT: jmp LBB20_3 +; AVX512BW-NEXT: LBB20_1: +; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0 +; AVX512BW-NEXT: LBB20_3: +; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k1 +; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: test11: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: cmpl %esi, %edi -; AVX512DQ-NEXT: jg LBB20_2 -; 
AVX512DQ-NEXT: ## %bb.1: -; AVX512DQ-NEXT: vmovaps %xmm1, %xmm0 -; AVX512DQ-NEXT: LBB20_2: +; AVX512DQ-NEXT: jg LBB20_1 +; AVX512DQ-NEXT: ## %bb.2: +; AVX512DQ-NEXT: vpslld $31, %xmm1, %xmm0 +; AVX512DQ-NEXT: jmp LBB20_3 +; AVX512DQ-NEXT: LBB20_1: +; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 +; AVX512DQ-NEXT: LBB20_3: +; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 +; AVX512DQ-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq %mask = icmp sgt i32 %a1, %b1 %c = select i1 %mask, <4 x i1>%a, <4 x i1>%b @@ -1271,8 +1325,7 @@ define void @test22(<4 x i1> %a, <4 x i1>* %addr) { ; KNL-LABEL: test22: ; KNL: ## %bb.0: -; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %ymm0 -; KNL-NEXT: vpslld $31, %ymm0, %ymm0 +; KNL-NEXT: vpslld $31, %xmm0, %xmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: movb %al, (%rdi) @@ -1288,8 +1341,7 @@ ; ; AVX512BW-LABEL: test22: ; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 def %ymm0 -; AVX512BW-NEXT: vpslld $31, %ymm0, %ymm0 +; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: movb %al, (%rdi) @@ -1298,8 +1350,7 @@ ; ; AVX512DQ-LABEL: test22: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: ## kill: def %xmm0 killed %xmm0 def %ymm0 -; AVX512DQ-NEXT: vpslld $31, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 ; AVX512DQ-NEXT: kmovb %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper @@ -1311,8 +1362,7 @@ define void @test23(<2 x i1> %a, <2 x i1>* %addr) { ; KNL-LABEL: test23: ; KNL: ## %bb.0: -; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 -; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 +; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: movb %al, (%rdi) @@ -1328,8 +1378,7 @@ ; ; AVX512BW-LABEL: test23: ; AVX512BW: ## %bb.0: -; 
AVX512BW-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 -; AVX512BW-NEXT: vpsllq $63, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: movb %al, (%rdi) @@ -1338,8 +1387,7 @@ ; ; AVX512DQ-LABEL: test23: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 -; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0 ; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0 ; AVX512DQ-NEXT: kmovb %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper @@ -1390,10 +1438,9 @@ define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) { ; KNL-LABEL: store_v2i1: ; KNL: ## %bb.0: -; KNL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vpsllq $63, %zmm0, %zmm0 +; KNL-NEXT: vpsllq $63, %xmm0, %xmm0 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 +; KNL-NEXT: knotw %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: movb %al, (%rdi) ; KNL-NEXT: vzeroupper @@ -1409,10 +1456,9 @@ ; ; AVX512BW-LABEL: store_v2i1: ; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpsllq $63, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsllq $63, %xmm0, %xmm0 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: knotw %k0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: movb %al, (%rdi) ; AVX512BW-NEXT: vzeroupper @@ -1420,10 +1466,9 @@ ; ; AVX512DQ-LABEL: store_v2i1: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0 ; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: knotw %k0, %k0 ; AVX512DQ-NEXT: kmovb %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq @@ -1435,10 +1480,9 @@ define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) { ; KNL-LABEL: store_v4i1: ; KNL: ## %bb.0: -; KNL-NEXT: vpcmpeqd %xmm1, 
%xmm1, %xmm1 -; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vpslld $31, %ymm0, %ymm0 +; KNL-NEXT: vpslld $31, %xmm0, %xmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: knotw %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: movb %al, (%rdi) ; KNL-NEXT: vzeroupper @@ -1454,10 +1498,9 @@ ; ; AVX512BW-LABEL: store_v4i1: ; AVX512BW: ## %bb.0: -; AVX512BW-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpslld $31, %ymm0, %ymm0 +; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512BW-NEXT: knotw %k0, %k0 ; AVX512BW-NEXT: kmovd %k0, %eax ; AVX512BW-NEXT: movb %al, (%rdi) ; AVX512BW-NEXT: vzeroupper @@ -1465,10 +1508,9 @@ ; ; AVX512DQ-LABEL: store_v4i1: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpslld $31, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: knotw %k0, %k0 ; AVX512DQ-NEXT: kmovb %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq Index: llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll +++ llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll @@ -72,9 +72,13 @@ define <4 x float> @test7(<4 x float> %a, <4 x float> %b) { ; KNL-LABEL: test7: ; KNL: ## %bb.0: +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; KNL-NEXT: vcmpltps %xmm2, %xmm0, %xmm2 -; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; KNL-NEXT: vcmpltps %zmm2, %zmm0, %k1 +; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test7: @@ -92,9 +96,13 @@ define <2 x double> @test8(<2 x double> %a, <2 x double> %b) { ; KNL-LABEL: 
test8: ; KNL: ## %bb.0: +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL-NEXT: vxorpd %xmm2, %xmm2, %xmm2 -; KNL-NEXT: vcmpltpd %xmm2, %xmm0, %xmm2 -; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; KNL-NEXT: vcmpltpd %zmm2, %zmm0, %k1 +; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test8: @@ -537,8 +545,11 @@ define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind { ; KNL-LABEL: test30: ; KNL: ## %bb.0: -; KNL-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm2 -; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; KNL-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1 +; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0 +; KNL-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 +; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} +; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test30: @@ -555,8 +566,13 @@ define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, <2 x double>* %yp) nounwind { ; KNL-LABEL: test31: ; KNL: ## %bb.0: -; KNL-NEXT: vcmpltpd (%rdi), %xmm0, %xmm2 -; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; KNL-NEXT: vmovupd (%rdi), %xmm2 +; KNL-NEXT: vcmpltpd %zmm2, %zmm0, %k1 +; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test31: @@ -574,8 +590,12 @@ define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, <4 x double>* %yp) nounwind { ; KNL-LABEL: test32: ; KNL: ## %bb.0: -; KNL-NEXT: vcmpltpd (%rdi), %ymm0, %ymm2 -; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; KNL-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1 +; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0 +; KNL-NEXT: vmovupd (%rdi), %ymm2 +; KNL-NEXT: 
vcmpltpd %zmm2, %zmm0, %k1 +; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} +; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test32: @@ -605,8 +625,13 @@ define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) nounwind { ; KNL-LABEL: test34: ; KNL: ## %bb.0: -; KNL-NEXT: vcmpltps (%rdi), %xmm0, %xmm2 -; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; KNL-NEXT: vmovups (%rdi), %xmm2 +; KNL-NEXT: vcmpltps %zmm2, %zmm0, %k1 +; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test34: @@ -674,9 +699,12 @@ define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nounwind { ; KNL-LABEL: test38: ; KNL: ## %bb.0: +; KNL-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1 +; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0 ; KNL-NEXT: vbroadcastsd (%rdi), %ymm2 -; KNL-NEXT: vcmpltpd %ymm2, %ymm0, %ymm2 -; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; KNL-NEXT: vcmpltpd %zmm2, %zmm0, %k1 +; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} +; KNL-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test38: @@ -697,9 +725,13 @@ define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nounwind { ; KNL-LABEL: test39: ; KNL: ## %bb.0: +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0] -; KNL-NEXT: vcmpltpd %xmm2, %xmm0, %xmm2 -; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; KNL-NEXT: vcmpltpd %zmm2, %zmm0, %k1 +; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test39: @@ -763,9 +795,13 @@ define <4 x float> 
@test42(<4 x float> %x, <4 x float> %x1, float* %ptr) nounwind { ; KNL-LABEL: test42: ; KNL: ## %bb.0: +; KNL-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL-NEXT: vbroadcastss (%rdi), %xmm2 -; KNL-NEXT: vcmpltps %xmm2, %xmm0, %xmm2 -; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; KNL-NEXT: vcmpltps %zmm2, %zmm0, %k1 +; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} +; KNL-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test42: Index: llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll +++ llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll @@ -6,18 +6,12 @@ define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_512(<8 x double> %x0, <2 x double> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_vextractf64x2_512: ; CHECK: ## %bb.0: -; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 -; CHECK-NEXT: vmovd %edi, %xmm2 -; CHECK-NEXT: kmovw %edi, %k0 -; CHECK-NEXT: kshiftrb $1, %k0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; CHECK-NEXT: vpsllq $63, %xmm2, %xmm2 -; CHECK-NEXT: vpsraq $63, %zmm2, %zmm2 -; CHECK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1 -; CHECK-NEXT: vandpd %xmm0, %xmm2, %xmm2 -; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 -; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0 +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2 +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm1 {%k1} +; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm0 {%k1} {z} +; CHECK-NEXT: vaddpd %xmm2, %xmm1, %xmm1 +; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %res = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> %x2, i8 %x3) %res2 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x 
double> zeroinitializer, i8 %x3) Index: llvm/trunk/test/CodeGen/X86/avx512vl-vec-cmp.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512vl-vec-cmp.ll +++ llvm/trunk/test/CodeGen/X86/avx512vl-vec-cmp.ll @@ -11,8 +11,11 @@ ; ; NoVLX-LABEL: test256_1: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm2 -; NoVLX-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k1 +; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; NoVLX-NEXT: retq %mask = icmp eq <4 x i64> %x, %y %max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %y @@ -28,8 +31,12 @@ ; ; NoVLX-LABEL: test256_2: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0 +; NoVLX-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k1 +; NoVLX-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; NoVLX-NEXT: retq %mask = icmp sgt <4 x i64> %x, %y %max = select <4 x i1> %mask, <4 x i64> %x1, <4 x i64> %y @@ -66,11 +73,12 @@ ; ; NoVLX-LABEL: test256_4: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm3, %ymm1, %ymm4 -; NoVLX-NEXT: vpxor %ymm3, %ymm0, %ymm0 -; NoVLX-NEXT: vpcmpgtq %ymm4, %ymm0, %ymm0 -; NoVLX-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0 +; NoVLX-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 +; NoVLX-NEXT: vpblendmq %zmm2, 
%zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; NoVLX-NEXT: retq %mask = icmp ugt <4 x i64> %x, %y %max = select <4 x i1> %mask, <4 x i64> %x1, <4 x i64> %y @@ -289,12 +297,14 @@ ; ; NoVLX-LABEL: test256_10: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm3 -; NoVLX-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4 -; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3 -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm1 -; NoVLX-NEXT: vpandn %ymm3, %ymm1, %ymm1 -; NoVLX-NEXT: vblendvpd %ymm1, %ymm0, %ymm2, %ymm0 +; NoVLX-NEXT: # kill: def %ymm3 killed %ymm3 def %zmm3 +; NoVLX-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpleq %zmm1, %zmm0, %k1 +; NoVLX-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1} +; NoVLX-NEXT: vpblendmq %zmm0, %zmm2, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; NoVLX-NEXT: retq %mask1 = icmp sge <4 x i64> %x1, %y1 %mask0 = icmp sle <4 x i64> %x, %y @@ -313,10 +323,14 @@ ; ; NoVLX-LABEL: test256_11: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm3 -; NoVLX-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm2 -; NoVLX-NEXT: vpand %ymm2, %ymm3, %ymm2 -; NoVLX-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; NoVLX-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqu (%rdi), %ymm3 +; NoVLX-NEXT: vpcmpgtq %zmm3, %zmm0, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm2, %zmm1, %k1 {%k1} +; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; NoVLX-NEXT: retq %mask1 = icmp sgt <4 x i64> %x1, %y1 %y = load <4 x i64>, <4 x i64>* %y.ptr, align 4 @@ -362,9 +376,12 @@ ; ; NoVLX-LABEL: test256_13: ; NoVLX: # %bb.0: +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: 
vpbroadcastq (%rdi), %ymm2 -; NoVLX-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm2 -; NoVLX-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; NoVLX-NEXT: vpcmpeqq %zmm2, %zmm0, %k1 +; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; NoVLX-NEXT: retq %yb = load i64, i64* %yb.ptr, align 4 %y.0 = insertelement <4 x i64> undef, i64 %yb, i32 0 @@ -437,11 +454,14 @@ ; ; NoVLX-LABEL: test256_16: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm2 +; NoVLX-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm3 -; NoVLX-NEXT: vpcmpgtq %ymm3, %ymm0, %ymm3 -; NoVLX-NEXT: vpandn %ymm3, %ymm2, %ymm2 -; NoVLX-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; NoVLX-NEXT: vpcmpgtq %zmm3, %zmm0, %k1 +; NoVLX-NEXT: vpcmpleq %zmm1, %zmm2, %k1 {%k1} +; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; NoVLX-NEXT: retq %mask1 = icmp sge <4 x i64> %x1, %y1 %yb = load i64, i64* %yb.ptr, align 4 @@ -550,8 +570,11 @@ ; ; NoVLX-LABEL: test128_1: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm2 -; NoVLX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k1 +; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %mask = icmp eq <2 x i64> %x, %y %max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %y @@ -567,8 +590,12 @@ ; ; NoVLX-LABEL: test128_2: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def 
%zmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k1 +; NoVLX-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %mask = icmp sgt <2 x i64> %x, %y %max = select <2 x i1> %mask, <2 x i64> %x1, <2 x i64> %y @@ -584,10 +611,12 @@ ; ; NoVLX-LABEL: test128_3: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 -; NoVLX-NEXT: vpxor %xmm3, %xmm0, %xmm0 -; NoVLX-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k1 +; NoVLX-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %mask = icmp sge <4 x i32> %x, %y %max = select <4 x i1> %mask, <4 x i32> %x1, <4 x i32> %y @@ -603,11 +632,12 @@ ; ; NoVLX-LABEL: test128_4: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm3, %xmm1, %xmm4 -; NoVLX-NEXT: vpxor %xmm3, %xmm0, %xmm0 -; NoVLX-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm0 -; NoVLX-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 +; NoVLX-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %mask = icmp ugt <2 x i64> %x, %y %max = select <2 x i1> %mask, <2 x i64> %x1, <2 x i64> %y @@ -623,8 +653,12 @@ ; ; NoVLX-LABEL: test128_5: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm2 -; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: 
vmovdqu (%rdi), %xmm2 +; NoVLX-NEXT: vpcmpeqd %zmm2, %zmm0, %k1 +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %y = load <4 x i32>, <4 x i32>* %yp, align 4 %mask = icmp eq <4 x i32> %x, %y @@ -641,8 +675,12 @@ ; ; NoVLX-LABEL: test128_5b: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm2 -; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqu (%rdi), %xmm2 +; NoVLX-NEXT: vpcmpeqd %zmm0, %zmm2, %k1 +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %y = load <4 x i32>, <4 x i32>* %yp, align 4 %mask = icmp eq <4 x i32> %y, %x @@ -659,8 +697,12 @@ ; ; NoVLX-LABEL: test128_6: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm2 -; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqu (%rdi), %xmm2 +; NoVLX-NEXT: vpcmpgtd %zmm2, %zmm0, %k1 +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp sgt <4 x i32> %x, %y @@ -677,8 +719,12 @@ ; ; NoVLX-LABEL: test128_6b: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm2 -; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqu (%rdi), %xmm2 +; NoVLX-NEXT: vpcmpgtd %zmm2, %zmm0, %k1 +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp slt <4 x i32> %y, %x @@ -695,10 +741,12 @@ ; ; 
NoVLX-LABEL: test128_7: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm2 -; NoVLX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 -; NoVLX-NEXT: vpxor %xmm3, %xmm2, %xmm2 -; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqu (%rdi), %xmm2 +; NoVLX-NEXT: vpcmpled %zmm2, %zmm0, %k1 +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp sle <4 x i32> %x, %y @@ -715,10 +763,12 @@ ; ; NoVLX-LABEL: test128_7b: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm2 -; NoVLX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 -; NoVLX-NEXT: vpxor %xmm3, %xmm2, %xmm2 -; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqu (%rdi), %xmm2 +; NoVLX-NEXT: vpcmpled %zmm2, %zmm0, %k1 +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp sge <4 x i32> %y, %x @@ -735,9 +785,12 @@ ; ; NoVLX-LABEL: test128_8: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpminud (%rdi), %xmm0, %xmm2 -; NoVLX-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm2 -; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqu (%rdi), %xmm2 +; NoVLX-NEXT: vpcmpleud %zmm2, %zmm0, %k1 +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp ule <4 x i32> %x, %y @@ -754,10 +807,12 @@ ; ; NoVLX-LABEL: test128_8b: ; NoVLX: # %bb.0: +; NoVLX-NEXT: # kill: def %xmm1 killed 
%xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqu (%rdi), %xmm2 -; NoVLX-NEXT: vpmaxud %xmm0, %xmm2, %xmm3 -; NoVLX-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2 -; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: vpcmpnltud %zmm0, %zmm2, %k1 +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp uge <4 x i32> %y, %x @@ -775,10 +830,14 @@ ; ; NoVLX-LABEL: test128_9: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm3 -; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2 -; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm3 killed %xmm3 def %zmm3 +; NoVLX-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1} +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %mask1 = icmp eq <4 x i32> %x1, %y1 %mask0 = icmp eq <4 x i32> %x, %y @@ -797,12 +856,14 @@ ; ; NoVLX-LABEL: test128_10: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm3 -; NoVLX-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4 -; NoVLX-NEXT: vpxor %xmm4, %xmm3, %xmm3 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm1 -; NoVLX-NEXT: vpandn %xmm3, %xmm1, %xmm1 -; NoVLX-NEXT: vblendvpd %xmm1, %xmm0, %xmm2, %xmm0 +; NoVLX-NEXT: # kill: def %xmm3 killed %xmm3 def %zmm3 +; NoVLX-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpleq %zmm1, %zmm0, %k1 +; NoVLX-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1} +; NoVLX-NEXT: vpblendmq %zmm0, %zmm2, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 
killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %mask1 = icmp sge <2 x i64> %x1, %y1 %mask0 = icmp sle <2 x i64> %x, %y @@ -821,10 +882,14 @@ ; ; NoVLX-LABEL: test128_11: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm3 -; NoVLX-NEXT: vpcmpgtq %xmm2, %xmm1, %xmm2 -; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2 -; NoVLX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqu (%rdi), %xmm3 +; NoVLX-NEXT: vpcmpgtq %zmm3, %zmm0, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm2, %zmm1, %k1 {%k1} +; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %mask1 = icmp sgt <2 x i64> %x1, %y1 %y = load <2 x i64>, <2 x i64>* %y.ptr, align 4 @@ -844,11 +909,14 @@ ; ; NoVLX-LABEL: test128_12: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm2 -; NoVLX-NEXT: vpminud (%rdi), %xmm0, %xmm3 -; NoVLX-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm3 -; NoVLX-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqu (%rdi), %xmm3 +; NoVLX-NEXT: vpcmpleud %zmm3, %zmm0, %k1 +; NoVLX-NEXT: vpcmpled %zmm1, %zmm2, %k1 {%k1} +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %mask1 = icmp sge <4 x i32> %x1, %y1 %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 @@ -867,9 +935,12 @@ ; ; NoVLX-LABEL: test128_13: ; NoVLX: # %bb.0: +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm2 -; NoVLX-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm2 -; NoVLX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 
+; NoVLX-NEXT: vpcmpeqq %zmm2, %zmm0, %k1 +; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %yb = load i64, i64* %yb.ptr, align 4 %y.0 = insertelement <2 x i64> undef, i64 %yb, i32 0 @@ -888,11 +959,12 @@ ; ; NoVLX-LABEL: test128_14: ; NoVLX: # %bb.0: +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm2 -; NoVLX-NEXT: vpcmpgtd %xmm2, %xmm0, %xmm2 -; NoVLX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 -; NoVLX-NEXT: vpxor %xmm3, %xmm2, %xmm2 -; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: vpcmpled %zmm2, %zmm0, %k1 +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %yb = load i32, i32* %yb.ptr, align 4 %y.0 = insertelement <4 x i32> undef, i32 %yb, i32 0 @@ -912,11 +984,14 @@ ; ; NoVLX-LABEL: test128_15: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm2 +; NoVLX-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm3 -; NoVLX-NEXT: vpcmpgtd %xmm3, %xmm0, %xmm3 -; NoVLX-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: vpcmpgtd %zmm3, %zmm0, %k1 +; NoVLX-NEXT: vpcmpled %zmm1, %zmm2, %k1 {%k1} +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %mask1 = icmp sge <4 x i32> %x1, %y1 %yb = load i32, i32* %yb.ptr, align 4 @@ -938,11 +1013,14 @@ ; ; NoVLX-LABEL: test128_16: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm2, %xmm2 +; NoVLX-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), 
%xmm3 -; NoVLX-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3 -; NoVLX-NEXT: vpandn %xmm3, %xmm2, %xmm2 -; NoVLX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: vpcmpgtq %zmm3, %zmm0, %k1 +; NoVLX-NEXT: vpcmpleq %zmm1, %zmm2, %k1 {%k1} +; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %mask1 = icmp sge <2 x i64> %x1, %y1 %yb = load i64, i64* %yb.ptr, align 4 @@ -963,10 +1041,12 @@ ; ; NoVLX-LABEL: test128_17: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm2 -; NoVLX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 -; NoVLX-NEXT: vpxor %xmm3, %xmm2, %xmm2 -; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqu (%rdi), %xmm2 +; NoVLX-NEXT: vpcmpneqd %zmm2, %zmm0, %k1 +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp ne <4 x i32> %x, %y @@ -983,10 +1063,12 @@ ; ; NoVLX-LABEL: test128_18: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm2 -; NoVLX-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 -; NoVLX-NEXT: vpxor %xmm3, %xmm2, %xmm2 -; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqu (%rdi), %xmm2 +; NoVLX-NEXT: vpcmpneqd %zmm0, %zmm2, %k1 +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp ne <4 x i32> %y, %x @@ -1003,9 +1085,12 @@ ; ; NoVLX-LABEL: test128_19: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpmaxud (%rdi), %xmm0, %xmm2 -; NoVLX-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm2 -; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed 
%xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqu (%rdi), %xmm2 +; NoVLX-NEXT: vpcmpnltud %zmm2, %zmm0, %k1 +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp uge <4 x i32> %x, %y @@ -1022,10 +1107,12 @@ ; ; NoVLX-LABEL: test128_20: ; NoVLX: # %bb.0: +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqu (%rdi), %xmm2 -; NoVLX-NEXT: vpmaxud %xmm0, %xmm2, %xmm3 -; NoVLX-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2 -; NoVLX-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: vpcmpnltud %zmm0, %zmm2, %k1 +; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; NoVLX-NEXT: retq %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4 %mask = icmp uge <4 x i32> %y, %x Index: llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll +++ llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll @@ -2329,37 +2329,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, 
%k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -2380,37 +2357,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: 
vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -2433,50 +2387,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def 
%zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -2501,50 +2420,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; 
NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -2570,38 +2454,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ 
-2625,51 +2485,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; 
NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -2696,37 +2520,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -2747,37 +2548,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, 
%k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -2800,50 +2578,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; 
NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -2868,50 +2611,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: 
andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -2937,38 +2645,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, 
%k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -2992,51 +2676,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 
-; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -3069,13 +2717,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; 
NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax @@ -3108,13 +2768,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax @@ -3149,8 +2821,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 
{%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -3158,16 +2832,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -3206,8 +2877,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -3215,16 +2888,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: 
vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -3264,14 +2934,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, 
%k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax @@ -3307,9 +2988,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -3317,16 +2999,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -3367,16 +3046,27 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, 
%eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx @@ -3412,16 +3102,27 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, 
%zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx @@ -3459,8 +3160,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -3468,19 +3171,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; 
NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -3522,8 +3221,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -3531,19 +3232,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -3586,17 +3283,27 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd 
(%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx @@ -3635,9 +3342,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -3645,19 +3353,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 
-; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -5264,13 +4968,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5291,13 +4996,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; 
NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5320,20 +5026,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5358,20 +5059,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; 
NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5397,14 +5093,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5428,21 +5124,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: 
movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5470,23 +5160,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -5507,23 +5188,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, 
%k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -5546,30 +5218,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -5594,30 +5251,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; 
NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -5643,24 +5285,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -5684,31 +5316,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpeqq 
%xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -5735,23 +5351,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; 
NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -5772,23 +5379,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -5811,30 +5409,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; 
NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -5859,30 +5442,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -5908,24 +5476,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, 
%eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -5949,31 +5507,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -6006,13 +5548,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register 
%rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax @@ -6045,13 +5593,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, 
%eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax @@ -6086,19 +5640,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6137,19 +5690,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, 
%xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6189,14 +5741,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax @@ -6232,20 +5789,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6286,16 +5841,21 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, 
%xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx @@ -6331,16 +5891,21 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx @@ -6378,22 +5943,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; 
NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6435,22 +5998,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, 
%xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6493,17 +6054,21 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx @@ -6542,23 +6107,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: 
kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6597,36 +6159,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed 
%ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -6651,36 +6188,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -6707,49 +6219,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 
-; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -6778,49 +6253,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw 
%edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -6850,37 +6288,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: 
vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -6908,50 +6320,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, 
%ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -6982,36 +6356,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw 
$14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -7036,36 +6385,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, 
%k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -7092,49 +6416,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; 
NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -7163,49 +6450,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, 
%k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -7235,37 +6485,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -7293,50 
+6517,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; 
NoVLX-NEXT: vzeroupper @@ -7373,13 +6559,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -7415,14 +6611,24 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: # kill: def %ymm0 killed 
%ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) @@ -7459,9 +6665,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -7469,16 +6676,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; 
NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -7518,9 +6722,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -7528,16 +6733,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -7578,14 +6780,23 @@ ; NoVLX-NEXT: 
.cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -7624,10 +6835,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -7635,16 +6846,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; 
NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -7686,16 +6894,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: 
vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -7734,16 +6951,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -7784,9 +7010,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def 
%ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -7794,19 +7021,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -7849,9 +7072,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -7859,19 +7083,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; 
NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -7915,17 +7135,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, 
%zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -7967,10 +7195,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -7978,19 +7206,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: 
vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -11329,37 +10553,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -11380,37 +10581,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw 
%k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -11433,50 +10611,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 
-; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -11501,50 +10644,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: 
vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -11570,38 +10678,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw 
$15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -11625,51 +10709,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, 
%k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -11696,37 +10744,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; 
NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -11747,37 +10772,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; 
NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -11800,50 +10802,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; 
NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -11868,50 +10835,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; 
NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -11937,38 +10869,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -11992,51 +10900,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm1, 
%xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -12069,13 +10941,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; 
NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax @@ -12108,13 +10992,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; 
NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax @@ -12149,8 +11045,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -12158,16 +11056,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -12206,8 +11101,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -12215,16 +11112,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -12264,14 +11158,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax @@ -12307,9 +11212,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -12317,16 +11223,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -12367,16 +11270,27 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx @@ -12412,16 +11326,27 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx @@ -12459,8 +11384,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: 
kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -12468,19 +11395,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -12522,8 +11445,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -12531,19 +11456,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, 
%xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -12586,17 +11507,27 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: 
vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx @@ -12635,9 +11566,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -12645,19 +11577,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -14264,13 +13192,14 @@ ; ; NoVLX-LABEL: 
test_vpcmpsgtq_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14291,13 +13220,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14320,20 +13250,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: 
# kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14358,20 +13283,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14397,14 +13317,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: 
kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14428,21 +13348,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14470,23 +13384,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, 
%zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -14507,23 +13412,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -14546,30 +13442,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, 
%k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -14594,30 +13475,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -14643,24 +13509,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: 
# kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -14684,31 +13540,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; 
NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -14735,23 +13575,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -14772,23 +13603,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 
; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -14811,30 +13633,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -14859,30 +13666,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; 
NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -14908,24 +13700,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -14949,31 +13731,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: 
vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -15006,13 +13772,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb 
$1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax @@ -15045,13 +13817,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax @@ -15086,19 +13864,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -15137,19 +13914,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -15189,14 +13965,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # 
kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax @@ -15232,20 +14013,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor 
%xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -15286,16 +14065,21 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx @@ -15331,16 +14115,21 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: 
vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx @@ -15378,22 +14167,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd 
%zmm0, %zmm0, %k0 @@ -15435,22 +14222,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -15493,17 +14278,21 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx @@ -15542,23 +14331,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -15597,36 
+14383,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -15651,36 +14412,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, 
%k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -15707,49 +14443,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; 
NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -15778,49 +14477,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw 
$1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -15850,37 +14512,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw 
%k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -15908,50 +14544,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw 
%k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -15982,36 +14580,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: 
kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -16036,36 +14609,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -16092,49 +14640,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, 
%k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -16163,49 +14674,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; 
NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -16235,37 +14709,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq 
%ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -16293,50 +14741,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, 
%eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -16373,13 +14783,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, 
%k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -16415,13 +14835,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -16459,9 +14889,10 @@ ; 
NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -16469,16 +14900,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -16518,9 +14946,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: 
kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -16528,16 +14957,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -16578,14 +15004,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 
+; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -16624,10 +15059,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -16635,16 +15070,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; 
NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -16686,16 +15118,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -16734,16 +15175,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, 
%k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -16784,9 +15234,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -16794,19 +15245,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -16849,9 +15296,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -16859,19 +15307,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; 
NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -16915,17 +15359,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -16967,10 +15419,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm1, 
%ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -16978,19 +15430,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -20461,39 +18909,14 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; 
NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -20514,40 +18937,14 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: 
kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -20570,50 +18967,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; 
NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -20638,51 +19000,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; 
NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -20708,40 +19034,14 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; 
NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -20765,51 +19065,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; 
NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -20836,39 +19100,14 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, 
%k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -20889,40 +19128,14 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -20945,50 +19158,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; 
NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -21013,51 +19191,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # 
%entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -21083,40 +19225,14 @@ ; ; NoVLX-LABEL: 
test_vpcmpsged_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -21140,51 +19256,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw 
%k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -21217,14 +19297,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -21259,15 +19348,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx 
+; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -21304,8 +19401,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -21313,16 +19412,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd 
%zmm0, %zmm0, %k0 @@ -21361,9 +19457,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -21371,16 +19468,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -21420,15 +19514,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -21466,9 +19568,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -21476,16 +19579,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: 
vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -21526,17 +19626,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ 
-21574,18 +19682,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -21625,8 +19740,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -21634,19 +19751,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; 
NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -21688,9 +19801,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -21698,19 +19812,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw 
%k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -21753,18 +19863,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, 
%xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -21805,9 +19922,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -21815,19 +19933,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -23434,15 +21548,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 
-; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23463,16 +21576,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23495,20 +21606,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, 
%zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23533,21 +21639,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23573,16 +21673,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, 
%zmm0, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23606,21 +21704,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23648,25 +21740,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, 
%k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -23687,26 +21768,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -23729,30 +21798,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, 
%eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -23777,31 +21831,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, 
%eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -23827,26 +21865,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -23870,31 +21896,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: 
kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -23921,25 +21931,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -23960,26 +21959,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; 
NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -24002,30 +21989,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a 
to <2 x i64> @@ -24050,31 +22022,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -24100,26 +22056,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: 
kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -24143,31 +22087,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -24200,14 +22128,17 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24242,15 +22173,17 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, 
%k0 @@ -24287,19 +22220,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24338,20 +22270,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24391,15 +22321,17 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24437,20 +22369,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: 
vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24491,17 +22421,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) 
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24539,18 +22471,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24590,22 +22523,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; 
NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24647,23 +22578,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: 
vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24706,18 +22634,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24758,23 +22687,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, 
%k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -24813,38 +22739,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def 
%ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -24869,39 +22768,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -24928,51 +22799,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask: ; NoVLX: # %bb.0: 
# %entry -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed 
%eax ; NoVLX-NEXT: vzeroupper @@ -25001,52 +22833,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; 
NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -25076,39 +22868,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -25136,52 +22900,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; 
NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -25212,38 +22936,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %ymm0, 
%ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -25268,39 +22965,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw 
%eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -25327,51 +22996,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; 
NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -25400,52 +23030,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, 
%xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -25475,39 +23065,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, 
%k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -25535,52 +23097,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: 
kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -25617,15 +23139,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: 
kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -25661,16 +23191,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -25708,11 +23245,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; 
NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -25720,16 +23256,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -25769,12 +23302,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, 
%k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -25782,16 +23313,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -25832,16 +23360,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw 
$3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -25880,12 +23415,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -25893,16 +23426,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, 
%xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -25944,18 +23474,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -25994,19 +23531,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; 
NoVLX-NEXT: vmovdqa (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -26047,11 +23590,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -26059,19 +23601,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, 
%edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -26114,12 +23652,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -26127,19 +23663,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 
; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -26183,19 +23715,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, 
%xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -26237,12 +23775,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -26250,19 +23786,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: 
vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -29717,40 +27249,14 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -29771,40 +27277,14 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] -; 
NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -29827,53 +27307,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, 
%ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -29898,53 +27340,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, 
%xmm0 -; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> 
@@ -29970,41 +27374,14 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30028,54 +27405,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = 
[2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30102,40 
+27440,14 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30156,40 +27468,14 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 -; 
NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30212,53 +27498,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; 
NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30283,53 +27531,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd 
%xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30355,41 +27565,14 @@ ; ; NoVLX-LABEL: 
test_vpcmpultd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30413,54 +27596,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor 
%xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> @@ -30493,16 +27637,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: 
andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax @@ -30535,16 +27688,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; 
NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax @@ -30579,11 +27741,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -30591,16 +27752,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: 
vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -30639,11 +27797,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -30651,16 +27808,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: 
vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -30700,17 +27854,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax @@ -30746,12 +27908,10 @@ ; 
NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -30759,16 +27919,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -30809,19 +27966,27 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; 
NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx @@ -30857,19 +28022,27 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw 
%k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx @@ -30907,11 +28080,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -30919,19 +28091,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, 
{{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -30973,11 +28141,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -30985,19 +28152,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -31040,20 +28203,27 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; 
NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx @@ -31092,12 +28262,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -31105,19 +28273,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd 
%xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -32724,16 +29888,14 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -32754,16 +29916,14 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -32786,23 +29946,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask: ; NoVLX: # 
%bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -32827,23 +29979,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -32869,17 +30013,14 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -32903,24 +30044,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: 
vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -32948,26 +30080,14 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -32988,26 +30108,14 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, 
%xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33030,33 +30138,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, 
%k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33081,33 +30171,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33133,27 +30205,14 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; 
NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33177,34 +30236,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 
{%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33231,26 +30271,14 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33271,26 +30299,14 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: 
kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33313,33 +30329,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax 
+; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33364,33 +30362,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33416,27 +30396,14 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; 
NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33460,34 +30427,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; 
NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> @@ -33520,16 +30468,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax @@ -33562,16 +30513,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax @@ -33606,22 +30560,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, 
%zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -33660,22 +30610,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -33715,17 +30661,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vmovdqa {{.*#+}} 
xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax @@ -33761,23 +30709,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; 
NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -33818,19 +30761,21 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx @@ -33866,19 +30811,21 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vmovdqa 
{{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx @@ -33916,25 +30863,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, 
%xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -33976,25 +30918,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor 
%xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -34037,20 +30974,21 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx @@ -34089,26 +31027,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 -; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 -; 
NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vmovd %ecx, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -34147,39 +31079,11 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; 
NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -34204,39 +31108,11 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: 
kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -34263,52 +31139,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, 
%eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -34337,52 +31173,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, 
%k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -34412,40 +31208,11 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, 
%k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -34473,53 +31240,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; 
NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -34550,39 +31276,11 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: 
kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -34607,39 +31305,11 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw 
%eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -34666,52 +31336,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; 
NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -34740,52 +31370,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, 
%k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -34815,40 +31405,11 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, 
%k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -34876,53 +31437,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; 
NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -34959,16 +31479,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw 
$1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35004,16 +31531,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd 
%zmm0, %zmm0, %k0 @@ -35051,12 +31585,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -35064,16 +31596,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35113,12 +31642,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = 
[9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -35126,16 +31653,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35176,17 +31700,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor 
%ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35225,13 +31755,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -35239,16 +31766,13 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, 
%esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35290,19 +31814,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35341,19 +31871,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, 
%esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35394,12 +31930,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -35407,19 +31941,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, 
%xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35462,12 +31992,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -35475,19 +32003,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, 
%zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35531,20 +32055,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -35586,13 +32115,10 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 -; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = 
[9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: kmovw %edi, %k0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftrw $3, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftrw $2, %k0, %k1 @@ -35600,19 +32126,15 @@ ; NoVLX-NEXT: kshiftrw $1, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: vmovd %esi, %xmm1 -; NoVLX-NEXT: vpinsrb $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -36635,37 +33157,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 
-; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> @@ -36686,37 +33185,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; 
NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovaps (%rdi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> @@ -36738,38 +33214,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1 -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vcmpeqps %zmm1, 
%zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> @@ -36793,38 +33245,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -36851,38 +33277,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: 
# %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vmovaps (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -36910,39 +33310,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vbroadcastss (%rsi), %xmm2 -; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb 
$0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -36972,37 +33345,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, 
%k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> @@ -37023,37 +33373,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovaps (%rdi), %xmm1 +; NoVLX-NEXT: 
vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> @@ -37075,38 +33402,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1 -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> @@ -37130,38 +33433,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; 
NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -37188,38 +33465,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: 
kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vmovaps (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -37247,39 +33498,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vbroadcastss (%rsi), %xmm2 -; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw 
$2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -37315,13 +33539,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld 
$31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax @@ -37354,13 +33590,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovaps (%rdi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax @@ -37394,14 +33642,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1 -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd 
%xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax @@ -37437,15 +33696,24 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 
+; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -37483,15 +33751,24 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vmovaps (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -37530,16 +33807,24 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vbroadcastss (%rsi), %xmm2 -; 
NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -37580,16 +33865,27 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx @@ -37625,16 +33921,27 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovaps (%rdi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: 
vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx @@ -37671,17 +33978,27 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1 -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx @@ -37720,18 +34037,26 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 -; 
NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -37772,18 +34097,26 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vmovaps (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -37825,19 +34158,26 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vbroadcastss (%rsi), %xmm2 -; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, 
%xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -39538,13 +35878,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -39565,13 +35906,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovapd (%rdi), %xmm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -39593,14 +35935,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = 
xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -39624,16 +35966,15 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -39657,16 +35998,15 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: vmovapd (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; 
NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -39691,17 +36031,15 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax -; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: andb $3, %al +; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -39729,23 +36067,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, 
%k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> @@ -39766,23 +36095,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovapd (%rdi), %xmm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> @@ -39804,24 +36124,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, 
%k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> @@ -39845,24 +36155,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -39889,24 +36187,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, 
%k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vmovapd (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -39934,25 +36220,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -39982,23 +36255,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw 
%k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> @@ -40019,23 +36283,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovapd (%rdi), %xmm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> @@ -40057,24 +36312,14 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; 
NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> @@ -40098,24 +36343,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -40142,24 +36375,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: 
kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vmovapd (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -40187,25 +36408,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 +; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -40241,13 +36449,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb 
{{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax @@ -40280,13 +36494,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovapd (%rdi), %xmm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, 
%zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax @@ -40320,14 +36540,19 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax @@ -40363,15 +36588,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; 
NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -40409,15 +36637,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vmovapd (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -40456,16 +36687,18 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0 -; 
NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -40506,16 +36739,21 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, 
%zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx @@ -40551,16 +36789,21 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vmovapd (%rdi), %xmm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx @@ -40597,17 +36840,21 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx @@ -40646,18 +36893,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -40698,18 +36947,20 @@ ; NoVLX-NEXT: 
.cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vmovapd (%rsi), %xmm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -40751,19 +37002,20 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0] -; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kmovw %k0, %ecx ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 
= ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -40802,36 +37054,11 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; 
NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -40856,36 +37083,11 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovapd (%rdi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -40911,37 +37113,11 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1 -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, 
%xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -40969,39 +37145,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; 
NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -41029,39 +37178,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb 
$12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vmovapd (%rsi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -41090,40 +37212,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm2 -; NoVLX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: 
kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax ; NoVLX-NEXT: vzeroupper @@ -41154,36 +37248,11 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -41208,36 +37277,11 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; 
NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovapd (%rdi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -41263,37 +37307,11 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1 -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; 
NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -41321,39 +37339,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; 
NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -41381,39 +37372,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vmovapd (%rsi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # 
kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -41442,40 +37406,12 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm2 -; NoVLX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: andl $1, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $1, %k2, %k2 -; NoVLX-NEXT: kshiftlw $1, %k2, %k2 -; NoVLX-NEXT: korw %k1, %k2, %k1 -; NoVLX-NEXT: kshiftrw $1, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $14, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $13, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $12, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %ax killed %ax killed %eax ; NoVLX-NEXT: vzeroupper @@ -41512,13 +37448,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb 
{{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -41554,13 +37500,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovapd (%rdi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, 
%esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -41597,14 +37553,23 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1 -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -41643,16 +37608,24 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: 
kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -41691,16 +37664,24 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vmovapd (%rsi), %ymm1 +; NoVLX-NEXT: vcmpeqpd 
%zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -41740,17 +37721,24 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm2 -; NoVLX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 +; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi +; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, 
%ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -41792,16 +37780,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -41840,16 +37837,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; NoVLX-NEXT: vmovapd (%rdi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw 
%k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -41889,17 +37895,25 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1 -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd 
%zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -41941,19 +37955,26 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -41995,19 +38016,26 @@ ; NoVLX-NEXT: 
.cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vcmpeqpd (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vmovapd (%rsi), %ymm1 +; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -42050,20 +38078,26 @@ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm2 -; NoVLX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1 +; NoVLX-NEXT: 
vcmpeqpd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftrw $3, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %eax +; NoVLX-NEXT: kshiftrw $2, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %ecx +; NoVLX-NEXT: kshiftrw $1, %k0, %k1 +; NoVLX-NEXT: kmovw %k1, %edx +; NoVLX-NEXT: kmovw %k0, %esi ; NoVLX-NEXT: kxorw %k0, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -43281,40 +39315,14 @@ ; ; NoVLX-LABEL: mask_zero_lower: ; NoVLX: # %bb.0: -; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] -; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: vpextrb $4, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k0 -; NoVLX-NEXT: vpextrb $0, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k2 -; NoVLX-NEXT: kshiftrw $4, %k2, %k3 -; NoVLX-NEXT: kxorw %k1, %k3, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $11, %k1, %k1 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftrw $5, %k1, %k2 -; NoVLX-NEXT: kxorw %k0, %k2, %k0 -; NoVLX-NEXT: kshiftlw $15, %k0, %k0 -; NoVLX-NEXT: kshiftrw $10, %k0, %k0 -; NoVLX-NEXT: kxorw %k1, %k0, %k0 -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: vpextrb $8, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; 
NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $9, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: vpextrb $12, %xmm0, %eax -; NoVLX-NEXT: kmovw %eax, %k2 -; NoVLX-NEXT: kxorw %k2, %k1, %k1 -; NoVLX-NEXT: kshiftlw $15, %k1, %k1 -; NoVLX-NEXT: kshiftrw $8, %k1, %k1 -; NoVLX-NEXT: kxorw %k0, %k1, %k0 +; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; NoVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: def %al killed %al killed %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq %cmp = icmp ult <4 x i32> %a, zeroinitializer %concat = shufflevector <4 x i1> %cmp, <4 x i1> zeroinitializer, <8 x i32> Index: llvm/trunk/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll +++ llvm/trunk/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll @@ -48,7 +48,6 @@ ; ; AVX512F-LABEL: ext_i2_2i64: ; AVX512F: # %bb.0: -; AVX512F-NEXT: andb $3, %dil ; AVX512F-NEXT: kmovw %edi, %k1 ; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} ; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 @@ -98,7 +97,6 @@ ; ; AVX512F-LABEL: ext_i4_4i32: ; AVX512F: # %bb.0: -; AVX512F-NEXT: andb $15, %dil ; AVX512F-NEXT: kmovw %edi, %k1 ; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} ; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 @@ -289,7 +287,6 @@ ; ; AVX512F-LABEL: ext_i4_4i64: ; AVX512F: # %bb.0: -; AVX512F-NEXT: andb $15, %dil ; AVX512F-NEXT: kmovw %edi, %k1 ; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} ; AVX512F-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 Index: llvm/trunk/test/CodeGen/X86/compress_expand.ll 
=================================================================== --- llvm/trunk/test/CodeGen/X86/compress_expand.ll +++ llvm/trunk/test/CodeGen/X86/compress_expand.ll @@ -200,11 +200,9 @@ ; KNL: # %bb.0: ; KNL-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; KNL-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL-NEXT: vpsrad $31, %xmm1, %xmm1 -; KNL-NEXT: vpmovsxdq %xmm1, %ymm1 -; KNL-NEXT: vmovdqa %ymm1, %ymm1 -; KNL-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1 +; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL-NEXT: kshiftlw $12, %k0, %k0 +; KNL-NEXT: kshiftrw $12, %k0, %k1 ; KNL-NEXT: vpcompressq %zmm0, (%rdi) {%k1} ; KNL-NEXT: retq call void @llvm.masked.compressstore.v4i64(<4 x i64> %V, i64* %base, <4 x i1> %mask) @@ -223,10 +221,9 @@ ; KNL: # %bb.0: ; KNL-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL-NEXT: vpsllq $63, %xmm1, %xmm1 -; KNL-NEXT: vpsraq $63, %zmm1, %zmm1 -; KNL-NEXT: vmovdqa %xmm1, %xmm1 -; KNL-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1 +; KNL-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL-NEXT: kshiftlw $14, %k0, %k0 +; KNL-NEXT: kshiftrw $14, %k0, %k1 ; KNL-NEXT: vpcompressq %zmm0, (%rdi) {%k1} ; KNL-NEXT: retq call void @llvm.masked.compressstore.v2i64(<2 x i64> %V, i64* %base, <2 x i1> %mask) @@ -245,10 +242,9 @@ ; KNL: # %bb.0: ; KNL-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL-NEXT: vpsrad $31, %xmm1, %xmm1 -; KNL-NEXT: vmovdqa %xmm1, %xmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 +; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL-NEXT: kshiftlw $12, %k0, %k0 +; KNL-NEXT: kshiftrw $12, %k0, %k1 ; KNL-NEXT: vcompressps %zmm0, (%rdi) {%k1} ; KNL-NEXT: retq call void @llvm.masked.compressstore.v4f32(<4 x float> %V, float* %base, <4 x i1> %mask) @@ -269,11 +265,9 @@ ; KNL-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; KNL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] -; 
KNL-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 -; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL-NEXT: vmovaps %xmm1, %xmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 +; KNL-NEXT: vpcmpeqq %zmm2, %zmm1, %k0 +; KNL-NEXT: kshiftlw $14, %k0, %k0 +; KNL-NEXT: kshiftrw $14, %k0, %k1 ; KNL-NEXT: vexpandps (%rdi), %zmm0 {%k1} ; KNL-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; KNL-NEXT: retq @@ -296,11 +290,9 @@ ; KNL-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; KNL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] -; KNL-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1 -; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL-NEXT: vmovaps %xmm1, %xmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 +; KNL-NEXT: vpcmpeqq %zmm2, %zmm1, %k0 +; KNL-NEXT: kshiftlw $14, %k0, %k0 +; KNL-NEXT: kshiftrw $14, %k0, %k1 ; KNL-NEXT: vcompressps %zmm0, (%rdi) {%k1} ; KNL-NEXT: retq %mask = icmp eq <2 x i32> %trigger, zeroinitializer Index: llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll +++ llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll @@ -812,11 +812,12 @@ ; KNL_64-LABEL: test15: ; KNL_64: # %bb.0: ; KNL_64-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0 -; KNL_64-NEXT: vmovdqa %xmm1, %xmm1 -; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm2 -; KNL_64-NEXT: vpslld $31, %ymm1, %ymm0 -; KNL_64-NEXT: vptestmd %zmm0, %zmm0, %k1 -; KNL_64-NEXT: vgatherqps (%rdi,%zmm2,4), %ymm0 {%k1} +; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1 +; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL_64-NEXT: kshiftlw $12, %k0, %k0 +; KNL_64-NEXT: kshiftrw $12, %k0, %k1 +; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm1 +; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1} ; KNL_64-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0 ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq @@ 
-824,12 +825,13 @@ ; KNL_32-LABEL: test15: ; KNL_32: # %bb.0: ; KNL_32-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0 -; KNL_32-NEXT: vmovdqa %xmm1, %xmm1 +; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1 +; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL_32-NEXT: kshiftlw $12, %k0, %k0 +; KNL_32-NEXT: kshiftrw $12, %k0, %k1 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm2 -; KNL_32-NEXT: vpslld $31, %ymm1, %ymm0 -; KNL_32-NEXT: vptestmd %zmm0, %zmm0, %k1 -; KNL_32-NEXT: vgatherqps (%eax,%zmm2,4), %ymm0 {%k1} +; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1 +; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1} ; KNL_32-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0 ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl @@ -864,12 +866,10 @@ ; KNL_64-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 ; KNL_64-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0 ; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL_64-NEXT: vpsrad $31, %xmm1, %xmm1 -; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1 -; KNL_64-NEXT: vmovdqa %ymm1, %ymm1 +; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL_64-NEXT: kshiftlw $12, %k0, %k0 +; KNL_64-NEXT: kshiftrw $12, %k0, %k1 ; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0 -; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1 ; KNL_64-NEXT: vgatherqpd (%rdi,%zmm0,8), %zmm2 {%k1} ; KNL_64-NEXT: vmovapd %ymm2, %ymm0 ; KNL_64-NEXT: retq @@ -879,13 +879,11 @@ ; KNL_32-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 ; KNL_32-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0 ; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL_32-NEXT: vpsrad $31, %xmm1, %xmm1 -; KNL_32-NEXT: vpmovsxdq %xmm1, %ymm1 -; KNL_32-NEXT: vmovdqa %ymm1, %ymm1 +; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL_32-NEXT: kshiftlw $12, %k0, %k0 +; KNL_32-NEXT: kshiftrw $12, %k0, %k1 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax ; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0 -; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1 ; KNL_32-NEXT: vgatherqpd (%eax,%zmm0,8), 
%zmm2 {%k1} ; KNL_32-NEXT: vmovapd %ymm2, %ymm0 ; KNL_32-NEXT: retl @@ -919,9 +917,10 @@ ; KNL_64-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 ; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0 ; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm0 -; KNL_64-NEXT: vmovdqa %xmm1, %xmm1 -; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1 +; KNL_64-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL_64-NEXT: kshiftlw $14, %k0, %k0 +; KNL_64-NEXT: kshiftrw $14, %k0, %k1 ; KNL_64-NEXT: vgatherqpd (%rdi,%zmm0,8), %zmm2 {%k1} ; KNL_64-NEXT: vmovapd %xmm2, %xmm0 ; KNL_64-NEXT: vzeroupper @@ -932,10 +931,11 @@ ; KNL_32-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 ; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0 ; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm0 -; KNL_32-NEXT: vmovdqa %xmm1, %xmm1 +; KNL_32-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL_32-NEXT: kshiftlw $14, %k0, %k0 +; KNL_32-NEXT: kshiftrw $14, %k0, %k1 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1 ; KNL_32-NEXT: vgatherqpd (%eax,%zmm0,8), %zmm2 {%k1} ; KNL_32-NEXT: vmovapd %xmm2, %xmm0 ; KNL_32-NEXT: vzeroupper @@ -979,9 +979,10 @@ ; KNL_64: # %bb.0: ; KNL_64-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; KNL_64-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0 -; KNL_64-NEXT: vmovdqa %xmm2, %xmm2 -; KNL_64-NEXT: vpslld $31, %ymm2, %ymm2 -; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 +; KNL_64-NEXT: vpslld $31, %xmm2, %xmm2 +; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k0 +; KNL_64-NEXT: kshiftlw $12, %k0, %k0 +; KNL_64-NEXT: kshiftrw $12, %k0, %k1 ; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq @@ -990,10 +991,11 @@ ; KNL_32: # %bb.0: ; KNL_32-NEXT: # kill: def %xmm1 killed %xmm1 def %ymm1 ; KNL_32-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0 -; KNL_32-NEXT: vmovdqa %xmm2, %xmm2 +; KNL_32-NEXT: vpslld $31, %xmm2, %xmm2 +; 
KNL_32-NEXT: vptestmd %zmm2, %zmm2, %k0 +; KNL_32-NEXT: kshiftlw $12, %k0, %k0 +; KNL_32-NEXT: kshiftrw $12, %k0, %k1 ; KNL_32-NEXT: vpmovsxdq %ymm1, %zmm1 -; KNL_32-NEXT: vpslld $31, %ymm2, %ymm2 -; KNL_32-NEXT: vptestmd %zmm2, %zmm2, %k1 ; KNL_32-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl @@ -1022,11 +1024,9 @@ ; KNL_64-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 ; KNL_64-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL_64-NEXT: vpsrad $31, %xmm1, %xmm1 -; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1 -; KNL_64-NEXT: vmovdqa %ymm1, %ymm1 -; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1 +; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL_64-NEXT: kshiftlw $12, %k0, %k0 +; KNL_64-NEXT: kshiftrw $12, %k0, %k1 ; KNL_64-NEXT: vscatterqpd %zmm0, (%rdi,%zmm2,8) {%k1} ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq @@ -1036,12 +1036,10 @@ ; KNL_32-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 ; KNL_32-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL_32-NEXT: vpsrad $31, %xmm1, %xmm1 -; KNL_32-NEXT: vpmovsxdq %xmm1, %ymm1 -; KNL_32-NEXT: vmovdqa %ymm1, %ymm1 +; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL_32-NEXT: kshiftlw $12, %k0, %k0 +; KNL_32-NEXT: kshiftrw $12, %k0, %k1 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1 ; KNL_32-NEXT: vscatterqpd %zmm0, (%eax,%zmm2,8) {%k1} ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl @@ -1073,10 +1071,10 @@ ; KNL_64: # %bb.0: ; KNL_64-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; KNL_64-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0 -; KNL_64-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,2],zero,zero -; KNL_64-NEXT: vmovaps %xmm2, %xmm2 -; KNL_64-NEXT: vpslld $31, %ymm2, %ymm2 -; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 +; KNL_64-NEXT: vpsllq $63, %xmm2, %xmm2 +; KNL_64-NEXT: vptestmq %zmm2, %zmm2, %k0 +; 
KNL_64-NEXT: kshiftlw $14, %k0, %k0 +; KNL_64-NEXT: kshiftrw $14, %k0, %k1 ; KNL_64-NEXT: vscatterqps %ymm0, (,%zmm1) {%k1} ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq @@ -1084,12 +1082,12 @@ ; KNL_32-LABEL: test20: ; KNL_32: # %bb.0: ; KNL_32-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0 +; KNL_32-NEXT: vpsllq $63, %xmm2, %xmm2 +; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k0 +; KNL_32-NEXT: kshiftlw $14, %k0, %k0 +; KNL_32-NEXT: kshiftrw $14, %k0, %k1 ; KNL_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] -; KNL_32-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,2],zero,zero -; KNL_32-NEXT: vmovaps %xmm2, %xmm2 ; KNL_32-NEXT: vpmovsxdq %ymm1, %zmm1 -; KNL_32-NEXT: vpslld $31, %ymm2, %ymm2 -; KNL_32-NEXT: vptestmd %zmm2, %zmm2, %k1 ; KNL_32-NEXT: vscatterqps %ymm0, (,%zmm1) {%k1} ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl @@ -1119,10 +1117,11 @@ ; KNL_64-LABEL: test21: ; KNL_64: # %bb.0: ; KNL_64-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 -; KNL_64-NEXT: vmovdqa %xmm2, %xmm2 +; KNL_64-NEXT: vpsllq $63, %xmm2, %xmm2 +; KNL_64-NEXT: vptestmq %zmm2, %zmm2, %k0 +; KNL_64-NEXT: kshiftlw $14, %k0, %k0 +; KNL_64-NEXT: kshiftrw $14, %k0, %k1 ; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; KNL_64-NEXT: vpsllq $63, %zmm2, %zmm2 -; KNL_64-NEXT: vptestmq %zmm2, %zmm2, %k1 ; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq @@ -1131,10 +1130,11 @@ ; KNL_32: # %bb.0: ; KNL_32-NEXT: vpsllq $32, %xmm1, %xmm1 ; KNL_32-NEXT: vpsraq $32, %zmm1, %zmm1 -; KNL_32-NEXT: vmovdqa %xmm2, %xmm2 +; KNL_32-NEXT: vpsllq $63, %xmm2, %xmm2 +; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k0 +; KNL_32-NEXT: kshiftlw $14, %k0, %k0 +; KNL_32-NEXT: kshiftrw $14, %k0, %k1 ; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; KNL_32-NEXT: vpsllq $63, %zmm2, %zmm2 -; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1 ; KNL_32-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl @@ -1170,12 +1170,12 @@ ; KNL_64-LABEL: test22: ; KNL_64: # %bb.0: 
; KNL_64-NEXT: # kill: def %xmm2 killed %xmm2 def %ymm2 +; KNL_64-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL_64-NEXT: kshiftlw $14, %k0, %k0 +; KNL_64-NEXT: kshiftrw $14, %k0, %k1 ; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; KNL_64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL_64-NEXT: vmovaps %xmm1, %xmm1 ; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0 -; KNL_64-NEXT: vpslld $31, %ymm1, %ymm1 -; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k1 ; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k1} ; KNL_64-NEXT: vmovaps %xmm2, %xmm0 ; KNL_64-NEXT: vzeroupper @@ -1184,13 +1184,13 @@ ; KNL_32-LABEL: test22: ; KNL_32: # %bb.0: ; KNL_32-NEXT: # kill: def %xmm2 killed %xmm2 def %ymm2 +; KNL_32-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL_32-NEXT: kshiftlw $14, %k0, %k0 +; KNL_32-NEXT: kshiftrw $14, %k0, %k1 ; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; KNL_32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL_32-NEXT: vmovaps %xmm1, %xmm1 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax ; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0 -; KNL_32-NEXT: vpslld $31, %ymm1, %ymm1 -; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 ; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k1} ; KNL_32-NEXT: vmovaps %xmm2, %xmm0 ; KNL_32-NEXT: vzeroupper @@ -1225,10 +1225,10 @@ ; KNL_64: # %bb.0: ; KNL_64-NEXT: # kill: def %xmm2 killed %xmm2 def %ymm2 ; KNL_64-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 -; KNL_64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL_64-NEXT: vmovaps %xmm1, %xmm1 -; KNL_64-NEXT: vpslld $31, %ymm1, %ymm1 -; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k1 +; KNL_64-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL_64-NEXT: kshiftlw $14, %k0, %k0 +; KNL_64-NEXT: kshiftrw $14, %k0, %k1 ; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k1} ; KNL_64-NEXT: vmovaps %xmm2, %xmm0 ; KNL_64-NEXT: vzeroupper @@ -1238,11 +1238,11 @@ ; KNL_32: # %bb.0: ; KNL_32-NEXT: 
# kill: def %xmm2 killed %xmm2 def %ymm2 ; KNL_32-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 -; KNL_32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL_32-NEXT: vmovaps %xmm1, %xmm1 +; KNL_32-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL_32-NEXT: kshiftlw $14, %k0, %k0 +; KNL_32-NEXT: kshiftrw $14, %k0, %k1 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpslld $31, %ymm1, %ymm1 -; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 ; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k1} ; KNL_32-NEXT: vmovaps %xmm2, %xmm0 ; KNL_32-NEXT: vzeroupper @@ -1275,30 +1275,30 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %src0) { ; KNL_64-LABEL: test23: ; KNL_64: # %bb.0: -; KNL_64-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] +; KNL_64-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL_64-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] ; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0 -; KNL_64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL_64-NEXT: vmovaps %xmm1, %xmm1 -; KNL_64-NEXT: vpslld $31, %ymm1, %ymm1 -; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k1 -; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm2 {%k1} -; KNL_64-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero +; KNL_64-NEXT: kshiftlw $14, %k0, %k0 +; KNL_64-NEXT: kshiftrw $14, %k0, %k1 +; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm1 {%k1} +; KNL_64-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq ; ; KNL_32-LABEL: test23: ; KNL_32: # %bb.0: +; KNL_32-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k0 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] +; KNL_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] ; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0 -; KNL_32-NEXT: vinsertps {{.*#+}} 
xmm1 = xmm1[0,2],zero,zero -; KNL_32-NEXT: vmovaps %xmm1, %xmm1 -; KNL_32-NEXT: vpslld $31, %ymm1, %ymm1 -; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 -; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm2 {%k1} -; KNL_32-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero +; KNL_32-NEXT: kshiftlw $14, %k0, %k0 +; KNL_32-NEXT: kshiftrw $14, %k0, %k1 +; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm1 {%k1} +; KNL_32-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl ; @@ -1332,27 +1332,27 @@ ; KNL_64-LABEL: test23b: ; KNL_64: # %bb.0: ; KNL_64-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 -; KNL_64-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] -; KNL_64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL_64-NEXT: vmovaps %xmm1, %xmm1 -; KNL_64-NEXT: vpslld $31, %ymm1, %ymm1 -; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k1 -; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm2 {%k1} -; KNL_64-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero +; KNL_64-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL_64-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] +; KNL_64-NEXT: kshiftlw $14, %k0, %k0 +; KNL_64-NEXT: kshiftrw $14, %k0, %k1 +; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm1 {%k1} +; KNL_64-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq ; ; KNL_32-LABEL: test23b: ; KNL_32: # %bb.0: ; KNL_32-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; KNL_32-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k0 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] -; KNL_32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,2],zero,zero -; KNL_32-NEXT: vmovaps %xmm1, %xmm1 -; KNL_32-NEXT: vpslld $31, %ymm1, %ymm1 -; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 -; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm2 {%k1} -; KNL_32-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero +; KNL_32-NEXT: 
vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] +; KNL_32-NEXT: kshiftlw $14, %k0, %k0 +; KNL_32-NEXT: kshiftrw $14, %k0, %k1 +; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm1 {%k1} +; KNL_32-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl ; @@ -1433,9 +1433,10 @@ ; KNL_64-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 ; KNL_64-NEXT: vpsllq $32, %xmm0, %xmm0 ; KNL_64-NEXT: vpsraq $32, %zmm0, %zmm0 -; KNL_64-NEXT: vmovdqa %xmm1, %xmm1 -; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1 +; KNL_64-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL_64-NEXT: kshiftlw $14, %k0, %k0 +; KNL_64-NEXT: kshiftrw $14, %k0, %k1 ; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm2 {%k1} ; KNL_64-NEXT: vmovdqa %xmm2, %xmm0 ; KNL_64-NEXT: vzeroupper @@ -1446,10 +1447,11 @@ ; KNL_32-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 ; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0 ; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm0 -; KNL_32-NEXT: vmovdqa %xmm1, %xmm1 +; KNL_32-NEXT: vpsllq $63, %xmm1, %xmm1 +; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k0 +; KNL_32-NEXT: kshiftlw $14, %k0, %k0 +; KNL_32-NEXT: kshiftrw $14, %k0, %k1 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1 ; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm2 {%k1} ; KNL_32-NEXT: vmovdqa %xmm2, %xmm0 ; KNL_32-NEXT: vzeroupper @@ -1500,10 +1502,8 @@ ; KNL_32-NEXT: vpsllq $32, %xmm0, %xmm0 ; KNL_32-NEXT: vpsraq $32, %zmm0, %zmm0 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; KNL_32-NEXT: vmovdqa %xmm2, %xmm2 -; KNL_32-NEXT: vpsllq $63, %zmm2, %zmm2 -; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1 +; KNL_32-NEXT: movb $3, %cl +; KNL_32-NEXT: kmovw %ecx, %k1 ; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm1 {%k1} ; KNL_32-NEXT: vmovdqa %xmm1, %xmm0 ; KNL_32-NEXT: vzeroupper @@ -1597,10 +1597,8 @@ ; KNL_32-NEXT: vpsllq $32, %xmm1, %xmm1 
; KNL_32-NEXT: vpsraq $32, %zmm1, %zmm1 ; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; KNL_32-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 -; KNL_32-NEXT: vmovdqa %xmm2, %xmm2 -; KNL_32-NEXT: vpsllq $63, %zmm2, %zmm2 -; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1 +; KNL_32-NEXT: movb $3, %al +; KNL_32-NEXT: kmovw %eax, %k1 ; KNL_32-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl @@ -1686,83 +1684,80 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32> %src0) { ; KNL_64-LABEL: test30: ; KNL_64: # %bb.0: +; KNL_64-NEXT: # kill: def %xmm3 killed %xmm3 def %zmm3 +; KNL_64-NEXT: vpslld $31, %xmm2, %xmm2 +; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 +; KNL_64-NEXT: kmovw %k1, %eax ; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1 ; KNL_64-NEXT: vpsllq $2, %ymm1, %ymm1 ; KNL_64-NEXT: vpaddq %ymm1, %ymm0, %ymm1 -; KNL_64-NEXT: testb $1, %dil +; KNL_64-NEXT: testb $1, %al ; KNL_64-NEXT: # implicit-def: %xmm0 -; KNL_64-NEXT: jne .LBB31_1 -; KNL_64-NEXT: # %bb.2: # %else -; KNL_64-NEXT: testb $1, %sil -; KNL_64-NEXT: jne .LBB31_3 -; KNL_64-NEXT: .LBB31_4: # %else2 -; KNL_64-NEXT: testb $1, %dl -; KNL_64-NEXT: jne .LBB31_5 -; KNL_64-NEXT: .LBB31_6: # %else5 -; KNL_64-NEXT: vmovd %edi, %xmm1 -; KNL_64-NEXT: vpinsrb $4, %esi, %xmm1, %xmm1 -; KNL_64-NEXT: vpinsrb $8, %edx, %xmm1, %xmm1 -; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL_64-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0 -; KNL_64-NEXT: vzeroupper -; KNL_64-NEXT: retq -; KNL_64-NEXT: .LBB31_1: # %cond.load +; KNL_64-NEXT: je .LBB31_2 +; KNL_64-NEXT: # %bb.1: # %cond.load ; KNL_64-NEXT: vmovq %xmm1, %rax ; KNL_64-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; KNL_64-NEXT: testb $1, %sil +; KNL_64-NEXT: .LBB31_2: # %else +; KNL_64-NEXT: kshiftrw $1, %k1, %k0 +; KNL_64-NEXT: kmovw %k0, %eax +; KNL_64-NEXT: testb $1, %al ; KNL_64-NEXT: je .LBB31_4 -; KNL_64-NEXT: .LBB31_3: # %cond.load1 +; KNL_64-NEXT: # %bb.3: # %cond.load1 ; KNL_64-NEXT: vpextrq $1, %xmm1, %rax ; 
KNL_64-NEXT: vpinsrd $1, (%rax), %xmm0, %xmm0 -; KNL_64-NEXT: testb $1, %dl +; KNL_64-NEXT: .LBB31_4: # %else2 +; KNL_64-NEXT: kshiftrw $2, %k1, %k0 +; KNL_64-NEXT: kmovw %k0, %eax +; KNL_64-NEXT: testb $1, %al ; KNL_64-NEXT: je .LBB31_6 -; KNL_64-NEXT: .LBB31_5: # %cond.load4 +; KNL_64-NEXT: # %bb.5: # %cond.load4 ; KNL_64-NEXT: vextracti128 $1, %ymm1, %xmm1 ; KNL_64-NEXT: vmovq %xmm1, %rax ; KNL_64-NEXT: vpinsrd $2, (%rax), %xmm0, %xmm0 -; KNL_64-NEXT: jmp .LBB31_6 +; KNL_64-NEXT: .LBB31_6: # %else5 +; KNL_64-NEXT: vmovdqa32 %zmm0, %zmm3 {%k1} +; KNL_64-NEXT: vmovdqa %xmm3, %xmm0 +; KNL_64-NEXT: vzeroupper +; KNL_64-NEXT: retq ; ; KNL_32-LABEL: test30: ; KNL_32: # %bb.0: -; KNL_32-NEXT: pushl %esi -; KNL_32-NEXT: .cfi_def_cfa_offset 8 -; KNL_32-NEXT: .cfi_offset %esi, -8 -; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %edx +; KNL_32-NEXT: subl $12, %esp +; KNL_32-NEXT: .cfi_def_cfa_offset 16 +; KNL_32-NEXT: vpslld $31, %xmm2, %xmm2 +; KNL_32-NEXT: vptestmd %zmm2, %zmm2, %k1 +; KNL_32-NEXT: kmovw %k1, %eax ; KNL_32-NEXT: vpslld $2, %xmm1, %xmm1 -; KNL_32-NEXT: vpaddd %xmm1, %xmm0, %xmm1 -; KNL_32-NEXT: testb $1, %dl -; KNL_32-NEXT: # implicit-def: %xmm0 -; KNL_32-NEXT: jne .LBB31_1 -; KNL_32-NEXT: # %bb.2: # %else -; KNL_32-NEXT: testb $1, %cl -; KNL_32-NEXT: jne .LBB31_3 -; KNL_32-NEXT: .LBB31_4: # %else2 +; KNL_32-NEXT: vpaddd %xmm1, %xmm0, %xmm2 +; KNL_32-NEXT: testb $1, %al +; KNL_32-NEXT: # implicit-def: %xmm1 +; KNL_32-NEXT: je .LBB31_2 +; KNL_32-NEXT: # %bb.1: # %cond.load +; KNL_32-NEXT: vmovd %xmm2, %eax +; KNL_32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; KNL_32-NEXT: .LBB31_2: # %else +; KNL_32-NEXT: kshiftrw $1, %k1, %k0 +; KNL_32-NEXT: kmovw %k0, %eax ; KNL_32-NEXT: testb $1, %al -; KNL_32-NEXT: jne .LBB31_5 -; KNL_32-NEXT: .LBB31_6: # %else5 -; KNL_32-NEXT: vmovd %edx, %xmm1 -; KNL_32-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1 -; KNL_32-NEXT: vpinsrb $8, %eax, %xmm1, 
%xmm1 -; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL_32-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0 -; KNL_32-NEXT: popl %esi -; KNL_32-NEXT: retl -; KNL_32-NEXT: .LBB31_1: # %cond.load -; KNL_32-NEXT: vmovd %xmm1, %esi -; KNL_32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; KNL_32-NEXT: testb $1, %cl ; KNL_32-NEXT: je .LBB31_4 -; KNL_32-NEXT: .LBB31_3: # %cond.load1 -; KNL_32-NEXT: vpextrd $1, %xmm1, %esi -; KNL_32-NEXT: vpinsrd $1, (%esi), %xmm0, %xmm0 +; KNL_32-NEXT: # %bb.3: # %cond.load1 +; KNL_32-NEXT: vpextrd $1, %xmm2, %eax +; KNL_32-NEXT: vpinsrd $1, (%eax), %xmm1, %xmm1 +; KNL_32-NEXT: .LBB31_4: # %else2 +; KNL_32-NEXT: vmovdqa {{[0-9]+}}(%esp), %xmm0 +; KNL_32-NEXT: kshiftrw $2, %k1, %k0 +; KNL_32-NEXT: kmovw %k0, %eax ; KNL_32-NEXT: testb $1, %al ; KNL_32-NEXT: je .LBB31_6 -; KNL_32-NEXT: .LBB31_5: # %cond.load4 -; KNL_32-NEXT: vpextrd $2, %xmm1, %esi -; KNL_32-NEXT: vpinsrd $2, (%esi), %xmm0, %xmm0 -; KNL_32-NEXT: jmp .LBB31_6 +; KNL_32-NEXT: # %bb.5: # %cond.load4 +; KNL_32-NEXT: vpextrd $2, %xmm2, %eax +; KNL_32-NEXT: vpinsrd $2, (%eax), %xmm1, %xmm1 +; KNL_32-NEXT: .LBB31_6: # %else5 +; KNL_32-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} +; KNL_32-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 +; KNL_32-NEXT: addl $12, %esp +; KNL_32-NEXT: vzeroupper +; KNL_32-NEXT: retl ; ; SKX-LABEL: test30: ; SKX: # %bb.0: @@ -2355,11 +2350,9 @@ ; KNL_64: # %bb.0: ; KNL_64-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL_64-NEXT: vpsrad $31, %xmm1, %xmm1 -; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1 -; KNL_64-NEXT: vmovdqa %ymm1, %ymm1 -; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1 +; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL_64-NEXT: kshiftlw $12, %k0, %k0 +; KNL_64-NEXT: kshiftrw $12, %k0, %k1 ; KNL_64-NEXT: vpgatherqq (,%zmm0), %zmm1 {%k1} ; KNL_64-NEXT: vpaddq %ymm1, %ymm1, %ymm0 ; KNL_64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 @@ -2376,12 +2369,10 @@ ; KNL_32-NEXT: subl $32, %esp ; 
KNL_32-NEXT: # kill: def %xmm0 killed %xmm0 def %ymm0 ; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1 -; KNL_32-NEXT: vpsrad $31, %xmm1, %xmm1 -; KNL_32-NEXT: vpmovsxdq %xmm1, %ymm1 -; KNL_32-NEXT: vmovdqa %ymm1, %ymm1 +; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL_32-NEXT: kshiftlw $12, %k0, %k0 +; KNL_32-NEXT: kshiftrw $12, %k0, %k1 ; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0 -; KNL_32-NEXT: vpsllq $63, %zmm1, %zmm1 -; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1 ; KNL_32-NEXT: vpgatherqq (,%zmm0), %zmm1 {%k1} ; KNL_32-NEXT: vpaddq %ymm1, %ymm1, %ymm0 ; KNL_32-NEXT: vpaddq %ymm0, %ymm1, %ymm0 @@ -2547,14 +2538,14 @@ ; KNL_64-LABEL: large_index: ; KNL_64: # %bb.0: ; KNL_64-NEXT: # kill: def %xmm1 killed %xmm1 def %ymm1 -; KNL_64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; KNL_64-NEXT: vmovaps %xmm0, %xmm0 -; KNL_64-NEXT: vmovq %rcx, %xmm2 -; KNL_64-NEXT: vmovq %rsi, %xmm3 -; KNL_64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] -; KNL_64-NEXT: vpslld $31, %ymm0, %ymm0 -; KNL_64-NEXT: vptestmd %zmm0, %zmm0, %k1 -; KNL_64-NEXT: vgatherqps (%rdi,%zmm2,4), %ymm1 {%k1} +; KNL_64-NEXT: vpsllq $63, %xmm0, %xmm0 +; KNL_64-NEXT: vptestmq %zmm0, %zmm0, %k0 +; KNL_64-NEXT: kshiftlw $14, %k0, %k0 +; KNL_64-NEXT: kshiftrw $14, %k0, %k1 +; KNL_64-NEXT: vmovq %rcx, %xmm0 +; KNL_64-NEXT: vmovq %rsi, %xmm2 +; KNL_64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm1 {%k1} ; KNL_64-NEXT: vmovaps %xmm1, %xmm0 ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq @@ -2562,16 +2553,16 @@ ; KNL_32-LABEL: large_index: ; KNL_32: # %bb.0: ; KNL_32-NEXT: # kill: def %xmm1 killed %xmm1 def %ymm1 -; KNL_32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; KNL_32-NEXT: vmovaps %xmm0, %xmm0 +; KNL_32-NEXT: vpsllq $63, %xmm0, %xmm0 +; KNL_32-NEXT: vptestmq %zmm0, %zmm0, %k0 +; KNL_32-NEXT: kshiftlw $14, %k0, %k0 +; KNL_32-NEXT: kshiftrw $14, %k0, %k1 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero 
-; KNL_32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm2, %xmm2 -; KNL_32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm2, %xmm2 -; KNL_32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm2, %xmm2 -; KNL_32-NEXT: vpslld $31, %ymm0, %ymm0 -; KNL_32-NEXT: vptestmd %zmm0, %zmm0, %k1 -; KNL_32-NEXT: vgatherqps (%eax,%zmm2,4), %ymm1 {%k1} +; KNL_32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; KNL_32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 +; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm1 {%k1} ; KNL_32-NEXT: vmovaps %xmm1, %xmm0 ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl @@ -2700,9 +2691,10 @@ ; KNL_64-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL_64-NEXT: vpsllq $32, %xmm1, %xmm1 ; KNL_64-NEXT: vpsraq $32, %zmm1, %zmm1 -; KNL_64-NEXT: vmovdqa %xmm2, %xmm2 -; KNL_64-NEXT: vpsllq $63, %zmm2, %zmm2 -; KNL_64-NEXT: vptestmq %zmm2, %zmm2, %k1 +; KNL_64-NEXT: vpsllq $63, %xmm2, %xmm2 +; KNL_64-NEXT: vptestmq %zmm2, %zmm2, %k0 +; KNL_64-NEXT: kshiftlw $14, %k0, %k0 +; KNL_64-NEXT: kshiftrw $14, %k0, %k1 ; KNL_64-NEXT: vscatterqpd %zmm0, (%rdi,%zmm1,8) {%k1} ; KNL_64-NEXT: vzeroupper ; KNL_64-NEXT: retq @@ -2712,10 +2704,11 @@ ; KNL_32-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; KNL_32-NEXT: vpsllq $32, %xmm1, %xmm1 ; KNL_32-NEXT: vpsraq $32, %zmm1, %zmm1 -; KNL_32-NEXT: vmovdqa %xmm2, %xmm2 +; KNL_32-NEXT: vpsllq $63, %xmm2, %xmm2 +; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k0 +; KNL_32-NEXT: kshiftlw $14, %k0, %k0 +; KNL_32-NEXT: kshiftrw $14, %k0, %k1 ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vpsllq $63, %zmm2, %zmm2 -; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1 ; KNL_32-NEXT: vscatterqpd %zmm0, (%eax,%zmm1,8) {%k1} ; KNL_32-NEXT: vzeroupper ; KNL_32-NEXT: retl Index: llvm/trunk/test/CodeGen/X86/masked_memop.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/masked_memop.ll +++ 
llvm/trunk/test/CodeGen/X86/masked_memop.ll @@ -99,10 +99,15 @@ ; ; AVX512F-LABEL: test6: ; AVX512F: ## %bb.0: +; AVX512F-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 -; AVX512F-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 +; AVX512F-NEXT: vpcmpeqq %zmm2, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $14, %k0, %k0 +; AVX512F-NEXT: kshiftrw $14, %k0, %k1 +; AVX512F-NEXT: vblendmpd (%rdi), %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: test6: @@ -127,10 +132,15 @@ ; ; AVX512F-LABEL: test7: ; AVX512F: ## %bb.0: +; AVX512F-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 -; AVX512F-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 +; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $12, %k0, %k0 +; AVX512F-NEXT: kshiftrw $12, %k0, %k1 +; AVX512F-NEXT: vblendmps (%rdi), %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: test7: @@ -163,10 +173,15 @@ ; ; AVX512F-LABEL: test8: ; AVX512F: ## %bb.0: +; AVX512F-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 -; AVX512F-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 +; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $12, %k0, %k0 +; AVX512F-NEXT: kshiftrw $12, %k0, %k1 +; AVX512F-NEXT: vpblendmd (%rdi), %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: ## 
kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: test8: @@ -197,9 +212,14 @@ ; ; AVX512F-LABEL: test9: ; AVX512F: ## %bb.0: +; AVX512F-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) +; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $12, %k0, %k0 +; AVX512F-NEXT: kshiftrw $12, %k0, %k1 +; AVX512F-NEXT: vmovdqu32 %zmm1, (%rdi) {%k1} +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: test9: @@ -237,11 +257,14 @@ ; ; AVX512F-LABEL: test10: ; AVX512F: ## %bb.0: +; AVX512F-NEXT: ## kill: def %ymm1 killed %ymm1 def %zmm1 +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vpmovsxdq %xmm0, %ymm0 -; AVX512F-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 -; AVX512F-NEXT: vblendvpd %ymm0, %ymm2, %ymm1, %ymm0 +; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $12, %k0, %k0 +; AVX512F-NEXT: kshiftrw $12, %k0, %k1 +; AVX512F-NEXT: vblendmpd (%rdi), %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0 ; AVX512F-NEXT: retq ; ; SKX-LABEL: test10: @@ -277,10 +300,13 @@ ; ; AVX512F-LABEL: test10b: ; AVX512F: ## %bb.0: +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vpmovsxdq %xmm0, %ymm0 -; AVX512F-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm0 +; AVX512F-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $12, %k0, %k0 +; AVX512F-NEXT: kshiftrw $12, %k0, %k1 +; AVX512F-NEXT: vmovupd (%rdi), %zmm0 {%k1} {z} +; AVX512F-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0 ; AVX512F-NEXT: retq ; ; SKX-LABEL: test10b: @@ -525,11 +551,14 @@ ; ; 
AVX512F-LABEL: test14: ; AVX512F: ## %bb.0: +; AVX512F-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] -; AVX512F-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; AVX512F-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) +; AVX512F-NEXT: vpcmpeqq %zmm2, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $14, %k0, %k0 +; AVX512F-NEXT: kshiftrw $14, %k0, %k1 +; AVX512F-NEXT: vmovups %zmm1, (%rdi) {%k1} +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: test14: @@ -569,10 +598,12 @@ ; AVX512F: ## %bb.0: ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] -; AVX512F-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] -; AVX512F-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) +; AVX512F-NEXT: vpcmpeqq %zmm2, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $14, %k0, %k0 +; AVX512F-NEXT: kshiftrw $14, %k0, %k1 +; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] +; AVX512F-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1} +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: test15: @@ -610,12 +641,15 @@ ; ; AVX512F-LABEL: test16: ; AVX512F: ## %bb.0: +; AVX512F-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] -; AVX512F-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; AVX512F-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 -; AVX512F-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 +; AVX512F-NEXT: vpcmpeqq %zmm2, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $14, %k0, %k0 +; AVX512F-NEXT: kshiftrw $14, %k0, %k1 +; AVX512F-NEXT: vblendmps (%rdi), %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: 
vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: test16: @@ -659,12 +693,13 @@ ; AVX512F: ## %bb.0: ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] -; AVX512F-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; AVX512F-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 -; AVX512F-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,3] -; AVX512F-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 +; AVX512F-NEXT: vpcmpeqq %zmm2, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $14, %k0, %k0 +; AVX512F-NEXT: kshiftrw $14, %k0, %k1 +; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] +; AVX512F-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} ; AVX512F-NEXT: vpmovsxdq %xmm0, %xmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: test17: @@ -704,9 +739,12 @@ ; AVX512F: ## %bb.0: ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; AVX512F-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero -; AVX512F-NEXT: vmaskmovps (%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $14, %k0, %k0 +; AVX512F-NEXT: kshiftrw $14, %k0, %k1 +; AVX512F-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: test18: @@ -729,8 +767,11 @@ ; ; AVX512F-LABEL: load_all: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vmaskmovps (%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: movw $15, %ax +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: load_all: @@ -755,9 +796,12 @@ ; ; AVX512F-LABEL: mload_constmask_v4f32: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vmovaps {{.*#+}} xmm1 = 
[4294967295,0,4294967295,4294967295] -; AVX512F-NEXT: vmaskmovps (%rdi), %xmm1, %xmm2 -; AVX512F-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0 +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512F-NEXT: movw $13, %ax +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vmovups (%rdi), %zmm0 {%k1} +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: mload_constmask_v4f32: @@ -789,9 +833,12 @@ ; ; AVX512F-LABEL: mload_constmask_v4i32: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [0,4294967295,4294967295,4294967295] -; AVX512F-NEXT: vpmaskmovd (%rdi), %xmm1, %xmm2 -; AVX512F-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0 +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512F-NEXT: movw $14, %ax +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: mload_constmask_v4i32: @@ -843,9 +890,11 @@ ; ; AVX512F-LABEL: mload_constmask_v4f64: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vmovapd {{.*#+}} ymm1 = [18446744073709551615,18446744073709551615,18446744073709551615,0] -; AVX512F-NEXT: vmaskmovpd (%rdi), %ymm1, %ymm2 -; AVX512F-NEXT: vblendvpd %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0 +; AVX512F-NEXT: movb $7, %al +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vmovupd (%rdi), %zmm0 {%k1} +; AVX512F-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0 ; AVX512F-NEXT: retq ; ; SKX-LABEL: mload_constmask_v4f64: @@ -898,9 +947,11 @@ ; ; AVX512F-LABEL: mload_constmask_v4i64: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [18446744073709551615,0,0,18446744073709551615] -; AVX512F-NEXT: vpmaskmovq (%rdi), %ymm1, %ymm2 -; AVX512F-NEXT: vblendvpd %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: ## kill: def %ymm0 killed %ymm0 def %zmm0 +; AVX512F-NEXT: movb $9, %al +; 
AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} +; AVX512F-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0 ; AVX512F-NEXT: retq ; ; SKX-LABEL: mload_constmask_v4i64: @@ -950,8 +1001,10 @@ ; ; AVX512F-LABEL: mload_constmask_v4f64_undef_passthrough: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,18446744073709551615,0] -; AVX512F-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm0 +; AVX512F-NEXT: movb $7, %al +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vmovupd (%rdi), %zmm0 {%k1} {z} +; AVX512F-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0 ; AVX512F-NEXT: retq ; ; SKX-LABEL: mload_constmask_v4f64_undef_passthrough: @@ -979,8 +1032,10 @@ ; ; AVX512F-LABEL: mload_constmask_v4i64_undef_passthrough: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm0 = [0,18446744073709551615,18446744073709551615,0] -; AVX512F-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm0 +; AVX512F-NEXT: movb $6, %al +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z} +; AVX512F-NEXT: ## kill: def %ymm0 killed %ymm0 killed %zmm0 ; AVX512F-NEXT: retq ; ; SKX-LABEL: mload_constmask_v4i64_undef_passthrough: @@ -1008,8 +1063,11 @@ ; ; AVX512F-LABEL: test21: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) +; AVX512F-NEXT: ## kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512F-NEXT: movw $15, %ax +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vmovdqu32 %zmm1, (%rdi) {%k1} +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: test21: @@ -1225,7 +1283,14 @@ ; ; AVX512F-LABEL: trunc_mask: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vmaskmovps %xmm0, %xmm2, (%rdi) +; AVX512F-NEXT: ## kill: def %xmm2 killed %xmm2 def %zmm2 +; AVX512F-NEXT: ## kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm1, %k0 +; AVX512F-NEXT: kshiftlw $12, %k0, %k0 +; AVX512F-NEXT: 
kshiftrw $12, %k0, %k1 +; AVX512F-NEXT: vmovups %zmm0, (%rdi) {%k1} +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; SKX-LABEL: trunc_mask: Index: llvm/trunk/test/CodeGen/X86/pr33349.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/pr33349.ll +++ llvm/trunk/test/CodeGen/X86/pr33349.ll @@ -8,32 +8,38 @@ define void @test(<4 x i1> %m, <4 x x86_fp80> %v, <4 x x86_fp80>*%p) local_unnamed_addr { ; KNL-LABEL: test: ; KNL: # %bb.0: # %bb -; KNL-NEXT: vpextrb $0, %xmm0, %eax +; KNL-NEXT: vpslld $31, %xmm0, %xmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: kshiftrw $1, %k0, %k1 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kshiftrw $2, %k0, %k1 +; KNL-NEXT: kshiftrw $1, %k1, %k2 +; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: testb $1, %al ; KNL-NEXT: fld1 ; KNL-NEXT: fldz ; KNL-NEXT: fld %st(0) ; KNL-NEXT: fcmovne %st(2), %st(0) -; KNL-NEXT: vpextrb $4, %xmm0, %eax -; KNL-NEXT: testb $1, %al +; KNL-NEXT: testb $1, %cl ; KNL-NEXT: fld %st(1) ; KNL-NEXT: fcmovne %st(3), %st(0) -; KNL-NEXT: vpextrb $8, %xmm0, %eax +; KNL-NEXT: kmovw %k2, %eax ; KNL-NEXT: testb $1, %al ; KNL-NEXT: fld %st(2) ; KNL-NEXT: fcmovne %st(4), %st(0) -; KNL-NEXT: vpextrb $12, %xmm0, %eax +; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: testb $1, %al ; KNL-NEXT: fxch %st(3) ; KNL-NEXT: fcmovne %st(4), %st(0) ; KNL-NEXT: fstp %st(4) ; KNL-NEXT: fxch %st(3) +; KNL-NEXT: fstpt (%rdi) +; KNL-NEXT: fxch %st(1) ; KNL-NEXT: fstpt 30(%rdi) ; KNL-NEXT: fxch %st(1) ; KNL-NEXT: fstpt 20(%rdi) -; KNL-NEXT: fxch %st(1) ; KNL-NEXT: fstpt 10(%rdi) -; KNL-NEXT: fstpt (%rdi) +; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: test: Index: llvm/trunk/test/CodeGen/X86/sse-fsignum.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse-fsignum.ll +++ llvm/trunk/test/CodeGen/X86/sse-fsignum.ll @@ -10,17 +10,44 @@ ; define void @signum32a(<4 x float>*) { -; AVX-LABEL: signum32a: -; AVX: # %bb.0: # %entry -; AVX-NEXT: 
vmovaps (%rdi), %xmm0 -; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vcmpltps %xmm1, %xmm0, %xmm2 -; AVX-NEXT: vcvtdq2ps %xmm2, %xmm2 -; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 -; AVX-NEXT: vsubps %xmm0, %xmm2, %xmm0 -; AVX-NEXT: vmovaps %xmm0, (%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: signum32a: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vmovaps (%rdi), %xmm0 +; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vcmpltps %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vcvtdq2ps %xmm2, %xmm2 +; AVX1-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX1-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: vmovaps %xmm0, (%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: signum32a: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vmovaps (%rdi), %xmm0 +; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vcmpltps %xmm1, %xmm0, %xmm2 +; AVX2-NEXT: vcvtdq2ps %xmm2, %xmm2 +; AVX2-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX2-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; AVX2-NEXT: vmovaps %xmm0, (%rdi) +; AVX2-NEXT: retq +; +; AVX512F-LABEL: signum32a: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vmovaps (%rdi), %xmm0 +; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vcmpltps %zmm1, %zmm0, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} +; AVX512F-NEXT: vcvtdq2ps %xmm2, %xmm2 +; AVX512F-NEXT: vcmpltps %zmm0, %zmm1, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX512F-NEXT: vsubps %xmm0, %xmm2, %xmm0 +; AVX512F-NEXT: vmovaps %xmm0, (%rdi) +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq entry: %1 = load <4 x float>, <4 x float>* %0 %2 = fcmp olt <4 x float> %1, zeroinitializer @@ -33,19 +60,48 @@ } define void @signum64a(<2 x double>*) { -; AVX-LABEL: signum64a: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vmovapd (%rdi), %xmm0 -; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vcmpltpd %xmm1, %xmm0, %xmm2 -; AVX-NEXT: 
vpermilps {{.*#+}} xmm2 = xmm2[0,2,2,3] -; AVX-NEXT: vcvtdq2pd %xmm2, %xmm2 -; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 -; AVX-NEXT: vsubpd %xmm0, %xmm2, %xmm0 -; AVX-NEXT: vmovapd %xmm0, (%rdi) -; AVX-NEXT: retq +; AVX1-LABEL: signum64a: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vmovapd (%rdi), %xmm0 +; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vcmpltpd %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,2,2,3] +; AVX1-NEXT: vcvtdq2pd %xmm2, %xmm2 +; AVX1-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX1-NEXT: vsubpd %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: vmovapd %xmm0, (%rdi) +; AVX1-NEXT: retq +; +; AVX2-LABEL: signum64a: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vmovapd (%rdi), %xmm0 +; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vcmpltpd %xmm1, %xmm0, %xmm2 +; AVX2-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,2,2,3] +; AVX2-NEXT: vcvtdq2pd %xmm2, %xmm2 +; AVX2-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX2-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX2-NEXT: vsubpd %xmm0, %xmm2, %xmm0 +; AVX2-NEXT: vmovapd %xmm0, (%rdi) +; AVX2-NEXT: retq +; +; AVX512F-LABEL: signum64a: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vmovapd (%rdi), %xmm0 +; AVX512F-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vcmpltpd %zmm1, %zmm0, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} +; AVX512F-NEXT: vcvtdq2pd %xmm2, %xmm2 +; AVX512F-NEXT: vcmpltpd %zmm0, %zmm1, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX512F-NEXT: vsubpd %xmm0, %xmm2, %xmm0 +; AVX512F-NEXT: vmovapd %xmm0, (%rdi) +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq entry: %1 = load <2 x double>, <2 x double>* %0 %2 = fcmp olt <2 x double> %1, zeroinitializer @@ -152,11 +208,11 @@ ; AVX512F: # 
%bb.0: # %entry ; AVX512F-NEXT: vmovapd (%rdi), %ymm0 ; AVX512F-NEXT: vxorpd %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2 -; AVX512F-NEXT: vpmovqd %zmm2, %ymm2 +; AVX512F-NEXT: vcmpltpd %zmm1, %zmm0, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; AVX512F-NEXT: vcvtdq2pd %xmm2, %ymm2 -; AVX512F-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 -; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512F-NEXT: vcmpltpd %zmm0, %zmm1, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: vcvtdq2pd %xmm0, %ymm0 ; AVX512F-NEXT: vsubpd %ymm0, %ymm2, %ymm0 ; AVX512F-NEXT: vmovapd %ymm0, (%rdi) Index: llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll @@ -6,7 +6,14 @@ define <2 x i1> @shuf2i1_1_0(<2 x i1> %a) { ; AVX512F-LABEL: shuf2i1_1_0: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1] +; AVX512F-NEXT: vpsllq $63, %xmm0, %xmm0 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 +; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 +; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: shuf2i1_1_0: @@ -36,9 +43,16 @@ define <2 x i1> @shuf2i1_1_2(<2 x i1> %a) { ; AVX512F-LABEL: shuf2i1_1_2: ; AVX512F: # %bb.0: -; AVX512F-NEXT: movl $1, %eax +; AVX512F-NEXT: vpsllq $63, %xmm0, %xmm0 +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 +; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: movq $-1, %rax ; AVX512F-NEXT: vmovq %rax, %xmm1 ; AVX512F-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7] +; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1 +; 
AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: shuf2i1_1_2: @@ -73,7 +87,14 @@ define <4 x i1> @shuf4i1_3_2_10(<4 x i1> %a) { ; AVX512F-LABEL: shuf4i1_3_2_10: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] +; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0] +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: shuf4i1_3_2_10: Index: llvm/trunk/test/CodeGen/X86/vselect-pcmp.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vselect-pcmp.ll +++ llvm/trunk/test/CodeGen/X86/vselect-pcmp.ll @@ -43,10 +43,22 @@ } define <4 x i32> @signbit_sel_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) { -; AVX12F-LABEL: signbit_sel_v4i32: -; AVX12F: # %bb.0: -; AVX12F-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 -; AVX12F-NEXT: retq +; AVX12-LABEL: signbit_sel_v4i32: +; AVX12: # %bb.0: +; AVX12-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; AVX12-NEXT: retq +; +; AVX512F-LABEL: signbit_sel_v4i32: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 +; AVX512F-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm3, %k1 +; AVX512F-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: signbit_sel_v4i32: ; AVX512VL: # %bb.0: @@ -60,10 +72,22 @@ } define <2 x i64> 
@signbit_sel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) { -; AVX12F-LABEL: signbit_sel_v2i64: -; AVX12F: # %bb.0: -; AVX12F-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 -; AVX12F-NEXT: retq +; AVX12-LABEL: signbit_sel_v2i64: +; AVX12: # %bb.0: +; AVX12-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX12-NEXT: retq +; +; AVX512F-LABEL: signbit_sel_v2i64: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 +; AVX512F-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpgtq %zmm2, %zmm3, %k1 +; AVX512F-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: signbit_sel_v2i64: ; AVX512VL: # %bb.0: @@ -77,10 +101,22 @@ } define <4 x float> @signbit_sel_v4f32(<4 x float> %x, <4 x float> %y, <4 x i32> %mask) { -; AVX12F-LABEL: signbit_sel_v4f32: -; AVX12F: # %bb.0: -; AVX12F-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 -; AVX12F-NEXT: retq +; AVX12-LABEL: signbit_sel_v4f32: +; AVX12: # %bb.0: +; AVX12-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; AVX12-NEXT: retq +; +; AVX512F-LABEL: signbit_sel_v4f32: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 +; AVX512F-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm3, %k1 +; AVX512F-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: signbit_sel_v4f32: ; AVX512VL: # %bb.0: @@ -94,10 +130,22 @@ } define <2 x double> @signbit_sel_v2f64(<2 x double> %x, <2 x double> %y, <2 x i64> %mask) { -; AVX12F-LABEL: signbit_sel_v2f64: -; AVX12F: # %bb.0: -; AVX12F-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, 
%xmm0 -; AVX12F-NEXT: retq +; AVX12-LABEL: signbit_sel_v2f64: +; AVX12: # %bb.0: +; AVX12-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 +; AVX12-NEXT: retq +; +; AVX512F-LABEL: signbit_sel_v2f64: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 +; AVX512F-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpgtq %zmm2, %zmm3, %k1 +; AVX512F-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: signbit_sel_v2f64: ; AVX512VL: # %bb.0: @@ -203,10 +251,21 @@ } define <4 x i64> @signbit_sel_v4i64(<4 x i64> %x, <4 x i64> %y, <4 x i64> %mask) { -; AVX12F-LABEL: signbit_sel_v4i64: -; AVX12F: # %bb.0: -; AVX12F-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 -; AVX12F-NEXT: retq +; AVX12-LABEL: signbit_sel_v4i64: +; AVX12: # %bb.0: +; AVX12-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX12-NEXT: retq +; +; AVX512F-LABEL: signbit_sel_v4i64: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 +; AVX512F-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; AVX512F-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpgtq %zmm2, %zmm3, %k1 +; AVX512F-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 +; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: signbit_sel_v4i64: ; AVX512VL: # %bb.0: @@ -220,10 +279,21 @@ } define <4 x double> @signbit_sel_v4f64(<4 x double> %x, <4 x double> %y, <4 x i64> %mask) { -; AVX12F-LABEL: signbit_sel_v4f64: -; AVX12F: # %bb.0: -; AVX12F-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 -; AVX12F-NEXT: retq +; AVX12-LABEL: signbit_sel_v4f64: +; AVX12: # %bb.0: +; AVX12-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX12-NEXT: retq +; +; AVX512F-LABEL: signbit_sel_v4f64: +; 
AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def %ymm2 killed %ymm2 def %zmm2 +; AVX512F-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; AVX512F-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpgtq %zmm2, %zmm3, %k1 +; AVX512F-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 +; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: signbit_sel_v4f64: ; AVX512VL: # %bb.0: @@ -256,8 +326,13 @@ ; ; AVX512F-LABEL: signbit_sel_v4f64_small_mask: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpmovsxdq %xmm2, %ymm2 -; AVX512F-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: # kill: def %xmm2 killed %xmm2 def %zmm2 +; AVX512F-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 +; AVX512F-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 +; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm3, %k1 +; AVX512F-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: signbit_sel_v4f64_small_mask: @@ -296,12 +371,23 @@ ; (2) FIXME: If we don't care about signed-zero (and NaN?), the compare should still get folded. 
define <4 x float> @signbit_sel_v4f32_fcmp(<4 x float> %x, <4 x float> %y, <4 x float> %mask) #0 { -; AVX12F-LABEL: signbit_sel_v4f32_fcmp: -; AVX12F: # %bb.0: -; AVX12F-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; AVX12F-NEXT: vcmpltps %xmm2, %xmm0, %xmm2 -; AVX12F-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 -; AVX12F-NEXT: retq +; AVX12-LABEL: signbit_sel_v4f32_fcmp: +; AVX12: # %bb.0: +; AVX12-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; AVX12-NEXT: vcmpltps %xmm2, %xmm0, %xmm2 +; AVX12-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 +; AVX12-NEXT: retq +; +; AVX512F-LABEL: signbit_sel_v4f32_fcmp: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 +; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 +; AVX512F-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; AVX512F-NEXT: vcmpltps %zmm2, %zmm0, %k1 +; AVX512F-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: signbit_sel_v4f32_fcmp: ; AVX512VL: # %bb.0: