Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -1078,6 +1078,14 @@ /// \brief Customize the preferred legalization strategy for certain types. LegalizeTypeAction getPreferredVectorAction(EVT VT) const override; + MVT getRegisterTypeForCallingConv(MVT VT) const override; + + MVT getRegisterTypeForCallingConv(LLVMContext &Context, + EVT VT) const override; + + unsigned getNumRegistersForCallingConv(LLVMContext &Context, + EVT VT) const override; + bool isIntDivCheap(EVT VT, AttributeList Attr) const override; bool supportSwiftError() const override; Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -1720,6 +1720,9 @@ TargetLoweringBase::LegalizeTypeAction X86TargetLowering::getPreferredVectorAction(EVT VT) const { + if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI()) + return TypeSplitVector; + if (ExperimentalVectorWideningLegalization && VT.getVectorNumElements() != 1 && VT.getVectorElementType().getSimpleVT() != MVT::i1) @@ -1728,6 +1731,26 @@ return TargetLoweringBase::getPreferredVectorAction(VT); } +MVT X86TargetLowering::getRegisterTypeForCallingConv(MVT VT) const { + if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI()) + return MVT::v32i8; + return TargetLowering::getRegisterTypeForCallingConv(VT); +} + +MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, + EVT VT) const { + if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI()) + return MVT::v32i8; + return TargetLowering::getRegisterTypeForCallingConv(Context, VT); +} + +unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, + EVT VT) const { + if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI()) + return 1; + return TargetLowering::getNumRegistersForCallingConv(Context, VT); +} + EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext& Context, EVT VT) const { Index: test/CodeGen/X86/avg-mask.ll =================================================================== --- test/CodeGen/X86/avg-mask.ll +++ test/CodeGen/X86/avg-mask.ll @@ -60,22 +60,16 @@ define <32 x i8> @avg_v32i8_mask(<32 x i8> %a, <32 x i8> %b, <32 x i8> %src, i32 %mask) nounwind { ; AVX512F-LABEL: avg_v32i8_mask: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $32, %rsp -; AVX512F-NEXT: movl %edi, (%rsp) +; AVX512F-NEXT: kmovw %edi, %k1 +; AVX512F-NEXT: shrl $16, %edi ; AVX512F-NEXT: vpavgb %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: kmovw (%rsp), %k1 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 +; AVX512F-NEXT: kmovw %edi, %k2 ; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} ; AVX512F-NEXT: vpmovdb %zmm3, %xmm3 ; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 ; AVX512F-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0 -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp ; AVX512F-NEXT: retq ; ; AVX512BWVL-LABEL: avg_v32i8_mask: @@ -98,22 +92,16 @@ define <32 x i8> @avg_v32i8_maskz(<32 x i8> %a, <32 x i8> %b, i32 %mask) nounwind { ; AVX512F-LABEL: avg_v32i8_maskz: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $32, %rsp -; AVX512F-NEXT: movl %edi, (%rsp) +; AVX512F-NEXT: kmovw %edi, %k1 +; AVX512F-NEXT: shrl $16, %edi ; AVX512F-NEXT: vpavgb %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: kmovw (%rsp), %k1 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 +; AVX512F-NEXT: kmovw %edi, %k2 ; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} ; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 ; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 ; AVX512F-NEXT: vpand %ymm0, %ymm1, %ymm0 -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp ; AVX512F-NEXT: retq ; ; AVX512BWVL-LABEL: avg_v32i8_maskz: @@ -135,34 +123,36 @@ define <64 x i8> @avg_v64i8_mask(<64 x i8> %a, <64 x i8> %b, <64 x i8> %src, i64 %mask) nounwind { ; AVX512F-LABEL: avg_v64i8_mask: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $64, %rsp ; AVX512F-NEXT: movq %rdi, %rax -; AVX512F-NEXT: shrq $32, %rax -; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) -; AVX512F-NEXT: movl %edi, (%rsp) +; AVX512F-NEXT: movq %rdi, %rcx +; AVX512F-NEXT: kmovw %edi, %k2 +; AVX512F-NEXT: movl %edi, %edx +; AVX512F-NEXT: shrl $16, %edx +; AVX512F-NEXT: shrq $48, %rax +; AVX512F-NEXT: shrq $32, %rcx ; AVX512F-NEXT: vpavgb %ymm3, %ymm1, %ymm1 ; AVX512F-NEXT: vpavgb %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 -; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} +; AVX512F-NEXT: kmovw %ecx, %k1 +; AVX512F-NEXT: kmovw %eax, %k3 +; AVX512F-NEXT: kmovw %edx, %k4 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2 +; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k4} {z} ; AVX512F-NEXT: vpmovdb %zmm3, %xmm3 -; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpblendvb %ymm2, %ymm1, %ymm5, %ymm1 -; AVX512F-NEXT: kmovw (%rsp), %k1 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 +; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm6 +; AVX512F-NEXT: vpblendvb %xmm3, %xmm2, %xmm6, %xmm2 ; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} ; AVX512F-NEXT: vpmovdb %zmm3, %xmm3 -; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm4, %ymm0 -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp +; AVX512F-NEXT: vpblendvb %xmm3, %xmm0, %xmm4, %xmm0 +; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k3} {z} +; AVX512F-NEXT: vpmovdb %zmm3, %xmm3 +; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm4 +; AVX512F-NEXT: vpblendvb %xmm3, %xmm2, %xmm4, %xmm2 +; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z} +; AVX512F-NEXT: vpmovdb %zmm3, %xmm3 +; AVX512F-NEXT: vpblendvb %xmm3, %xmm1, %xmm5, %xmm1 +; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 ; AVX512F-NEXT: retq ; ; AVX512BWVL-LABEL: avg_v64i8_mask: @@ -185,34 +175,34 @@ define <64 x i8> @avg_v64i8_maskz(<64 x i8> %a, <64 x i8> %b, i64 %mask) nounwind { ; AVX512F-LABEL: avg_v64i8_maskz: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $64, %rsp ; AVX512F-NEXT: movq %rdi, %rax -; AVX512F-NEXT: shrq $32, %rax -; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) -; AVX512F-NEXT: movl %edi, (%rsp) +; AVX512F-NEXT: movq %rdi, %rcx +; AVX512F-NEXT: kmovw %edi, %k1 +; AVX512F-NEXT: movl %edi, %edx +; AVX512F-NEXT: shrl $16, %edx +; AVX512F-NEXT: shrq $48, %rax +; AVX512F-NEXT: shrq $32, %rcx ; AVX512F-NEXT: vpavgb %ymm3, %ymm1, %ymm1 ; AVX512F-NEXT: vpavgb %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 +; AVX512F-NEXT: kmovw %ecx, %k2 +; AVX512F-NEXT: kmovw %eax, %k3 +; AVX512F-NEXT: kmovw %edx, %k4 ; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 -; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} +; AVX512F-NEXT: vpand %xmm0, %xmm2, %xmm2 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k4} {z} ; AVX512F-NEXT: vpmovdb %zmm3, %xmm3 -; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpand %ymm1, %ymm2, %ymm1 -; AVX512F-NEXT: kmovw (%rsp), %k1 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 +; AVX512F-NEXT: vpand %xmm0, %xmm3, %xmm0 +; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k3} {z} +; AVX512F-NEXT: vpmovdb %zmm3, %xmm3 +; AVX512F-NEXT: vpand %xmm2, %xmm3, %xmm2 ; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} ; AVX512F-NEXT: vpmovdb %zmm3, %xmm3 -; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpand %ymm0, %ymm2, %ymm0 -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp +; AVX512F-NEXT: vpand %xmm1, %xmm3, %xmm1 +; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 ; AVX512F-NEXT: retq ; ; AVX512BWVL-LABEL: avg_v64i8_maskz: @@ -342,29 +332,17 @@ define <32 x i16> @avg_v32i16_mask(<32 x i16> %a, <32 x i16> %b, <32 x i16> %src, i32 %mask) nounwind { ; AVX512F-LABEL: avg_v32i16_mask: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $32, %rsp -; AVX512F-NEXT: movl %edi, (%rsp) -; AVX512F-NEXT: kmovw (%rsp), %k1 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; AVX512F-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k2} {z} -; AVX512F-NEXT: vpmovdb %zmm6, %xmm6 -; AVX512F-NEXT: vpternlogd $255, %zmm7, %zmm7, %zmm7 {%k1} {z} -; AVX512F-NEXT: vpmovdb %zmm7, %xmm7 +; AVX512F-NEXT: kmovw %edi, %k1 +; AVX512F-NEXT: shrl $16, %edi ; AVX512F-NEXT: vpavgw %ymm3, %ymm1, %ymm1 ; AVX512F-NEXT: vpavgw %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero,xmm7[4],zero,xmm7[5],zero,xmm7[6],zero,xmm7[7],zero,xmm7[8],zero,xmm7[9],zero,xmm7[10],zero,xmm7[11],zero,xmm7[12],zero,xmm7[13],zero,xmm7[14],zero,xmm7[15],zero -; AVX512F-NEXT: vpsllw $15, %ymm2, %ymm2 -; AVX512F-NEXT: vpsraw $15, %ymm2, %ymm2 +; AVX512F-NEXT: kmovw %edi, %k2 +; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} +; AVX512F-NEXT: vpmovdw %zmm2, %ymm2 ; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm4, %ymm0 -; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero,xmm6[8],zero,xmm6[9],zero,xmm6[10],zero,xmm6[11],zero,xmm6[12],zero,xmm6[13],zero,xmm6[14],zero,xmm6[15],zero -; AVX512F-NEXT: vpsllw $15, %ymm2, %ymm2 -; AVX512F-NEXT: vpsraw $15, %ymm2, %ymm2 +; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} +; AVX512F-NEXT: vpmovdw %zmm2, %ymm2 ; AVX512F-NEXT: vpblendvb %ymm2, %ymm1, %ymm5, %ymm1 -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp ; AVX512F-NEXT: retq ; ; AVX512BWVL-LABEL: avg_v32i16_mask: @@ -387,29 +365,17 @@ define <32 x i16> @avg_v32i16_maskz(<32 x i16> %a, <32 x i16> %b, i32 %mask) nounwind { ; AVX512F-LABEL: avg_v32i16_maskz: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $32, %rsp -; AVX512F-NEXT: movl %edi, (%rsp) -; AVX512F-NEXT: kmovw (%rsp), %k1 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; AVX512F-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k2} {z} -; AVX512F-NEXT: vpmovdb %zmm4, %xmm4 -; AVX512F-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k1} {z} -; AVX512F-NEXT: vpmovdb %zmm5, %xmm5 +; AVX512F-NEXT: kmovw %edi, %k1 +; AVX512F-NEXT: shrl $16, %edi ; AVX512F-NEXT: vpavgw %ymm3, %ymm1, %ymm1 ; AVX512F-NEXT: vpavgw %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero,xmm5[8],zero,xmm5[9],zero,xmm5[10],zero,xmm5[11],zero,xmm5[12],zero,xmm5[13],zero,xmm5[14],zero,xmm5[15],zero -; AVX512F-NEXT: vpsllw $15, %ymm2, %ymm2 -; AVX512F-NEXT: vpsraw $15, %ymm2, %ymm2 +; AVX512F-NEXT: kmovw %edi, %k2 +; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} +; AVX512F-NEXT: vpmovdw %zmm2, %ymm2 ; AVX512F-NEXT: vpand %ymm0, %ymm2, %ymm0 -; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero -; AVX512F-NEXT: vpsllw $15, %ymm2, %ymm2 -; AVX512F-NEXT: vpsraw $15, %ymm2, %ymm2 +; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} +; AVX512F-NEXT: vpmovdw %zmm2, %ymm2 ; AVX512F-NEXT: vpand %ymm1, %ymm2, %ymm1 -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp ; AVX512F-NEXT: retq ; ; AVX512BWVL-LABEL: avg_v32i16_maskz: Index: test/CodeGen/X86/avx512-ext.ll =================================================================== --- test/CodeGen/X86/avx512-ext.ll +++ test/CodeGen/X86/avx512-ext.ll @@ -152,18 +152,17 @@ define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone { ; KNL-LABEL: zext_32x8mem_to_32x16: ; KNL: # %bb.0: -; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero -; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero -; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; KNL-NEXT: vpsllw $15, %ymm3, %ymm3 -; KNL-NEXT: vpsraw $15, %ymm3, %ymm3 -; KNL-NEXT: vpand %ymm2, %ymm3, %ymm2 -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 +; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1 +; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero +; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0 -; KNL-NEXT: vpand %ymm1, %ymm0, %ymm1 -; KNL-NEXT: vmovdqa %ymm2, %ymm0 +; KNL-NEXT: vpand %ymm3, %ymm0, %ymm0 +; KNL-NEXT: vpsllw $15, %ymm1, %ymm1 +; KNL-NEXT: vpsraw $15, %ymm1, %ymm1 +; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1 ; KNL-NEXT: retq ; ; SKX-LABEL: zext_32x8mem_to_32x16: @@ -181,18 +180,17 @@ define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone { ; KNL-LABEL: sext_32x8mem_to_32x16: ; KNL: # %bb.0: -; KNL-NEXT: vpmovsxbw 16(%rdi), %ymm1 -; KNL-NEXT: vpmovsxbw (%rdi), %ymm2 -; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; KNL-NEXT: vpsllw $15, %ymm3, %ymm3 -; KNL-NEXT: vpsraw $15, %ymm3, %ymm3 -; KNL-NEXT: vpand %ymm2, %ymm3, %ymm2 -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 +; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1 +; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; KNL-NEXT: vpmovsxbw 16(%rdi), %ymm2 +; KNL-NEXT: vpmovsxbw (%rdi), %ymm3 ; KNL-NEXT: vpsllw $15, %ymm0, %ymm0 ; KNL-NEXT: vpsraw $15, %ymm0, %ymm0 -; KNL-NEXT: vpand %ymm1, %ymm0, %ymm1 -; KNL-NEXT: vmovdqa %ymm2, %ymm0 +; KNL-NEXT: vpand %ymm3, %ymm0, %ymm0 +; KNL-NEXT: vpsllw $15, %ymm1, %ymm1 +; KNL-NEXT: vpsraw $15, %ymm1, %ymm1 +; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1 ; KNL-NEXT: retq ; ; SKX-LABEL: sext_32x8mem_to_32x16: @@ -227,18 +225,18 @@ define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone { ; KNL-LABEL: zext_32x8_to_32x16_mask: ; KNL: # %bb.0: -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm2 +; KNL-NEXT: vextracti128 $1, %ymm1, %xmm2 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero -; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; KNL-NEXT: vpsllw $15, %ymm3, %ymm3 -; KNL-NEXT: vpsraw $15, %ymm3, %ymm3 -; KNL-NEXT: vpand %ymm0, %ymm3, %ymm0 -; KNL-NEXT: vextracti128 $1, %ymm1, %xmm1 ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero +; KNL-NEXT: vextracti128 $1, %ymm0, %xmm3 +; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero +; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1 -; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1 +; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0 +; KNL-NEXT: vpsllw $15, %ymm2, %ymm1 +; KNL-NEXT: vpsraw $15, %ymm1, %ymm1 +; KNL-NEXT: vpand %ymm3, %ymm1, %ymm1 ; KNL-NEXT: retq ; ; SKX-LABEL: zext_32x8_to_32x16_mask: @@ -272,18 +270,18 @@ define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone { ; KNL-LABEL: sext_32x8_to_32x16_mask: ; KNL: # %bb.0: -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm2 -; KNL-NEXT: vpmovsxbw %xmm2, %ymm2 -; KNL-NEXT: vpmovsxbw %xmm0, %ymm0 -; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; KNL-NEXT: vpsllw $15, %ymm3, %ymm3 -; KNL-NEXT: vpsraw $15, %ymm3, %ymm3 -; KNL-NEXT: vpand %ymm0, %ymm3, %ymm0 -; KNL-NEXT: vextracti128 $1, %ymm1, %xmm1 +; KNL-NEXT: vextracti128 $1, %ymm1, %xmm2 +; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero +; KNL-NEXT: vextracti128 $1, %ymm0, %xmm3 +; KNL-NEXT: vpmovsxbw %xmm3, %ymm3 +; KNL-NEXT: vpmovsxbw %xmm0, %ymm0 ; KNL-NEXT: vpsllw $15, %ymm1, %ymm1 ; KNL-NEXT: vpsraw $15, %ymm1, %ymm1 -; KNL-NEXT: vpand %ymm2, %ymm1, %ymm1 +; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0 +; KNL-NEXT: vpsllw $15, %ymm2, %ymm1 +; KNL-NEXT: vpsraw $15, %ymm1, %ymm1 +; KNL-NEXT: vpand %ymm3, %ymm1, %ymm1 ; KNL-NEXT: retq ; ; SKX-LABEL: sext_32x8_to_32x16_mask: Index: test/CodeGen/X86/avx512-insert-extract.ll =================================================================== --- test/CodeGen/X86/avx512-insert-extract.ll +++ test/CodeGen/X86/avx512-insert-extract.ll @@ -782,39 +782,20 @@ define i32 @test_insertelement_v32i1(i32 %a, i32 %b, <32 x i32> %x , <32 x i32> %y) { ; KNL-LABEL: test_insertelement_v32i1: ; KNL: ## %bb.0: -; KNL-NEXT: pushq %rbp -; KNL-NEXT: .cfi_def_cfa_offset 16 -; KNL-NEXT: .cfi_offset %rbp, -16 -; KNL-NEXT: movq %rsp, %rbp -; KNL-NEXT: .cfi_def_cfa_register %rbp -; KNL-NEXT: andq $-32, %rsp -; KNL-NEXT: subq $32, %rsp -; KNL-NEXT: xorl %eax, %eax ; KNL-NEXT: cmpl %esi, %edi ; KNL-NEXT: setb %al -; KNL-NEXT: vpcmpltud %zmm2, %zmm0, %k1 -; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; KNL-NEXT: vpmovdb %zmm0, %xmm0 -; KNL-NEXT: vpcmpltud %zmm3, %zmm1, %k1 -; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; KNL-NEXT: vpmovdb %zmm1, %xmm1 -; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; KNL-NEXT: vpsllw $7, %ymm0, %ymm0 -; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; KNL-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1 -; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vpslld $31, %zmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, (%rsp) -; KNL-NEXT: movl (%rsp), %eax -; KNL-NEXT: movq %rbp, %rsp -; KNL-NEXT: popq %rbp +; KNL-NEXT: vpcmpltud %zmm3, %zmm1, %k0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: shll $16, %ecx +; KNL-NEXT: vpcmpltud %zmm2, %zmm0, %k0 +; KNL-NEXT: kshiftrw $4, %k0, %k1 +; KNL-NEXT: kmovw %eax, %k2 +; KNL-NEXT: kxorw %k2, %k1, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $11, %k1, %k1 +; KNL-NEXT: kxorw %k0, %k1, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: orl %ecx, %eax ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; @@ -1014,7 +995,10 @@ ; KNL-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; KNL-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; KNL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 -; KNL-NEXT: vpextrb $2, %xmm0, %eax +; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: kshiftrw $2, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: andl $1, %eax ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq @@ -1041,7 +1025,10 @@ ; KNL-NEXT: vpxor %ymm0, %ymm1, %ymm0 ; KNL-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 -; KNL-NEXT: vpextrb $15, %xmm0, %eax +; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: andb $1, %al ; KNL-NEXT: movb $4, %cl ; KNL-NEXT: subb %al, %cl @@ -1074,7 +1061,10 @@ ; KNL-NEXT: vpxor %ymm0, %ymm1, %ymm0 ; KNL-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 -; KNL-NEXT: vpextrb $15, %xmm0, %eax +; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: andb $1, %al ; KNL-NEXT: movb $4, %cl ; KNL-NEXT: subb %al, %cl @@ -1718,26 +1708,25 @@ ; KNL-NEXT: movq %rsp, %rbp ; KNL-NEXT: .cfi_def_cfa_register %rbp ; KNL-NEXT: andq $-32, %rsp -; KNL-NEXT: subq $96, %rsp +; KNL-NEXT: subq $64, %rsp ; KNL-NEXT: ## kill: def %esi killed %esi def %rsi ; KNL-NEXT: vmovdqa {{.*#+}} ymm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; KNL-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; KNL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 ; KNL-NEXT: andl $31, %esi ; KNL-NEXT: testb %dil, %dil -; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp) -; KNL-NEXT: setne 32(%rsp,%rsi) -; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm0 -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1 -; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 +; KNL-NEXT: vmovdqa %ymm0, (%rsp) +; KNL-NEXT: setne (%rsp,%rsi) +; KNL-NEXT: vpmovsxbd (%rsp), %zmm0 +; KNL-NEXT: vpslld $31, %zmm0, %zmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: vpmovsxbd {{[0-9]+}}(%rsp), %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, (%rsp) -; KNL-NEXT: movl (%rsp), %eax +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: shll $16, %eax +; KNL-NEXT: orl %ecx, %eax ; KNL-NEXT: movq %rbp, %rsp ; KNL-NEXT: popq %rbp ; KNL-NEXT: vzeroupper @@ -1783,7 +1772,7 @@ ; KNL-NEXT: movq %rsp, %rbp ; KNL-NEXT: .cfi_def_cfa_register %rbp ; KNL-NEXT: andq $-64, %rsp -; KNL-NEXT: subq $192, %rsp +; KNL-NEXT: subq $128, %rsp ; KNL-NEXT: ## kill: def %esi killed %esi def %rsi ; KNL-NEXT: vmovdqa {{.*#+}} ymm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; KNL-NEXT: vpxor %ymm2, %ymm0, %ymm0 @@ -1793,30 +1782,32 @@ ; KNL-NEXT: andl $63, %esi ; KNL-NEXT: testb %dil, %dil ; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp) -; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp) -; KNL-NEXT: setne 64(%rsp,%rsi) -; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm0 +; KNL-NEXT: vmovdqa %ymm0, (%rsp) +; KNL-NEXT: setne (%rsp,%rsi) +; KNL-NEXT: vmovdqa (%rsp), %ymm0 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm1 -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm2 -; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 +; KNL-NEXT: vpmovsxbd %xmm0, %zmm2 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vextracti128 $1, %ymm1, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: shll $16, %ecx +; KNL-NEXT: orl %eax, %ecx +; KNL-NEXT: vpmovsxbd %xmm1, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vpmovsxbd %xmm1, %zmm0 +; KNL-NEXT: kmovw %k0, %edx +; KNL-NEXT: vextracti128 $1, %ymm1, %xmm0 +; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, (%rsp) -; KNL-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; KNL-NEXT: movl (%rsp), %eax +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: shll $16, %eax +; KNL-NEXT: orl %edx, %eax ; KNL-NEXT: shlq $32, %rax ; KNL-NEXT: orq %rcx, %rax ; KNL-NEXT: movq %rbp, %rsp @@ -1864,7 +1855,7 @@ ; KNL-NEXT: movq %rsp, %rbp ; KNL-NEXT: .cfi_def_cfa_register %rbp ; KNL-NEXT: andq $-128, %rsp -; KNL-NEXT: subq $384, %rsp ## imm = 0x180 +; KNL-NEXT: subq $256, %rsp ## imm = 0x100 ; KNL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; KNL-NEXT: vpinsrb $1, 488(%rbp), %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $2, 496(%rbp), %xmm0, %xmm0 @@ -1978,56 +1969,60 @@ ; KNL-NEXT: vmovdqa %ymm3, {{[0-9]+}}(%rsp) ; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp) ; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp) -; KNL-NEXT: vmovdqa %ymm2, {{[0-9]+}}(%rsp) -; KNL-NEXT: setne 128(%rsp,%rax) -; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm1 -; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm2 +; KNL-NEXT: vmovdqa %ymm2, (%rsp) +; KNL-NEXT: setne (%rsp,%rax) +; KNL-NEXT: vmovdqa (%rsp), %ymm2 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm3 +; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm1 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm0 -; KNL-NEXT: vextracti128 $1, %ymm1, %xmm4 -; KNL-NEXT: vpmovsxbd %xmm4, %zmm4 +; KNL-NEXT: vpmovsxbd %xmm2, %zmm4 ; KNL-NEXT: vpslld $31, %zmm4, %zmm4 ; KNL-NEXT: vptestmd %zmm4, %zmm4, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vextracti128 $1, %ymm2, %xmm1 -; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vpmovsxbd %xmm2, %zmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vextracti128 $1, %ymm3, %xmm1 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2 +; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 +; KNL-NEXT: vpslld $31, %zmm2, %zmm2 +; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: shll $16, %ecx +; KNL-NEXT: orl %eax, %ecx +; KNL-NEXT: vpmovsxbd %xmm3, %zmm2 +; KNL-NEXT: vpslld $31, %zmm2, %zmm2 +; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 +; KNL-NEXT: kmovw %k0, %edx +; KNL-NEXT: vextracti128 $1, %ymm3, %xmm2 +; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 +; KNL-NEXT: vpslld $31, %zmm2, %zmm2 +; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: shll $16, %eax +; KNL-NEXT: orl %edx, %eax +; KNL-NEXT: shlq $32, %rax +; KNL-NEXT: orq %rcx, %rax +; KNL-NEXT: vpmovsxbd %xmm1, %zmm2 +; KNL-NEXT: vpslld $31, %zmm2, %zmm2 +; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: vextracti128 $1, %ymm1, %xmm1 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vpmovsxbd %xmm3, %zmm1 +; KNL-NEXT: kmovw %k0, %esi +; KNL-NEXT: shll $16, %esi +; KNL-NEXT: orl %ecx, %esi +; KNL-NEXT: vpmovsxbd %xmm0, %zmm1 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, (%rsp) -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1 -; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: shlq $32, %rax -; KNL-NEXT: orq %rcx, %rax -; KNL-NEXT: movl (%rsp), %ecx -; KNL-NEXT: movl {{[0-9]+}}(%rsp), %edx +; KNL-NEXT: kmovw %k0, %edx +; KNL-NEXT: shll $16, %edx +; KNL-NEXT: orl %ecx, %edx ; KNL-NEXT: shlq $32, %rdx -; KNL-NEXT: orq %rcx, %rdx +; KNL-NEXT: orq %rsi, %rdx ; KNL-NEXT: movq %rbp, %rsp ; KNL-NEXT: popq %rbp ; KNL-NEXT: vzeroupper @@ -2179,7 +2174,7 @@ ; KNL-NEXT: movq %rsp, %rbp ; KNL-NEXT: .cfi_def_cfa_register %rbp ; KNL-NEXT: andq $-128, %rsp -; KNL-NEXT: subq $384, %rsp ## imm = 0x180 +; KNL-NEXT: subq $256, %rsp ## imm = 0x100 ; KNL-NEXT: ## kill: def %esi killed %esi def %rsi ; KNL-NEXT: vmovdqa {{.*#+}} ymm4 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; KNL-NEXT: vpxor %ymm4, %ymm0, %ymm0 @@ -2195,56 +2190,60 @@ ; KNL-NEXT: vmovdqa %ymm3, {{[0-9]+}}(%rsp) ; KNL-NEXT: vmovdqa %ymm2, {{[0-9]+}}(%rsp) ; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp) -; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp) -; KNL-NEXT: setne 128(%rsp,%rsi) -; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm1 -; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm2 +; KNL-NEXT: vmovdqa %ymm0, (%rsp) +; KNL-NEXT: setne (%rsp,%rsi) +; KNL-NEXT: vmovdqa (%rsp), %ymm2 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm3 +; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm1 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm0 -; KNL-NEXT: vextracti128 $1, %ymm1, %xmm4 -; KNL-NEXT: vpmovsxbd %xmm4, %zmm4 +; KNL-NEXT: vpmovsxbd %xmm2, %zmm4 ; KNL-NEXT: vpslld $31, %zmm4, %zmm4 ; KNL-NEXT: vptestmd %zmm4, %zmm4, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vextracti128 $1, %ymm2, %xmm1 -; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vpmovsxbd %xmm2, %zmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vextracti128 $1, %ymm3, %xmm1 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2 +; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 +; KNL-NEXT: vpslld $31, %zmm2, %zmm2 +; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: shll $16, %ecx +; KNL-NEXT: orl %eax, %ecx +; KNL-NEXT: vpmovsxbd %xmm3, %zmm2 +; KNL-NEXT: vpslld $31, %zmm2, %zmm2 +; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 +; KNL-NEXT: kmovw %k0, %edx +; KNL-NEXT: vextracti128 $1, %ymm3, %xmm2 +; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 +; KNL-NEXT: vpslld $31, %zmm2, %zmm2 +; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: shll $16, %eax +; KNL-NEXT: orl %edx, %eax +; KNL-NEXT: shlq $32, %rax +; KNL-NEXT: orq %rcx, %rax +; KNL-NEXT: vpmovsxbd %xmm1, %zmm2 +; KNL-NEXT: vpslld $31, %zmm2, %zmm2 +; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: vextracti128 $1, %ymm1, %xmm1 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vpmovsxbd %xmm3, %zmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, (%rsp) -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1 -; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 +; KNL-NEXT: kmovw %k0, %esi +; KNL-NEXT: shll $16, %esi +; KNL-NEXT: orl %ecx, %esi +; KNL-NEXT: vpmovsxbd %xmm0, %zmm1 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: shlq $32, %rax -; KNL-NEXT: orq %rcx, %rax -; KNL-NEXT: movl (%rsp), %ecx -; KNL-NEXT: movl {{[0-9]+}}(%rsp), %edx +; KNL-NEXT: kmovw %k0, %edx +; KNL-NEXT: shll $16, %edx +; KNL-NEXT: orl %ecx, %edx ; KNL-NEXT: shlq $32, %rdx -; KNL-NEXT: orq %rcx, %rdx +; KNL-NEXT: orq %rsi, %rdx ; KNL-NEXT: movq %rbp, %rsp ; KNL-NEXT: popq %rbp ; KNL-NEXT: vzeroupper Index: test/CodeGen/X86/avx512-mask-op.ll =================================================================== --- test/CodeGen/X86/avx512-mask-op.ll +++ test/CodeGen/X86/avx512-mask-op.ll @@ -975,38 +975,32 @@ ; ; KNL-LABEL: test16: ; KNL: ## %bb.0: -; KNL-NEXT: pushq %rbp -; KNL-NEXT: .cfi_def_cfa_offset 16 -; KNL-NEXT: .cfi_offset %rbp, -16 -; KNL-NEXT: movq %rsp, %rbp -; KNL-NEXT: .cfi_def_cfa_register %rbp -; KNL-NEXT: andq $-32, %rsp -; KNL-NEXT: subq $64, %rsp -; KNL-NEXT: movl %edi, (%rsp) +; KNL-NEXT: movq %rdi, %rax +; KNL-NEXT: movl %edi, %ecx +; KNL-NEXT: kmovw %edi, %k0 ; KNL-NEXT: shrq $32, %rdi -; KNL-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; KNL-NEXT: kmovw (%rsp), %k1 -; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; KNL-NEXT: shrq $48, %rax +; KNL-NEXT: shrl $16, %ecx +; KNL-NEXT: kmovw %ecx, %k1 +; KNL-NEXT: kmovw %eax, %k2 +; KNL-NEXT: kmovw %edi, %k3 +; KNL-NEXT: movb $1, %al +; KNL-NEXT: kmovw %eax, %k4 +; KNL-NEXT: kshiftrw $5, %k0, %k5 +; KNL-NEXT: kxorw %k4, %k5, %k4 +; KNL-NEXT: kshiftlw $15, %k4, %k4 +; KNL-NEXT: kshiftrw $10, %k4, %k4 +; KNL-NEXT: kxorw %k0, %k4, %k4 +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z} ; KNL-NEXT: vpmovdb %zmm0, %xmm0 -; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; KNL-NEXT: vpmovdb %zmm1, %xmm1 -; KNL-NEXT: movl $1, %eax -; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 -; KNL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 -; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} ; KNL-NEXT: vpmovdb %zmm1, %xmm1 -; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} +; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k4} {z} +; KNL-NEXT: vpmovdb %zmm0, %xmm0 +; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; KNL-NEXT: vpmovdb %zmm2, %xmm2 -; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 -; KNL-NEXT: vpsllw $7, %ymm0, %ymm0 -; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0 -; KNL-NEXT: movq %rbp, %rsp -; KNL-NEXT: popq %rbp +; KNL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test16: @@ -1037,38 +1031,32 @@ ; ; AVX512DQ-LABEL: test16: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: pushq %rbp -; AVX512DQ-NEXT: .cfi_def_cfa_offset 16 -; AVX512DQ-NEXT: .cfi_offset %rbp, -16 -; AVX512DQ-NEXT: movq %rsp, %rbp -; AVX512DQ-NEXT: .cfi_def_cfa_register %rbp -; AVX512DQ-NEXT: andq $-32, %rsp -; AVX512DQ-NEXT: subq $64, %rsp -; AVX512DQ-NEXT: movl %edi, (%rsp) +; AVX512DQ-NEXT: movq %rdi, %rax +; AVX512DQ-NEXT: movl %edi, %ecx +; AVX512DQ-NEXT: kmovw %edi, %k0 ; AVX512DQ-NEXT: shrq $32, %rdi -; AVX512DQ-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; AVX512DQ-NEXT: kmovw (%rsp), %k0 -; AVX512DQ-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; AVX512DQ-NEXT: vpmovm2d %k1, %zmm0 +; AVX512DQ-NEXT: shrq $48, %rax +; AVX512DQ-NEXT: shrl $16, %ecx +; AVX512DQ-NEXT: kmovw %ecx, %k1 +; AVX512DQ-NEXT: kmovw %eax, %k2 +; AVX512DQ-NEXT: kmovw %edi, %k3 +; AVX512DQ-NEXT: movb $1, %al +; AVX512DQ-NEXT: kmovw %eax, %k4 +; AVX512DQ-NEXT: kshiftrw $5, %k0, %k5 +; AVX512DQ-NEXT: kxorw %k4, %k5, %k4 +; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4 +; AVX512DQ-NEXT: kshiftrw $10, %k4, %k4 +; AVX512DQ-NEXT: kxorw %k0, %k4, %k0 +; AVX512DQ-NEXT: vpmovm2d %k3, %zmm0 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1 -; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 -; AVX512DQ-NEXT: movl $1, %eax -; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 -; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 -; AVX512DQ-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 -; AVX512DQ-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1 +; AVX512DQ-NEXT: vpmovm2d %k2, %zmm1 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 +; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 +; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 +; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vpmovm2d %k1, %zmm2 ; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2 -; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 -; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; AVX512DQ-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512DQ-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0 -; AVX512DQ-NEXT: movq %rbp, %rsp -; AVX512DQ-NEXT: popq %rbp +; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ; AVX512DQ-NEXT: retq %a = bitcast i64 %x to <64 x i1> %b = insertelement <64 x i1>%a, i1 true, i32 5 @@ -1080,40 +1068,33 @@ ; ; KNL-LABEL: test17: ; KNL: ## %bb.0: -; KNL-NEXT: pushq %rbp -; KNL-NEXT: .cfi_def_cfa_offset 16 -; KNL-NEXT: .cfi_offset %rbp, -16 -; KNL-NEXT: movq %rsp, %rbp -; KNL-NEXT: .cfi_def_cfa_register %rbp -; KNL-NEXT: andq $-32, %rsp -; KNL-NEXT: subq $64, %rsp -; KNL-NEXT: movl %edi, (%rsp) +; KNL-NEXT: movq %rdi, %rax +; KNL-NEXT: movl %edi, %ecx +; KNL-NEXT: kmovw %edi, %k0 ; KNL-NEXT: shrq $32, %rdi -; KNL-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; KNL-NEXT: kmovw (%rsp), %k1 -; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} -; KNL-NEXT: vpmovdb %zmm0, %xmm0 -; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; KNL-NEXT: vpmovdb %zmm1, %xmm1 -; KNL-NEXT: xorl %eax, %eax +; KNL-NEXT: shrq $48, %rax +; KNL-NEXT: shrl $16, %ecx +; KNL-NEXT: kmovw %ecx, %k1 +; KNL-NEXT: kmovw %eax, %k2 +; KNL-NEXT: kmovw %edi, %k3 ; KNL-NEXT: cmpl %edx, %esi ; KNL-NEXT: setg %al -; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 -; KNL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 -; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; KNL-NEXT: kmovw %eax, %k4 +; KNL-NEXT: kshiftrw $5, %k0, %k5 +; KNL-NEXT: kxorw %k4, %k5, %k4 +; KNL-NEXT: kshiftlw $15, %k4, %k4 +; KNL-NEXT: kshiftrw $10, %k4, %k4 +; KNL-NEXT: kxorw %k0, %k4, %k4 +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z} +; KNL-NEXT: vpmovdb %zmm0, %xmm0 +; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} ; KNL-NEXT: vpmovdb %zmm1, %xmm1 -; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} +; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k4} {z} +; KNL-NEXT: vpmovdb %zmm0, %xmm0 +; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; KNL-NEXT: vpmovdb %zmm2, %xmm2 -; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 -; KNL-NEXT: vpsllw $7, %ymm0, %ymm0 -; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0 -; KNL-NEXT: movq %rbp, %rsp -; KNL-NEXT: popq %rbp +; KNL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test17: @@ -1146,40 +1127,33 @@ ; ; AVX512DQ-LABEL: test17: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: pushq %rbp -; AVX512DQ-NEXT: .cfi_def_cfa_offset 16 -; AVX512DQ-NEXT: .cfi_offset %rbp, -16 -; AVX512DQ-NEXT: movq %rsp, %rbp -; AVX512DQ-NEXT: .cfi_def_cfa_register %rbp -; AVX512DQ-NEXT: andq $-32, %rsp -; AVX512DQ-NEXT: subq $64, %rsp -; AVX512DQ-NEXT: movl %edi, (%rsp) +; AVX512DQ-NEXT: movq %rdi, %rax +; AVX512DQ-NEXT: movl %edi, %ecx +; AVX512DQ-NEXT: kmovw %edi, %k0 ; AVX512DQ-NEXT: shrq $32, %rdi -; AVX512DQ-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; AVX512DQ-NEXT: kmovw (%rsp), %k0 -; AVX512DQ-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; AVX512DQ-NEXT: vpmovm2d %k1, %zmm0 -; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1 -; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 -; AVX512DQ-NEXT: xorl %eax, %eax +; AVX512DQ-NEXT: shrq $48, %rax +; AVX512DQ-NEXT: shrl $16, %ecx +; AVX512DQ-NEXT: kmovw %ecx, %k1 +; AVX512DQ-NEXT: kmovw %eax, %k2 +; AVX512DQ-NEXT: kmovw %edi, %k3 ; AVX512DQ-NEXT: cmpl %edx, %esi ; AVX512DQ-NEXT: setg %al -; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 -; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 -; AVX512DQ-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 -; AVX512DQ-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1 +; AVX512DQ-NEXT: kmovw %eax, %k4 +; AVX512DQ-NEXT: kshiftrw $5, %k0, %k5 +; AVX512DQ-NEXT: kxorw %k4, %k5, %k4 +; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4 +; AVX512DQ-NEXT: kshiftrw $10, %k4, %k4 +; AVX512DQ-NEXT: kxorw %k0, %k4, %k0 +; AVX512DQ-NEXT: vpmovm2d %k3, %zmm0 +; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512DQ-NEXT: vpmovm2d %k2, %zmm1 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 +; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 +; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 +; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vpmovm2d %k1, %zmm2 ; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2 -; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 -; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; AVX512DQ-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512DQ-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0 -; AVX512DQ-NEXT: movq %rbp, %rsp -; AVX512DQ-NEXT: popq %rbp +; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ; AVX512DQ-NEXT: retq %a = bitcast i64 %x to <64 x i1> %b = icmp sgt i32 %y, %z @@ -1281,14 +1255,14 @@ define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone { ; KNL-LABEL: test21: ; KNL: ## %bb.0: -; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero -; KNL-NEXT: vpsllw $15, %ymm3, %ymm3 -; KNL-NEXT: vpsraw $15, %ymm3, %ymm3 -; KNL-NEXT: vpand %ymm0, %ymm3, %ymm0 -; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2 +; KNL-NEXT: vextracti128 $1, %ymm2, %xmm3 +; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero ; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero ; KNL-NEXT: vpsllw $15, %ymm2, %ymm2 ; KNL-NEXT: vpsraw $15, %ymm2, %ymm2 +; KNL-NEXT: vpand %ymm0, %ymm2, %ymm0 +; KNL-NEXT: vpsllw $15, %ymm3, %ymm2 +; KNL-NEXT: vpsraw $15, %ymm2, %ymm2 ; KNL-NEXT: vpand %ymm1, %ymm2, %ymm1 ; KNL-NEXT: retq ; @@ -1308,14 +1282,14 @@ ; ; AVX512DQ-LABEL: test21: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero -; AVX512DQ-NEXT: vpsllw $15, %ymm3, %ymm3 -; AVX512DQ-NEXT: vpsraw $15, %ymm3, %ymm3 -; AVX512DQ-NEXT: vpand %ymm0, %ymm3, %ymm0 -; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm2 +; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm3 +; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero ; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero ; AVX512DQ-NEXT: vpsllw $15, %ymm2, %ymm2 ; AVX512DQ-NEXT: vpsraw $15, %ymm2, %ymm2 +; AVX512DQ-NEXT: vpand %ymm0, %ymm2, %ymm0 +; AVX512DQ-NEXT: vpsllw $15, %ymm3, %ymm2 +; AVX512DQ-NEXT: vpsraw $15, %ymm2, %ymm2 ; AVX512DQ-NEXT: vpand %ymm1, %ymm2, %ymm1 ; AVX512DQ-NEXT: retq %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer @@ -1691,8 +1665,15 @@ define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) { ; KNL-LABEL: test_build_vec_v64i1: ; KNL: ## %bb.0: -; KNL-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 -; KNL-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1 +; KNL-NEXT: vextractf128 $1, %ymm1, %xmm2 +; KNL-NEXT: vmovaps {{.*#+}} xmm3 = [255,0,255,0,0,0,255,0,255,0,0,255,255,0,255,0] +; KNL-NEXT: vandps %xmm3, %xmm2, %xmm2 +; KNL-NEXT: vandps %xmm3, %xmm1, %xmm1 +; KNL-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 +; KNL-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm2 +; KNL-NEXT: vextractf128 $1, %ymm0, %xmm0 +; KNL-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 +; KNL-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test_build_vec_v64i1: @@ -1707,8 +1688,15 @@ ; ; AVX512DQ-LABEL: test_build_vec_v64i1: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 -; AVX512DQ-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1 +; AVX512DQ-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX512DQ-NEXT: vmovaps {{.*#+}} xmm3 = [255,0,255,0,0,0,255,0,255,0,0,255,255,0,255,0] +; AVX512DQ-NEXT: vandps %xmm3, %xmm2, %xmm2 +; AVX512DQ-NEXT: vandps %xmm3, %xmm1, %xmm1 +; AVX512DQ-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 +; AVX512DQ-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm2 +; AVX512DQ-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX512DQ-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 +; AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 ; AVX512DQ-NEXT: retq %ret = select <64 x i1> , <64 x i8> %x, <64 x i8> zeroinitializer ret <64 x i8> %ret @@ -1815,51 +1803,29 @@ ; ; KNL-LABEL: ktest_2: ; KNL: ## %bb.0: -; KNL-NEXT: pushq %rbp -; KNL-NEXT: .cfi_def_cfa_offset 16 -; KNL-NEXT: .cfi_offset %rbp, -16 -; KNL-NEXT: movq %rsp, %rbp -; KNL-NEXT: .cfi_def_cfa_register %rbp -; KNL-NEXT: andq $-32, %rsp -; KNL-NEXT: subq $32, %rsp ; KNL-NEXT: vmovups (%rdi), %zmm2 ; KNL-NEXT: vmovups 64(%rdi), %zmm3 -; KNL-NEXT: vcmpltps %zmm0, %zmm2, %k1 -; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; KNL-NEXT: vpmovdb %zmm2, %xmm2 -; KNL-NEXT: vcmpltps %zmm1, %zmm3, %k2 -; KNL-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} -; KNL-NEXT: vpmovdb %zmm3, %xmm3 -; KNL-NEXT: vmovups 68(%rdi), %zmm4 {%k2} {z} -; KNL-NEXT: vmovups 4(%rdi), %zmm5 {%k1} {z} -; KNL-NEXT: vcmpltps %zmm5, %zmm0, %k1 -; KNL-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k1} {z} -; KNL-NEXT: vpmovdb %zmm5, %xmm5 -; KNL-NEXT: vpor %xmm5, %xmm2, %xmm2 -; KNL-NEXT: vcmpltps %zmm4, %zmm1, %k1 -; KNL-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z} -; KNL-NEXT: vpmovdb %zmm4, %xmm4 -; KNL-NEXT: vpor %xmm4, %xmm3, %xmm3 -; KNL-NEXT: vpmovsxbd %xmm3, %zmm3 -; KNL-NEXT: vpslld $31, %zmm3, %zmm3 -; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 -; KNL-NEXT: vpslld $31, %zmm2, %zmm2 -; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 -; KNL-NEXT: kmovw %k0, (%rsp) -; KNL-NEXT: cmpl $0, (%rsp) +; KNL-NEXT: vcmpltps %zmm1, %zmm3, %k1 +; KNL-NEXT: vcmpltps %zmm0, %zmm2, %k2 +; KNL-NEXT: vmovups 4(%rdi), %zmm2 {%k2} {z} +; KNL-NEXT: vmovups 68(%rdi), %zmm3 {%k1} {z} +; KNL-NEXT: vcmpltps %zmm3, %zmm1, %k0 +; KNL-NEXT: vcmpltps %zmm2, %zmm0, %k3 +; KNL-NEXT: korw %k3, %k2, %k2 +; KNL-NEXT: kmovw %k2, %eax +; KNL-NEXT: korw %k0, %k1, %k0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: shll $16, %ecx +; KNL-NEXT: orl %eax, %ecx ; KNL-NEXT: je LBB42_2 ; KNL-NEXT: ## %bb.1: ## %L1 ; KNL-NEXT: vmovaps %zmm0, (%rdi) ; KNL-NEXT: vmovaps %zmm1, 64(%rdi) -; KNL-NEXT: jmp LBB42_3 +; KNL-NEXT: vzeroupper +; KNL-NEXT: retq ; KNL-NEXT: LBB42_2: ## %L2 ; KNL-NEXT: vmovaps %zmm0, 4(%rdi) ; KNL-NEXT: vmovaps %zmm1, 68(%rdi) -; KNL-NEXT: LBB42_3: ## %End -; KNL-NEXT: movq %rbp, %rsp -; KNL-NEXT: popq %rbp ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; @@ -1917,51 +1883,29 @@ ; ; AVX512DQ-LABEL: ktest_2: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: pushq %rbp -; AVX512DQ-NEXT: .cfi_def_cfa_offset 16 -; AVX512DQ-NEXT: .cfi_offset %rbp, -16 -; AVX512DQ-NEXT: movq %rsp, %rbp -; AVX512DQ-NEXT: .cfi_def_cfa_register %rbp -; AVX512DQ-NEXT: andq $-32, %rsp -; AVX512DQ-NEXT: subq $32, %rsp ; AVX512DQ-NEXT: vmovups (%rdi), %zmm2 ; AVX512DQ-NEXT: vmovups 64(%rdi), %zmm3 -; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm2, %k1 -; AVX512DQ-NEXT: vpmovm2d %k1, %zmm2 -; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2 -; AVX512DQ-NEXT: vcmpltps %zmm1, %zmm3, %k2 -; AVX512DQ-NEXT: vpmovm2d %k2, %zmm3 -; AVX512DQ-NEXT: vpmovdb %zmm3, %xmm3 -; AVX512DQ-NEXT: vmovups 68(%rdi), %zmm4 {%k2} {z} -; AVX512DQ-NEXT: vmovups 4(%rdi), %zmm5 {%k1} {z} -; AVX512DQ-NEXT: vcmpltps %zmm5, %zmm0, %k0 -; AVX512DQ-NEXT: vpmovm2d %k0, %zmm5 -; AVX512DQ-NEXT: vpmovdb %zmm5, %xmm5 -; AVX512DQ-NEXT: vpor %xmm5, %xmm2, %xmm2 -; AVX512DQ-NEXT: vcmpltps %zmm4, %zmm1, %k0 -; AVX512DQ-NEXT: vpmovm2d %k0, %zmm4 -; AVX512DQ-NEXT: vpmovdb %zmm4, %xmm4 -; AVX512DQ-NEXT: vpor %xmm4, %xmm3, %xmm3 -; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3 -; AVX512DQ-NEXT: vpslld $31, %zmm3, %zmm3 -; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0 -; AVX512DQ-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2 -; AVX512DQ-NEXT: vpslld $31, %zmm2, %zmm2 -; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512DQ-NEXT: kmovw %k0, (%rsp) -; AVX512DQ-NEXT: cmpl $0, (%rsp) +; AVX512DQ-NEXT: vcmpltps %zmm1, %zmm3, %k1 +; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm2, %k2 +; AVX512DQ-NEXT: vmovups 4(%rdi), %zmm2 {%k2} {z} +; AVX512DQ-NEXT: vmovups 68(%rdi), %zmm3 {%k1} {z} +; AVX512DQ-NEXT: vcmpltps %zmm3, %zmm1, %k0 +; AVX512DQ-NEXT: vcmpltps %zmm2, %zmm0, %k3 +; AVX512DQ-NEXT: korw %k3, %k2, %k2 +; AVX512DQ-NEXT: kmovw %k2, %eax +; AVX512DQ-NEXT: korw %k0, %k1, %k0 +; AVX512DQ-NEXT: kmovw %k0, %ecx +; AVX512DQ-NEXT: shll $16, %ecx +; AVX512DQ-NEXT: orl %eax, %ecx ; AVX512DQ-NEXT: je LBB42_2 ; AVX512DQ-NEXT: ## %bb.1: ## %L1 ; AVX512DQ-NEXT: vmovaps %zmm0, (%rdi) ; AVX512DQ-NEXT: vmovaps %zmm1, 64(%rdi) -; AVX512DQ-NEXT: jmp LBB42_3 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq ; AVX512DQ-NEXT: LBB42_2: ## %L2 ; AVX512DQ-NEXT: vmovaps %zmm0, 4(%rdi) ; AVX512DQ-NEXT: vmovaps %zmm1, 68(%rdi) -; AVX512DQ-NEXT: LBB42_3: ## %End -; AVX512DQ-NEXT: movq %rbp, %rsp -; AVX512DQ-NEXT: popq %rbp ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq %addr1 = getelementptr float, float * %base, i64 0 @@ -2334,14 +2278,14 @@ define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) { ; KNL-LABEL: store_32i1: ; KNL: ## %bb.0: -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1 -; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 +; KNL-NEXT: vpmovsxbd %xmm0, %zmm1 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, 2(%rdi) +; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: kmovw %k1, 2(%rdi) ; KNL-NEXT: kmovw %k0, (%rdi) ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq @@ -2364,14 +2308,14 @@ ; ; AVX512DQ-LABEL: store_32i1: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1 +; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm1 ; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1 ; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512DQ-NEXT: kmovw %k0, 2(%rdi) +; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k1 +; AVX512DQ-NEXT: kmovw %k1, 2(%rdi) ; AVX512DQ-NEXT: kmovw %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq @@ -2383,16 +2327,12 @@ ; KNL-LABEL: store_32i1_1: ; KNL: ## %bb.0: ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 -; KNL-NEXT: vpmovdb %zmm0, %xmm0 -; KNL-NEXT: vpmovsxwd %ymm1, %zmm1 -; KNL-NEXT: vpmovdb %zmm1, %xmm1 -; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, 2(%rdi) -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: vpmovsxwd %ymm1, %zmm0 +; KNL-NEXT: vpslld $31, %zmm0, %zmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: kmovw %k1, 2(%rdi) ; KNL-NEXT: kmovw %k0, (%rdi) ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq @@ -2416,16 +2356,12 @@ ; AVX512DQ-LABEL: store_32i1_1: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm1 -; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 -; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1 -; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1 -; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512DQ-NEXT: kmovw %k0, 2(%rdi) -; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm0 +; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k1 +; AVX512DQ-NEXT: kmovw %k1, 2(%rdi) ; AVX512DQ-NEXT: kmovw %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq @@ -2439,21 +2375,21 @@ ; ; KNL-LABEL: store_64i1: ; KNL: ## %bb.0: -; KNL-NEXT: vpmovsxbd %xmm3, %zmm3 -; KNL-NEXT: vpslld $31, %zmm3, %zmm3 -; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0 -; KNL-NEXT: kmovw %k0, 6(%rdi) -; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 -; KNL-NEXT: vpslld $31, %zmm2, %zmm2 -; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 -; KNL-NEXT: kmovw %k0, 4(%rdi) -; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, 2(%rdi) ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: vpmovsxbd %xmm1, %zmm0 +; KNL-NEXT: vpslld $31, %zmm0, %zmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovsxbd %xmm2, %zmm0 +; KNL-NEXT: vpslld $31, %zmm0, %zmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k2 +; KNL-NEXT: vpmovsxbd %xmm3, %zmm0 +; KNL-NEXT: vpslld $31, %zmm0, %zmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k3 +; KNL-NEXT: kmovw %k3, 6(%rdi) +; KNL-NEXT: kmovw %k2, 4(%rdi) +; KNL-NEXT: kmovw %k1, 2(%rdi) ; KNL-NEXT: kmovw %k0, (%rdi) ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq @@ -2476,21 +2412,21 @@ ; ; AVX512DQ-LABEL: store_64i1: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3 -; AVX512DQ-NEXT: vpslld $31, %zmm3, %zmm3 -; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0 -; AVX512DQ-NEXT: kmovw %k0, 6(%rdi) -; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2 -; AVX512DQ-NEXT: vpslld $31, %zmm2, %zmm2 -; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512DQ-NEXT: kmovw %k0, 4(%rdi) -; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1 -; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1 -; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512DQ-NEXT: kmovw %k0, 2(%rdi) ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm0 +; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k1 +; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm0 +; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k2 +; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm0 +; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k3 +; AVX512DQ-NEXT: kmovw %k3, 6(%rdi) +; AVX512DQ-NEXT: kmovw %k2, 4(%rdi) +; AVX512DQ-NEXT: kmovw %k1, 2(%rdi) ; AVX512DQ-NEXT: kmovw %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq Index: test/CodeGen/X86/avx512-masked-memop-64-32.ll =================================================================== --- test/CodeGen/X86/avx512-masked-memop-64-32.ll +++ test/CodeGen/X86/avx512-masked-memop-64-32.ll @@ -240,18 +240,18 @@ define <32 x double> @test_load_32f64(<32 x double>* %ptrs, <32 x i1> %mask, <32 x double> %src0) { ; AVX512F-LABEL: test_load_32f64: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm5 -; AVX512F-NEXT: vpmovsxbd %xmm5, %zmm5 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm5 ; AVX512F-NEXT: vpslld $31, %zmm5, %zmm5 ; AVX512F-NEXT: vptestmd %zmm5, %zmm5, %k1 -; AVX512F-NEXT: vblendmpd 128(%rdi), %zmm3, %zmm5 {%k1} +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k2 -; AVX512F-NEXT: vblendmpd (%rdi), %zmm1, %zmm0 {%k2} +; AVX512F-NEXT: vblendmpd 128(%rdi), %zmm3, %zmm5 {%k2} +; AVX512F-NEXT: vblendmpd (%rdi), %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: kshiftrw $8, %k2, %k2 +; AVX512F-NEXT: vblendmpd 192(%rdi), %zmm4, %zmm3 {%k2} ; AVX512F-NEXT: kshiftrw $8, %k1, %k1 -; AVX512F-NEXT: vblendmpd 192(%rdi), %zmm4, %zmm3 {%k1} -; AVX512F-NEXT: kshiftrw $8, %k2, %k1 ; AVX512F-NEXT: vblendmpd 64(%rdi), %zmm2, %zmm1 {%k1} ; AVX512F-NEXT: vmovapd %zmm5, %zmm2 ; AVX512F-NEXT: retq Index: test/CodeGen/X86/avx512-vec-cmp.ll =================================================================== --- test/CodeGen/X86/avx512-vec-cmp.ll +++ test/CodeGen/X86/avx512-vec-cmp.ll @@ -195,25 +195,12 @@ define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind { ; KNL-LABEL: test12_v32i32: ; KNL: ## %bb.0: -; KNL-NEXT: pushq %rbp -; KNL-NEXT: movq %rsp, %rbp -; KNL-NEXT: andq $-32, %rsp -; KNL-NEXT: subq $32, %rsp -; KNL-NEXT: vpcmpeqd %zmm3, %zmm1, %k1 -; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; KNL-NEXT: vpmovdb %zmm1, %xmm1 -; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vpcmpeqd %zmm2, %zmm0, %k1 -; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; KNL-NEXT: vpmovdb %zmm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, (%rsp) -; KNL-NEXT: movl (%rsp), %eax -; KNL-NEXT: movq %rbp, %rsp -; KNL-NEXT: popq %rbp +; KNL-NEXT: vpcmpeqd %zmm2, %zmm0, %k0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: vpcmpeqd %zmm3, %zmm1, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: shll $16, %eax +; KNL-NEXT: orl %ecx, %eax ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; @@ -233,40 +220,28 @@ define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind { ; KNL-LABEL: test12_v64i16: ; KNL: ## %bb.0: -; KNL-NEXT: pushq %rbp -; KNL-NEXT: movq %rsp, %rbp -; KNL-NEXT: andq $-32, %rsp -; KNL-NEXT: subq $64, %rsp -; KNL-NEXT: vpcmpeqw %ymm5, %ymm1, %ymm1 -; KNL-NEXT: vpmovsxwd %ymm1, %zmm1 -; KNL-NEXT: vpmovdb %zmm1, %xmm1 -; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; KNL-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 -; KNL-NEXT: vpmovdb %zmm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, (%rsp) -; KNL-NEXT: vpcmpeqw %ymm7, %ymm3, %ymm0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpcmpeqw %ymm5, %ymm1, %ymm0 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 -; KNL-NEXT: vpmovdb %zmm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: shll $16, %ecx +; KNL-NEXT: orl %eax, %ecx ; KNL-NEXT: vpcmpeqw %ymm6, %ymm2, %ymm0 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 -; KNL-NEXT: vpmovdb %zmm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: movl (%rsp), %ecx -; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: kmovw %k0, %edx +; KNL-NEXT: vpcmpeqw %ymm7, %ymm3, %ymm0 +; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: shll $16, %eax +; KNL-NEXT: orl %edx, %eax ; KNL-NEXT: shlq $32, %rax ; KNL-NEXT: orq %rcx, %rax -; KNL-NEXT: movq %rbp, %rsp -; KNL-NEXT: popq %rbp ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; Index: test/CodeGen/X86/avx512vl-vec-masked-cmp.ll =================================================================== --- test/CodeGen/X86/avx512vl-vec-masked-cmp.ll +++ test/CodeGen/X86/avx512vl-vec-masked-cmp.ll @@ -11,22 +11,10 @@ ; ; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -47,22 +35,10 @@ ; ; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -85,27 +61,11 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -129,27 +89,11 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -174,27 +118,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -215,27 +143,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -258,32 +170,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -307,32 +199,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -358,30 +230,16 @@ ; ; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 +; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -403,30 +261,16 @@ ; ; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 +; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -450,41 +294,19 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 -; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm3, %xmm3 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -509,41 +331,19 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi ; NoVLX-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 -; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2 -; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 +; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -683,49 +483,10 @@ ; ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -746,49 +507,10 @@ ; ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -811,50 +533,11 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -878,50 +561,11 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -946,54 +590,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -1014,54 +615,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -1084,55 +642,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -1156,55 +671,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -1230,24 +702,10 @@ ; ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -1269,24 +727,10 @@ ; ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -1310,27 +754,11 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -1355,27 +783,11 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -1401,29 +813,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -1445,29 +839,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -1491,32 +867,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -1541,32 +897,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -1592,30 +928,43 @@ ; ; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 -; NoVLX-NEXT: vmovq %xmm3, %rax +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 +; NoVLX-NEXT: vmovq %xmm2, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm2 +; NoVLX-NEXT: vmovd %eax, %xmm3 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5 -; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8 -; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4 -; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7 -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4 +; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8 +; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5 +; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7 +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4 +; NoVLX-NEXT: vpextrq $1, %xmm2, %rax +; NoVLX-NEXT: shrq $48, %rcx +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2 +; NoVLX-NEXT: movl %eax, %ecx +; NoVLX-NEXT: shrl $16, %ecx +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: movq %rax, %rcx +; NoVLX-NEXT: shrq $32, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm3, %rcx +; NoVLX-NEXT: shrq $48, %rax +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9 +; NoVLX-NEXT: movl %ecx, %eax +; NoVLX-NEXT: shrl $16, %eax +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: movq %rcx, %rax +; NoVLX-NEXT: shrq $32, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 @@ -1623,109 +972,109 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm0, %rcx +; NoVLX-NEXT: vmovq %xmm6, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm5 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm6, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vmovq %xmm2, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm5 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm2, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: vmovq %xmm7, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm7, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm6, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vmovq %xmm5, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm6, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm5, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm1, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vmovq %xmm8, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm5 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpextrq $1, %xmm8, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vmovq %xmm4, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5 +; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 @@ -1733,7 +1082,11 @@ ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rax +; NoVLX-NEXT: vpextrq $1, %xmm1, %rax +; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1 +; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0 +; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3 +; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1 ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx @@ -1741,55 +1094,20 @@ ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx +; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm8, %rcx -; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm8, %rax -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 -; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3 -; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm3, %ymm1 -; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 +; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm0 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -1811,67 +1129,60 @@ ; ; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2 -; NoVLX-NEXT: vmovq %xmm2, %rax +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1 +; NoVLX-NEXT: vmovq %xmm1, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm1 +; NoVLX-NEXT: vmovd %eax, %xmm2 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1 -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm2, %rax +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm0, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vmovq %xmm4, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm3, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 @@ -1879,7 +1190,7 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm1, %rcx +; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax @@ -1889,41 +1200,30 @@ ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax +; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpeqw 32(%rdi), %ymm1, %ymm1 -; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 ; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vpcmpeqw 32(%rdi), %ymm1, %ymm0 +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -1947,71 +1247,23 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 -; NoVLX-NEXT: vmovq %xmm2, %rax +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 +; NoVLX-NEXT: vmovq %xmm3, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm3 +; NoVLX-NEXT: vmovd %eax, %xmm2 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4 -; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8 -; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5 -; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5 +; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8 +; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4 +; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7 +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm2, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm3, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5 ; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm6, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm6, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 @@ -2021,17 +1273,17 @@ ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm5 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 @@ -2039,69 +1291,69 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vmovq %xmm7, %rcx +; NoVLX-NEXT: vmovq %xmm2, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm5 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm7, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpextrq $1, %xmm2, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm5, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm7, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm5, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm7, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm8, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm6, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm5 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm8, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm6, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 @@ -2110,52 +1362,74 @@ ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax -; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1 -; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3 -; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm4, %xmm4 -; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1 -; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm3, %xmm3 ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx +; NoVLX-NEXT: shrq $32, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: movl %ecx, %eax +; NoVLX-NEXT: shrl $16, %eax +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: movq %rcx, %rax +; NoVLX-NEXT: shrq $32, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm4, %rax +; NoVLX-NEXT: shrq $48, %rcx +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: movl %eax, %ecx +; NoVLX-NEXT: shrl $16, %ecx +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm8, %rcx +; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 -; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0 +; NoVLX-NEXT: movl %ecx, %eax +; NoVLX-NEXT: shrl $16, %eax +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: movq %rcx, %rax +; NoVLX-NEXT: shrq $32, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpextrq $1, %xmm8, %rax +; NoVLX-NEXT: shrq $48, %rcx +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4 +; NoVLX-NEXT: movl %eax, %ecx +; NoVLX-NEXT: shrl $16, %ecx +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4 +; NoVLX-NEXT: movq %rax, %rcx +; NoVLX-NEXT: shrq $32, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4 +; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 +; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3 +; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1 +; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi +; NoVLX-NEXT: shrq $48, %rax +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm1 +; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 +; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm3, %ymm1 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -2180,68 +1454,60 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1 -; NoVLX-NEXT: vmovq %xmm1, %rax +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2 +; NoVLX-NEXT: vmovq %xmm2, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm2 +; NoVLX-NEXT: vmovd %eax, %xmm1 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1 +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm2, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vmovq %xmm4, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm3, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm3, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 @@ -2249,7 +1515,7 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm0, %rcx +; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax @@ -2259,51 +1525,33 @@ ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax +; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm4, %xmm4 -; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 -; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi +; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm1, %ymm1 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0 +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -2710,37 +1958,12 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -2761,37 +1984,12 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -2814,38 +2012,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -2870,38 +2043,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -2927,37 +2075,12 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -2981,38 +2104,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -3039,42 +2137,13 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -3095,42 +2164,13 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -3153,43 +2193,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -3214,43 +2225,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -3276,42 +2258,13 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -3335,43 +2288,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -3593,49 +2517,12 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -3657,49 +2544,12 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -3723,50 +2573,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -3791,50 +2604,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -3860,51 +2636,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp -; NoVLX-NEXT: vzeroupper -; NoVLX-NEXT: retq +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: vzeroupper +; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b @@ -3927,50 +2666,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -3997,54 +2699,13 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4066,54 +2727,13 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4137,55 +2757,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4210,55 +2789,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4284,54 +2822,13 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4356,55 +2853,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4431,24 +2887,8 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4470,24 +2910,8 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4511,25 +2935,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4554,25 +2962,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4598,24 +2990,8 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4640,25 +3016,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4685,29 +3045,9 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4729,29 +3069,9 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4775,30 +3095,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4823,30 +3123,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4872,29 +3152,9 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4919,30 +3179,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5541,31 +3781,12 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5586,31 +3807,12 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5633,32 +3835,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5683,32 +3866,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5734,31 +3898,12 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5782,32 +3927,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5834,36 +3960,13 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5884,36 +3987,13 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5936,37 +4016,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5991,37 +4048,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -6047,36 +4081,13 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -6100,37 +4111,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -6552,37 +4540,12 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -6604,37 +4567,12 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -6658,38 +4596,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -6715,38 +4628,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -6773,37 +4661,12 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -6828,38 +4691,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -6887,42 +4725,13 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -6944,42 +4753,13 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -7003,43 +4783,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -7065,44 +4816,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp -; NoVLX-NEXT: vzeroupper +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax +; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> @@ -7128,42 +4850,13 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -7188,43 +4881,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -7422,47 +5086,8 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -7484,47 +5109,8 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -7548,48 +5134,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -7614,48 +5161,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -7681,47 +5189,8 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -7746,48 +5215,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -7814,52 +5244,9 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -7881,52 +5268,9 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -7950,53 +5294,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8021,53 +5322,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8093,52 +5351,9 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8163,53 +5378,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8235,22 +5407,10 @@ ; ; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8271,22 +5431,10 @@ ; ; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8309,27 +5457,11 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8353,27 +5485,11 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8398,27 +5514,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8439,27 +5539,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8482,32 +5566,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8531,32 +5595,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8582,30 +5626,16 @@ ; ; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 +; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8627,30 +5657,16 @@ ; ; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 +; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8674,41 +5690,19 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 -; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm3, %xmm3 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi ; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8733,41 +5727,19 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi ; NoVLX-NEXT: vpcmpgtb (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 -; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2 -; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 +; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8907,49 +5879,10 @@ ; ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8970,49 +5903,10 @@ ; ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -9035,50 +5929,11 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -9102,50 +5957,11 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -9170,54 +5986,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -9238,54 +6011,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -9308,55 +6038,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -9380,55 +6067,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -9454,24 +6098,10 @@ ; ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -9493,24 +6123,10 @@ ; ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -9534,27 +6150,11 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -9579,27 +6179,11 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -9625,29 +6209,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -9669,29 +6235,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -9715,32 +6263,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -9765,32 +6293,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -9816,204 +6324,186 @@ ; ; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 -; NoVLX-NEXT: vmovq %xmm3, %rax +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 +; NoVLX-NEXT: vmovq %xmm2, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm2 +; NoVLX-NEXT: vmovd %eax, %xmm3 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5 -; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8 -; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4 -; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7 -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4 +; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8 +; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5 +; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7 +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm3, %rax +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4 +; NoVLX-NEXT: vpextrq $1, %xmm2, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm0, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm3, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm5 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vmovq %xmm2, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vmovq %xmm6, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm5 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm2, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm6, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm7, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm7, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm6, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vmovq %xmm7, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm6, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm7, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm1, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vmovq %xmm5, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm5, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vmovq %xmm4, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vmovq %xmm8, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm5 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpextrq $1, %xmm8, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm8, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5 +; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm8, %rax -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 -; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3 -; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm1, %rax +; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1 +; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0 +; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3 +; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1 ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1 -; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 +; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm0 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -10035,67 +6525,60 @@ ; ; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2 -; NoVLX-NEXT: vmovq %xmm2, %rax +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1 +; NoVLX-NEXT: vmovq %xmm1, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm1 +; NoVLX-NEXT: vmovd %eax, %xmm2 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1 -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm2, %rax +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm0, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vmovq %xmm4, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm3, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 @@ -10103,7 +6586,7 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm1, %rcx +; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax @@ -10113,41 +6596,30 @@ ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax +; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtw 32(%rdi), %ymm1, %ymm1 -; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 ; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vpcmpgtw 32(%rdi), %ymm1, %ymm0 +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -10171,161 +6643,133 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 -; NoVLX-NEXT: vmovq %xmm2, %rax +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 +; NoVLX-NEXT: vmovq %xmm3, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm3 +; NoVLX-NEXT: vmovd %eax, %xmm2 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4 -; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8 -; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5 -; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5 +; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8 +; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4 +; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7 +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm2, %rax +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5 +; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm3, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vmovd %ecx, %xmm5 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm3, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm6, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vmovq %xmm2, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm5 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm6, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm0, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpextrq $1, %xmm2, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm7, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm7, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm5, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm6, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm5, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm6, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm8, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm5 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm8, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5 -; NoVLX-NEXT: vmovq %xmm1, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 @@ -10333,17 +6777,7 @@ ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax -; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1 -; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3 -; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm4, %xmm4 -; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1 -; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx @@ -10351,35 +6785,47 @@ ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm8, %rcx +; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 -; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0 +; NoVLX-NEXT: movl %ecx, %eax +; NoVLX-NEXT: shrl $16, %eax +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: movq %rcx, %rax +; NoVLX-NEXT: shrq $32, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpextrq $1, %xmm8, %rax +; NoVLX-NEXT: shrq $48, %rcx +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4 +; NoVLX-NEXT: movl %eax, %ecx +; NoVLX-NEXT: shrl $16, %ecx +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4 +; NoVLX-NEXT: movq %rax, %rcx +; NoVLX-NEXT: shrq $32, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4 +; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 +; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3 +; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1 +; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi +; NoVLX-NEXT: shrq $48, %rax +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm1 +; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 +; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -10404,68 +6850,60 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1 -; NoVLX-NEXT: vmovq %xmm1, %rax +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2 +; NoVLX-NEXT: vmovq %xmm2, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm2 +; NoVLX-NEXT: vmovd %eax, %xmm1 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1 +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm2, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vmovq %xmm4, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm3, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm3, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 @@ -10473,7 +6911,7 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm0, %rcx +; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax @@ -10483,51 +6921,33 @@ ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax +; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm4, %xmm4 -; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 -; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi +; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm1, %ymm1 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0 +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -10934,37 +7354,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -10985,37 +7380,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -11038,38 +7408,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -11094,38 +7439,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -11151,37 +7471,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -11205,38 +7500,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -11263,42 +7533,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -11319,42 +7560,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -11377,43 +7589,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -11438,43 +7621,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -11500,42 +7654,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -11559,43 +7684,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -11817,49 +7913,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -11881,49 +7940,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -11947,50 +7969,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12015,50 +8000,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12084,49 +8032,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12151,50 +8062,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12221,54 +8095,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12290,54 +8123,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12361,55 +8153,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12434,55 +8185,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12508,54 +8218,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12580,55 +8249,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12655,24 +8283,8 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12694,24 +8306,8 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12735,25 +8331,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12778,25 +8358,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12822,24 +8386,8 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12864,25 +8412,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12909,29 +8441,9 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12953,29 +8465,9 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12999,30 +8491,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -13047,30 +8519,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -13096,29 +8548,9 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -13143,30 +8575,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -13765,31 +9177,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -13810,31 +9203,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -13857,32 +9231,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -13907,32 +9262,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -13958,31 +9294,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14006,32 +9323,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14058,36 +9356,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14108,36 +9383,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14160,37 +9412,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14215,37 +9444,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14271,38 +9477,15 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp -; NoVLX-NEXT: vzeroupper -; NoVLX-NEXT: retq +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax +; NoVLX-NEXT: vzeroupper +; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b @@ -14324,37 +9507,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14776,37 +9936,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14828,37 +9963,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14882,38 +9992,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14939,38 +10024,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14997,37 +10057,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -15052,38 +10087,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -15111,42 +10121,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -15168,42 +10149,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -15227,43 +10179,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -15289,43 +10212,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -15352,42 +10246,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -15412,43 +10277,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -15646,47 +10482,8 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -15708,47 +10505,8 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -15772,48 +10530,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -15838,48 +10557,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -15905,47 +10585,8 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -15970,48 +10611,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16038,52 +10640,9 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16105,52 +10664,9 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16174,53 +10690,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16245,53 +10718,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16317,52 +10747,9 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16387,53 +10774,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16459,25 +10803,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16498,26 +10830,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16540,30 +10860,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16587,13 +10891,6 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 @@ -16601,17 +10898,8 @@ ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16636,30 +10924,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16680,31 +10952,15 @@ ; ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16727,35 +10983,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16779,13 +11015,6 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 @@ -16793,22 +11022,9 @@ ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16834,34 +11050,20 @@ ; ; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 +; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16883,35 +11085,21 @@ ; ; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 +; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16935,43 +11123,23 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 -; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm3, %xmm3 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16996,44 +11164,24 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 -; NoVLX-NEXT: vmovdqa (%rsi), %ymm3 -; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm3, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3 -; NoVLX-NEXT: vpxor %ymm3, %ymm0, %ymm0 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 -; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2 -; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 +; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 +; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 +; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -17187,52 +11335,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -17253,13 +11362,6 @@ ; ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 @@ -17267,39 +11369,7 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -17322,13 +11392,6 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 @@ -17336,39 +11399,7 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -17392,13 +11423,6 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 @@ -17407,39 +11431,7 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -17464,57 +11456,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -17535,13 +11484,6 @@ ; ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 @@ -17549,44 +11491,8 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -17609,13 +11515,6 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 @@ -17623,44 +11522,8 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -17684,13 +11547,6 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 @@ -17699,44 +11555,8 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -17762,27 +11582,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -17804,28 +11610,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -17849,30 +11641,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -17897,13 +11673,6 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 @@ -17911,17 +11680,8 @@ ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -17947,32 +11707,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -17994,33 +11736,15 @@ ; ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -18044,35 +11768,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -18097,13 +11801,6 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 @@ -18111,22 +11808,9 @@ ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -18152,30 +11836,43 @@ ; ; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 -; NoVLX-NEXT: vmovq %xmm3, %rax +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 +; NoVLX-NEXT: vmovq %xmm2, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm2 +; NoVLX-NEXT: vmovd %eax, %xmm3 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5 -; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8 -; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4 -; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7 -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4 +; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8 +; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5 +; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7 +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4 +; NoVLX-NEXT: vpextrq $1, %xmm2, %rax +; NoVLX-NEXT: shrq $48, %rcx +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2 +; NoVLX-NEXT: movl %eax, %ecx +; NoVLX-NEXT: shrl $16, %ecx +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: movq %rax, %rcx +; NoVLX-NEXT: shrq $32, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm3, %rcx +; NoVLX-NEXT: shrq $48, %rax +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9 +; NoVLX-NEXT: movl %ecx, %eax +; NoVLX-NEXT: shrl $16, %eax +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: movq %rcx, %rax +; NoVLX-NEXT: shrq $32, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 @@ -18183,109 +11880,109 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm0, %rcx +; NoVLX-NEXT: vmovq %xmm6, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm5 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm6, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vmovq %xmm2, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm5 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm2, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: vmovq %xmm7, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm7, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm6, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vmovq %xmm5, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm6, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm5, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm1, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vmovq %xmm8, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm5 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpextrq $1, %xmm8, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vmovq %xmm4, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5 +; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 @@ -18293,7 +11990,11 @@ ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rax +; NoVLX-NEXT: vpextrq $1, %xmm1, %rax +; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1 +; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0 +; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3 +; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1 ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx @@ -18301,60 +12002,25 @@ ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx +; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm8, %rcx -; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm8, %rax -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 -; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3 -; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1 +; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 +; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm0 +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -18376,67 +12042,60 @@ ; ; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2 -; NoVLX-NEXT: vmovq %xmm2, %rax +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1 +; NoVLX-NEXT: vmovq %xmm1, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm1 +; NoVLX-NEXT: vmovd %eax, %xmm2 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1 -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm2, %rax +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm0, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vmovq %xmm4, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm3, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 @@ -18444,7 +12103,7 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm1, %rcx +; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax @@ -18454,48 +12113,37 @@ ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax +; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm2 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 ; NoVLX-NEXT: vmovdqa 32(%rdi), %ymm2 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 ; NoVLX-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm0 +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -18519,51 +12167,23 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 -; NoVLX-NEXT: vmovq %xmm2, %rax +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 +; NoVLX-NEXT: vmovq %xmm3, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm3 +; NoVLX-NEXT: vmovd %eax, %xmm2 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4 -; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8 -; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5 -; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5 +; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8 +; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4 +; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7 +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm2, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm3, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5 ; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 @@ -18571,109 +12191,109 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm6, %rcx +; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm5 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm6, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm0, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vmovq %xmm2, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm5 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpextrq $1, %xmm2, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm7, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm7, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm5, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm6, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm5, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm6, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm8, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm5 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm8, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5 -; NoVLX-NEXT: vmovq %xmm1, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 @@ -18681,17 +12301,7 @@ ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax -; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1 -; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3 -; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm4, %xmm4 -; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1 -; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx @@ -18699,38 +12309,52 @@ ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm8, %rcx +; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 -; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 +; NoVLX-NEXT: movl %ecx, %eax +; NoVLX-NEXT: shrl $16, %eax +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: movq %rcx, %rax +; NoVLX-NEXT: shrq $32, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpextrq $1, %xmm8, %rax +; NoVLX-NEXT: shrq $48, %rcx +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4 +; NoVLX-NEXT: movl %eax, %ecx +; NoVLX-NEXT: shrl $16, %ecx +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4 +; NoVLX-NEXT: movq %rax, %rcx +; NoVLX-NEXT: shrq $32, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4 +; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 +; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3 +; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1 +; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi +; NoVLX-NEXT: shrq $48, %rax +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm1 +; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 +; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -18755,68 +12379,60 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1 -; NoVLX-NEXT: vmovq %xmm1, %rax +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2 +; NoVLX-NEXT: vmovq %xmm2, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm2 +; NoVLX-NEXT: vmovd %eax, %xmm1 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1 +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm2, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vmovq %xmm4, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm3, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm3, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 @@ -18824,7 +12440,7 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm0, %rcx +; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax @@ -18834,56 +12450,40 @@ ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax +; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm4, %xmm4 -; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 -; NoVLX-NEXT: vmovdqa (%rsi), %ymm3 -; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm3, %ymm0 -; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm3 -; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1 -; NoVLX-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3 -; NoVLX-NEXT: vpxor %ymm3, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %ymm3, %ymm1, %ymm1 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi +; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm2 +; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 +; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm2 +; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 +; NoVLX-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 +; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -19290,37 +12890,12 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -19341,37 +12916,12 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -19394,38 +12944,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -19450,38 +12975,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -19507,37 +13007,12 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -19561,38 +13036,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -19619,42 +13069,13 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -19675,42 +13096,13 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -19733,43 +13125,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -19794,43 +13157,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -19856,42 +13190,13 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -19915,43 +13220,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -20173,49 +13449,12 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -20237,49 +13476,12 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -20303,50 +13505,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -20371,50 +13536,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -20440,49 +13568,12 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -20507,50 +13598,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -20577,54 +13631,13 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -20646,54 +13659,13 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -20717,55 +13689,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -20790,55 +13721,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -20864,54 +13754,13 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -20936,55 +13785,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -21011,24 +13819,8 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -21050,24 +13842,8 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -21091,25 +13867,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -21134,25 +13894,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -21178,24 +13922,8 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -21220,25 +13948,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -21265,29 +13977,9 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -21309,29 +14001,9 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -21355,30 +14027,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -21403,30 +14055,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -21452,29 +14084,9 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -21499,30 +14111,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -22121,31 +14713,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -22166,31 +14739,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -22213,32 +14767,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -22263,32 +14798,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -22314,31 +14830,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -22362,32 +14859,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -22414,36 +14892,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -22464,36 +14919,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -22516,37 +14948,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -22571,37 +14980,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -22627,36 +15013,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -22680,37 +15043,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23132,37 +15472,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23184,37 +15499,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23238,38 +15528,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23295,38 +15560,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23353,37 +15593,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23408,38 +15623,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23467,42 +15657,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23524,42 +15685,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23583,43 +15715,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23645,43 +15748,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23708,42 +15782,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23768,43 +15813,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24002,47 +16018,8 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24064,47 +16041,8 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24128,48 +16066,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24194,48 +16093,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24261,47 +16121,8 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24326,48 +16147,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24394,52 +16176,9 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24461,52 +16200,9 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24530,53 +16226,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24601,53 +16254,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24673,52 +16283,9 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24743,53 +16310,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24815,25 +16339,13 @@ ; ; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24854,25 +16366,13 @@ ; ; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24895,30 +16395,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24942,30 +16426,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24990,30 +16458,14 @@ ; ; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -25034,30 +16486,14 @@ ; ; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -25080,35 +16516,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -25132,35 +16548,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -25186,33 +16582,19 @@ ; ; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 +; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -25234,33 +16616,19 @@ ; ; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 +; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -25284,44 +16652,22 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 -; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm3, %xmm3 -; NoVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; NoVLX-NEXT: vpxor %ymm4, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm4, %ymm1, %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi +; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 +; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -25346,44 +16692,22 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 -; NoVLX-NEXT: vmovdqa {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; NoVLX-NEXT: vpxor %ymm3, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rsi), %ymm3, %ymm3 -; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm3, %ymm0 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 -; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2 -; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi +; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 +; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1 +; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 +; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -25535,52 +16859,13 @@ ; ; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -25601,52 +16886,13 @@ ; ; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -25669,13 +16915,6 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 @@ -25683,39 +16922,7 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -25739,13 +16946,6 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 @@ -25753,39 +16953,7 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -25810,57 +16978,14 @@ ; ; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -25881,57 +17006,14 @@ ; ; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -25954,13 +17036,6 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 @@ -25968,44 +17043,8 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -26029,13 +17068,6 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 @@ -26043,44 +17075,8 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -26106,27 +17102,13 @@ ; ; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -26148,27 +17130,13 @@ ; ; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -26192,30 +17160,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -26240,30 +17192,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -26289,32 +17225,14 @@ ; ; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -26336,32 +17254,14 @@ ; ; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -26385,35 +17285,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -26438,35 +17318,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -26492,30 +17352,43 @@ ; ; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 -; NoVLX-NEXT: vmovq %xmm3, %rax +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 +; NoVLX-NEXT: vmovq %xmm2, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm2 +; NoVLX-NEXT: vmovd %eax, %xmm3 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5 -; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8 -; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4 -; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7 -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4 +; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8 +; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5 +; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7 +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4 +; NoVLX-NEXT: vpextrq $1, %xmm2, %rax +; NoVLX-NEXT: shrq $48, %rcx +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2 +; NoVLX-NEXT: movl %eax, %ecx +; NoVLX-NEXT: shrl $16, %ecx +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: movq %rax, %rcx +; NoVLX-NEXT: shrq $32, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm3, %rcx +; NoVLX-NEXT: shrq $48, %rax +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9 +; NoVLX-NEXT: movl %ecx, %eax +; NoVLX-NEXT: shrl $16, %eax +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: movq %rcx, %rax +; NoVLX-NEXT: shrq $32, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 @@ -26523,109 +17396,109 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm0, %rcx +; NoVLX-NEXT: vmovq %xmm6, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm5 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm6, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vmovq %xmm2, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm5 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm2, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: vmovq %xmm7, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm7, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm6, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vmovq %xmm5, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm6, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm5, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm1, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vmovq %xmm8, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm5 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpextrq $1, %xmm8, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vmovq %xmm4, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5 +; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 @@ -26633,7 +17506,10 @@ ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rax +; NoVLX-NEXT: vpextrq $1, %xmm1, %rax +; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1 +; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0 +; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3 ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx @@ -26641,60 +17517,26 @@ ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx +; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm8, %rcx -; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm8, %rax -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 -; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3 -; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1 -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4 -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2 +; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] -; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3 -; NoVLX-NEXT: vpxor %ymm4, %ymm2, %ymm2 -; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2 -; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 -; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 -; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpxor %ymm4, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm4, %ymm1, %ymm1 +; NoVLX-NEXT: vpxor %ymm4, %ymm2, %ymm2 +; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vpxor %ymm4, %ymm1, %ymm0 +; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -26716,67 +17558,60 @@ ; ; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2 -; NoVLX-NEXT: vmovq %xmm2, %rax +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1 +; NoVLX-NEXT: vmovq %xmm1, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm1 +; NoVLX-NEXT: vmovd %eax, %xmm2 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1 -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm2, %rax +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm0, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vmovq %xmm4, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm3, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 @@ -26784,7 +17619,7 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm1, %rcx +; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax @@ -26794,46 +17629,35 @@ ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax +; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor 32(%rdi), %ymm2, %ymm3 -; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1 -; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rdi), %ymm2, %ymm1 +; NoVLX-NEXT: vpxor (%rdi), %ymm2, %ymm3 +; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm3, %ymm0 +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm0 +; NoVLX-NEXT: vpxor 32(%rdi), %ymm2, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -26857,51 +17681,23 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 -; NoVLX-NEXT: vmovq %xmm2, %rax +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 +; NoVLX-NEXT: vmovq %xmm3, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm3 +; NoVLX-NEXT: vmovd %eax, %xmm2 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4 -; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8 -; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5 -; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5 +; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8 +; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4 +; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7 +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm2, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm3, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5 ; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 @@ -26909,109 +17705,109 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm6, %rcx +; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm5 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm6, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm0, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vmovq %xmm2, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm5 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpextrq $1, %xmm2, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm7, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm7, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm5, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm6, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm5, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm6, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm8, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm5 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm8, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5 -; NoVLX-NEXT: vmovq %xmm1, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 @@ -27019,16 +17815,7 @@ ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax -; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0 -; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm4, %xmm4 -; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3 -; NoVLX-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm6, %xmm6 +; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx @@ -27036,41 +17823,52 @@ ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm8, %rcx +; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 -; NoVLX-NEXT: vmovdqa {{.*#+}} ymm5 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] -; NoVLX-NEXT: vpxor %ymm5, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm5, %ymm2, %ymm2 -; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 +; NoVLX-NEXT: movl %ecx, %eax +; NoVLX-NEXT: shrl $16, %eax +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: movq %rcx, %rax +; NoVLX-NEXT: shrq $32, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpextrq $1, %xmm8, %rax +; NoVLX-NEXT: shrq $48, %rcx +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4 +; NoVLX-NEXT: movl %eax, %ecx +; NoVLX-NEXT: shrl $16, %ecx +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4 +; NoVLX-NEXT: movq %rax, %rcx +; NoVLX-NEXT: shrq $32, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4 +; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 +; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3 +; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi +; NoVLX-NEXT: shrq $48, %rax +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2 +; NoVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] +; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3 +; NoVLX-NEXT: vpxor %ymm4, %ymm2, %ymm2 +; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2 +; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 +; NoVLX-NEXT: vpxor %ymm4, %ymm0, %ymm0 +; NoVLX-NEXT: vpxor %ymm4, %ymm1, %ymm1 +; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %ymm5, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm5, %ymm3, %ymm2 -; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 -; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm6, %xmm1, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -27095,28 +17893,20 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1 +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vmovq %xmm1, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx ; NoVLX-NEXT: vmovd %eax, %xmm2 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm3 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm2 +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 @@ -27124,39 +17914,39 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vmovq %xmm4, %rcx +; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm3, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm3, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 @@ -27164,7 +17954,7 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm0, %rcx +; NoVLX-NEXT: vmovq %xmm2, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax @@ -27174,56 +17964,38 @@ ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax +; NoVLX-NEXT: vpextrq $1, %xmm2, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm4, %xmm4 -; NoVLX-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi +; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm1 ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rsi), %ymm2, %ymm3 -; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm3, %ymm0 -; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor 32(%rsi), %ymm2, %ymm2 -; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 +; NoVLX-NEXT: vpxor 32(%rsi), %ymm2, %ymm3 +; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm5, %xmm1, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 +; NoVLX-NEXT: vpxor (%rsi), %ymm2, %ymm2 +; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -27630,37 +18402,12 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -27681,37 +18428,12 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -27734,38 +18456,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -27790,38 +18487,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -27847,37 +18519,12 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -27901,38 +18548,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -27959,42 +18581,13 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -28015,42 +18608,13 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -28073,43 +18637,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -28134,43 +18669,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -28196,42 +18702,13 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -28255,43 +18732,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -28513,49 +18961,12 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -28577,49 +18988,12 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -28643,50 +19017,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -28711,50 +19048,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -28780,49 +19080,12 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -28847,50 +19110,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -28917,54 +19143,13 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -28986,54 +19171,13 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -29057,55 +19201,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -29130,55 +19233,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -29204,54 +19266,13 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -29276,55 +19297,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -29351,24 +19331,8 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -29390,24 +19354,8 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -29431,25 +19379,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -29474,25 +19406,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -29518,24 +19434,8 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -29560,25 +19460,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -29605,29 +19489,9 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -29649,29 +19513,9 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -29695,30 +19539,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -29743,30 +19567,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -29792,29 +19596,9 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -29839,30 +19623,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -30461,31 +20225,12 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -30506,31 +20251,12 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -30553,32 +20279,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -30603,32 +20310,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -30654,31 +20342,12 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -30702,32 +20371,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -30754,36 +20404,13 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -30804,36 +20431,13 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -30856,37 +20460,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -30911,37 +20492,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -30967,36 +20525,13 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -31020,37 +20555,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -31472,37 +20984,12 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -31524,37 +21011,12 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -31578,38 +21040,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -31635,38 +21072,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -31693,37 +21105,12 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -31748,38 +21135,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -31807,42 +21169,13 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -31864,42 +21197,13 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -31923,43 +21227,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -31985,43 +21260,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -32048,42 +21294,13 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -32108,43 +21325,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -32342,47 +21530,8 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -32404,47 +21553,8 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -32468,48 +21578,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -32534,48 +21605,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -32601,47 +21633,8 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -32666,48 +21659,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -32734,52 +21688,9 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -32801,52 +21712,9 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -32870,53 +21738,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -32941,53 +21766,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -33013,52 +21795,9 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -33083,53 +21822,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -33532,37 +22228,12 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -33583,37 +22254,12 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovaps (%rdi), %xmm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -33635,37 +22281,12 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -33689,38 +22310,13 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -33744,38 +22340,13 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vmovaps (%rsi), %xmm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -33800,38 +22371,13 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -33858,42 +22404,13 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -33914,42 +22431,13 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovaps (%rdi), %xmm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -33971,42 +22459,13 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -34030,43 +22489,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -34090,43 +22520,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vmovaps (%rsi), %xmm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -34151,43 +22552,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -34409,49 +22781,12 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -34473,49 +22808,12 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovaps (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -34538,49 +22836,12 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -34605,50 +22866,13 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -34673,50 +22897,13 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovaps (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -34742,50 +22929,13 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vbroadcastss (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -34813,54 +22963,13 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -34882,54 +22991,13 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovaps (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -34952,54 +23020,13 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -35024,55 +23051,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -35097,55 +23083,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovaps (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -35171,55 +23116,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vbroadcastss (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -35247,24 +23151,8 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -35286,24 +23174,8 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -35326,24 +23198,8 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -35368,25 +23224,9 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -35411,25 +23251,9 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -35455,25 +23279,9 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -35542,29 +23350,9 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -35586,29 +23374,9 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -35631,29 +23399,9 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -35678,30 +23426,10 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -35726,30 +23454,10 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -35775,30 +23483,10 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -36442,31 +24130,12 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -36487,31 +24156,12 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovapd (%rdi), %xmm1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -36533,31 +24183,12 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -36581,32 +24212,13 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -36630,32 +24242,13 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vmovapd (%rsi), %xmm1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -36680,32 +24273,13 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -36732,36 +24306,13 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -36782,36 +24333,13 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovapd (%rdi), %xmm1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -36833,36 +24361,13 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -36886,37 +24391,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -36940,37 +24422,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vmovapd (%rsi), %xmm1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -36995,37 +24454,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -37441,37 +24877,12 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -37493,37 +24904,12 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovapd (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -37546,37 +24932,12 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -37601,38 +24962,13 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -37657,38 +24993,13 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vmovapd (%rsi), %ymm1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -37714,38 +25025,13 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -37773,42 +25059,13 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -37830,42 +25087,13 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovapd (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -37888,42 +25116,13 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -37948,43 +25147,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -38009,43 +25179,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vmovapd (%rsi), %ymm1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -38071,43 +25212,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -38361,47 +25473,8 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -38423,47 +25496,8 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -38486,47 +25520,8 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -38551,48 +25546,9 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -38617,48 +25573,9 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -38684,48 +25601,9 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -38803,52 +25681,9 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -38870,52 +25705,9 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -38938,52 +25730,9 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -39008,53 +25757,10 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -39079,53 +25785,10 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -39151,53 +25814,10 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -39276,24 +25896,8 @@ ; ; NoVLX-LABEL: test_cmpm_rnd_zero: ; NoVLX: # %bb.0: -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8) Index: test/CodeGen/X86/bitcast-and-setcc-256.ll =================================================================== --- test/CodeGen/X86/bitcast-and-setcc-256.ll +++ test/CodeGen/X86/bitcast-and-setcc-256.ll @@ -411,28 +411,22 @@ ; ; AVX512F-LABEL: v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: .cfi_def_cfa_offset 16 -; AVX512F-NEXT: .cfi_offset %rbp, -16 -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: .cfi_def_cfa_register %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $32, %rsp ; AVX512F-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm2, %ymm1 -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 -; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1 -; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kmovw %k0, (%rsp) -; AVX512F-NEXT: movl (%rsp), %eax -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k2 +; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm2, %ymm0 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k2} +; AVX512F-NEXT: kmovw %k0, %ecx +; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k1} +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: shll $16, %eax +; AVX512F-NEXT: orl %ecx, %eax ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; Index: test/CodeGen/X86/bitcast-and-setcc-512.ll =================================================================== --- test/CodeGen/X86/bitcast-and-setcc-512.ll +++ test/CodeGen/X86/bitcast-and-setcc-512.ll @@ -280,38 +280,22 @@ ; ; AVX512F-LABEL: v32i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: .cfi_def_cfa_offset 16 -; AVX512F-NEXT: .cfi_offset %rbp, -16 -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: .cfi_def_cfa_register %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $32, %rsp +; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1 +; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1 +; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1 ; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k2 +; AVX512F-NEXT: vpcmpgtw %ymm7, %ymm5, %ymm0 +; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 +; AVX512F-NEXT: vpcmpgtw %ymm6, %ymm4, %ymm1 ; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1 -; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 -; AVX512F-NEXT: vpcmpgtw %ymm6, %ymm4, %ymm2 -; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2 -; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 -; AVX512F-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vpcmpgtw %ymm7, %ymm5, %ymm2 -; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2 -; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 -; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 -; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1 -; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kmovw %k0, (%rsp) -; AVX512F-NEXT: movl (%rsp), %eax -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp +; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; AVX512F-NEXT: kmovw %k0, %ecx +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: shll $16, %eax +; AVX512F-NEXT: orl %ecx, %eax ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; @@ -1180,43 +1164,40 @@ ; ; AVX512F-LABEL: v64i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: .cfi_def_cfa_offset 16 -; AVX512F-NEXT: .cfi_offset %rbp, -16 -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: .cfi_def_cfa_register %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $64, %rsp ; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3 +; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k1 +; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 +; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k2 ; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpcmpgtb %ymm7, %ymm5, %ymm2 -; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1 -; AVX512F-NEXT: vpcmpgtb %ymm6, %ymm4, %ymm2 -; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2 -; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2 -; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2 -; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 +; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k3 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kmovw %k0, (%rsp) -; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k4 +; AVX512F-NEXT: vpcmpgtb %ymm7, %ymm5, %ymm0 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm0 -; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; AVX512F-NEXT: movl (%rsp), %ecx -; AVX512F-NEXT: movl {{[0-9]+}}(%rsp), %eax +; AVX512F-NEXT: vpcmpgtb %ymm6, %ymm4, %ymm2 +; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm3 +; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3 +; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2 +; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0 {%k4} +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0 {%k3} +; AVX512F-NEXT: kmovw %k0, %ecx +; AVX512F-NEXT: shll $16, %ecx +; AVX512F-NEXT: orl %eax, %ecx +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k2} +; AVX512F-NEXT: kmovw %k0, %edx +; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k1} +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: shll $16, %eax +; AVX512F-NEXT: orl %edx, %eax ; AVX512F-NEXT: shlq $32, %rax ; AVX512F-NEXT: orq %rcx, %rax -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; Index: test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll =================================================================== --- test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll +++ test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll @@ -505,24 +505,15 @@ ; ; AVX512F-LABEL: ext_i32_32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: .cfi_def_cfa_offset 16 -; AVX512F-NEXT: .cfi_offset %rbp, -16 -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: .cfi_def_cfa_register %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $32, %rsp -; AVX512F-NEXT: movl %edi, (%rsp) +; AVX512F-NEXT: kmovw %edi, %k1 +; AVX512F-NEXT: shrl $16, %edi +; AVX512F-NEXT: kmovw %edi, %k2 ; AVX512F-NEXT: movl {{.*}}(%rip), %eax -; AVX512F-NEXT: kmovw (%rsp), %k1 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; AVX512F-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k2} {z} ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp ; AVX512F-NEXT: retq ; ; AVX512VLBW-LABEL: ext_i32_32i8: @@ -792,27 +783,14 @@ ; ; AVX512F-LABEL: ext_i32_32i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: .cfi_def_cfa_offset 16 -; AVX512F-NEXT: .cfi_offset %rbp, -16 -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: .cfi_def_cfa_register %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $32, %rsp -; AVX512F-NEXT: movl %edi, (%rsp) -; AVX512F-NEXT: kmovw (%rsp), %k1 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} -; AVX512F-NEXT: vpmovdb %zmm0, %xmm1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] -; AVX512F-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp +; AVX512F-NEXT: kmovw %edi, %k1 +; AVX512F-NEXT: shrl $16, %edi +; AVX512F-NEXT: kmovw %edi, %k2 +; AVX512F-NEXT: movl {{.*}}(%rip), %eax +; AVX512F-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k2} {z} +; AVX512F-NEXT: vpmovdw %zmm1, %ymm1 ; AVX512F-NEXT: retq ; ; AVX512VLBW-LABEL: ext_i32_32i16: @@ -950,33 +928,27 @@ ; ; AVX512F-LABEL: ext_i64_64i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: .cfi_def_cfa_offset 16 -; AVX512F-NEXT: .cfi_offset %rbp, -16 -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: .cfi_def_cfa_register %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $64, %rsp -; AVX512F-NEXT: movl %edi, (%rsp) -; AVX512F-NEXT: shrq $32, %rdi -; AVX512F-NEXT: movl %edi, {{[0-9]+}}(%rsp) +; AVX512F-NEXT: movq %rdi, %rax +; AVX512F-NEXT: movq %rdi, %rcx +; AVX512F-NEXT: kmovw %edi, %k1 +; AVX512F-NEXT: movl %edi, %edx +; AVX512F-NEXT: shrl $16, %edx +; AVX512F-NEXT: shrq $32, %rax +; AVX512F-NEXT: shrq $48, %rcx +; AVX512F-NEXT: kmovw %ecx, %k2 +; AVX512F-NEXT: kmovw %eax, %k3 +; AVX512F-NEXT: kmovw %edx, %k4 ; AVX512F-NEXT: movl {{.*}}(%rip), %eax -; AVX512F-NEXT: kmovw (%rsp), %k1 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; AVX512F-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k2} {z} +; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k4} {z} ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z} +; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k3} {z} ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512F-NEXT: vpbroadcastd %eax, %zmm2 {%k2} {z} ; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 ; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp ; AVX512F-NEXT: retq ; ; AVX512VLBW-LABEL: ext_i64_64i8: Index: test/CodeGen/X86/bitcast-setcc-256.ll =================================================================== --- test/CodeGen/X86/bitcast-setcc-256.ll +++ test/CodeGen/X86/bitcast-setcc-256.ll @@ -183,24 +183,16 @@ ; ; AVX512F-LABEL: v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: .cfi_def_cfa_offset 16 -; AVX512F-NEXT: .cfi_offset %rbp, -16 -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: .cfi_def_cfa_register %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $32, %rsp ; AVX512F-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; AVX512F-NEXT: kmovw %k0, %ecx +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kmovw %k0, (%rsp) -; AVX512F-NEXT: movl (%rsp), %eax -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: shll $16, %eax +; AVX512F-NEXT: orl %ecx, %eax ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; Index: test/CodeGen/X86/bitcast-setcc-512.ll =================================================================== --- test/CodeGen/X86/bitcast-setcc-512.ll +++ test/CodeGen/X86/bitcast-setcc-512.ll @@ -51,28 +51,16 @@ ; ; AVX512F-LABEL: v32i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: .cfi_def_cfa_offset 16 -; AVX512F-NEXT: .cfi_offset %rbp, -16 -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: .cfi_def_cfa_register %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $32, %rsp -; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1 -; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 -; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 -; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kmovw %k0, (%rsp) -; AVX512F-NEXT: movl (%rsp), %eax -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp +; AVX512F-NEXT: kmovw %k0, %ecx +; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm0 +; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: shll $16, %eax +; AVX512F-NEXT: orl %ecx, %eax ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; @@ -858,35 +846,28 @@ ; ; AVX512F-LABEL: v64i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: .cfi_def_cfa_offset 16 -; AVX512F-NEXT: .cfi_offset %rbp, -16 -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: .cfi_def_cfa_register %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $64, %rsp ; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2 -; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm2 ; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kmovw %k0, (%rsp) +; AVX512F-NEXT: kmovw %k0, %ecx +; AVX512F-NEXT: shll $16, %ecx +; AVX512F-NEXT: orl %eax, %ecx ; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm0 -; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; AVX512F-NEXT: kmovw %k0, %edx +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; AVX512F-NEXT: movl (%rsp), %ecx -; AVX512F-NEXT: movl {{[0-9]+}}(%rsp), %eax +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: shll $16, %eax +; AVX512F-NEXT: orl %edx, %eax ; AVX512F-NEXT: shlq $32, %rax ; AVX512F-NEXT: orq %rcx, %rax -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; Index: test/CodeGen/X86/vector-compare-results.ll =================================================================== --- test/CodeGen/X86/vector-compare-results.ll +++ test/CodeGen/X86/vector-compare-results.ll @@ -1973,23 +1973,25 @@ ; ; AVX512F-LABEL: test_cmp_v64i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm4 ; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm3 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm4 +; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm2 +; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm3 ; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0 -; AVX512F-NEXT: vmovdqa %xmm4, %xmm2 +; AVX512F-NEXT: vmovdqa %xmm4, %xmm1 +; AVX512F-NEXT: # kill: def %xmm2 killed %xmm2 killed %ymm2 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512DQ-LABEL: test_cmp_v64i8: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm4 ; AVX512DQ-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 -; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512DQ-NEXT: vextracti128 $1, %ymm4, %xmm3 +; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm4 +; AVX512DQ-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm2 +; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm3 ; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0 -; AVX512DQ-NEXT: vmovdqa %xmm4, %xmm2 +; AVX512DQ-NEXT: vmovdqa %xmm4, %xmm1 +; AVX512DQ-NEXT: # kill: def %xmm2 killed %xmm2 killed %ymm2 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; @@ -2383,32 +2385,24 @@ ; ; AVX512F-LABEL: test_cmp_v32f32: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vcmpltps %zmm0, %zmm2, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vcmpltps %zmm1, %zmm3, %k1 +; AVX512F-NEXT: vcmpltps %zmm0, %zmm2, %k2 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: retq ; ; AVX512DQ-LABEL: test_cmp_v32f32: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm2, %k0 -; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 -; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vcmpltps %zmm1, %zmm3, %k0 +; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm2, %k1 +; AVX512DQ-NEXT: vpmovm2d %k1, %zmm0 +; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: test_cmp_v32f32: @@ -2890,32 +2884,24 @@ ; ; AVX512F-LABEL: test_cmp_v32i32: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm0, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vpcmpgtd %zmm3, %zmm1, %k1 +; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm0, %k2 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: retq ; ; AVX512DQ-LABEL: test_cmp_v32i32: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpcmpgtd %zmm2, %zmm0, %k0 -; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 -; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vpcmpgtd %zmm3, %zmm1, %k0 +; AVX512DQ-NEXT: vpcmpgtd %zmm2, %zmm0, %k1 +; AVX512DQ-NEXT: vpmovm2d %k1, %zmm0 +; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: test_cmp_v32i32: @@ -5426,36 +5412,36 @@ ; AVX512F-LABEL: test_cmp_v128i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpcmpgtb %ymm4, %ymm0, %ymm0 -; AVX512F-NEXT: vpcmpgtb %ymm5, %ymm1, %ymm1 -; AVX512F-NEXT: vpcmpgtb %ymm6, %ymm2, %ymm2 -; AVX512F-NEXT: vpcmpgtb %ymm7, %ymm3, %ymm3 -; AVX512F-NEXT: vextracti128 $1, %ymm3, %xmm4 -; AVX512F-NEXT: vpmovsxbd %xmm4, %zmm4 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm4 ; AVX512F-NEXT: vptestmd %zmm4, %zmm4, %k0 -; AVX512F-NEXT: kmovw %k0, 14(%rdi) -; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3 -; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0 -; AVX512F-NEXT: kmovw %k0, 12(%rdi) -; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm3 -; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3 -; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0 -; AVX512F-NEXT: kmovw %k0, 10(%rdi) -; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2 -; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512F-NEXT: kmovw %k0, 8(%rdi) -; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2 -; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512F-NEXT: kmovw %k0, 6(%rdi) -; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 -; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: kmovw %k0, 4(%rdi) -; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 -; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: kmovw %k0, 2(%rdi) +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 +; AVX512F-NEXT: vpcmpgtb %ymm5, %ymm1, %ymm0 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1 +; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k2 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k3 +; AVX512F-NEXT: vpcmpgtb %ymm6, %ymm2, %ymm0 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1 +; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k4 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k5 +; AVX512F-NEXT: vpcmpgtb %ymm7, %ymm3, %ymm0 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1 +; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k6 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k7 +; AVX512F-NEXT: kmovw %k7, 14(%rdi) +; AVX512F-NEXT: kmovw %k6, 12(%rdi) +; AVX512F-NEXT: kmovw %k5, 10(%rdi) +; AVX512F-NEXT: kmovw %k4, 8(%rdi) +; AVX512F-NEXT: kmovw %k3, 6(%rdi) +; AVX512F-NEXT: kmovw %k2, 4(%rdi) +; AVX512F-NEXT: kmovw %k1, 2(%rdi) ; AVX512F-NEXT: kmovw %k0, (%rdi) ; AVX512F-NEXT: movq %rdi, %rax ; AVX512F-NEXT: vzeroupper @@ -5464,36 +5450,36 @@ ; AVX512DQ-LABEL: test_cmp_v128i8: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vpcmpgtb %ymm4, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpcmpgtb %ymm5, %ymm1, %ymm1 -; AVX512DQ-NEXT: vpcmpgtb %ymm6, %ymm2, %ymm2 -; AVX512DQ-NEXT: vpcmpgtb %ymm7, %ymm3, %ymm3 -; AVX512DQ-NEXT: vextracti128 $1, %ymm3, %xmm4 -; AVX512DQ-NEXT: vpmovsxbd %xmm4, %zmm4 +; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm4 ; AVX512DQ-NEXT: vptestmd %zmm4, %zmm4, %k0 -; AVX512DQ-NEXT: kmovw %k0, 14(%rdi) -; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3 -; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0 -; AVX512DQ-NEXT: kmovw %k0, 12(%rdi) -; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm3 -; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3 -; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0 -; AVX512DQ-NEXT: kmovw %k0, 10(%rdi) -; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2 -; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512DQ-NEXT: kmovw %k0, 8(%rdi) -; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2 -; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512DQ-NEXT: kmovw %k0, 6(%rdi) -; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1 -; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512DQ-NEXT: kmovw %k0, 4(%rdi) -; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1 -; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512DQ-NEXT: kmovw %k0, 2(%rdi) +; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k1 +; AVX512DQ-NEXT: vpcmpgtb %ymm5, %ymm1, %ymm0 +; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm1 +; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k2 +; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k3 +; AVX512DQ-NEXT: vpcmpgtb %ymm6, %ymm2, %ymm0 +; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm1 +; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k4 +; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k5 +; AVX512DQ-NEXT: vpcmpgtb %ymm7, %ymm3, %ymm0 +; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm1 +; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k6 +; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k7 +; AVX512DQ-NEXT: kmovw %k7, 14(%rdi) +; AVX512DQ-NEXT: kmovw %k6, 12(%rdi) +; AVX512DQ-NEXT: kmovw %k5, 10(%rdi) +; AVX512DQ-NEXT: kmovw %k4, 8(%rdi) +; AVX512DQ-NEXT: kmovw %k3, 6(%rdi) +; AVX512DQ-NEXT: kmovw %k2, 4(%rdi) +; AVX512DQ-NEXT: kmovw %k1, 2(%rdi) ; AVX512DQ-NEXT: kmovw %k0, (%rdi) ; AVX512DQ-NEXT: movq %rdi, %rax ; AVX512DQ-NEXT: vzeroupper @@ -5914,40 +5900,32 @@ ; ; AVX512F-LABEL: test_cmp_v32f64: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vcmpltpd %zmm0, %zmm4, %k0 -; AVX512F-NEXT: vcmpltpd %zmm1, %zmm5, %k1 -; AVX512F-NEXT: kunpckbw %k0, %k1, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vcmpltpd %zmm2, %zmm6, %k0 ; AVX512F-NEXT: vcmpltpd %zmm3, %zmm7, %k1 ; AVX512F-NEXT: kunpckbw %k0, %k1, %k1 +; AVX512F-NEXT: vcmpltpd %zmm0, %zmm4, %k0 +; AVX512F-NEXT: vcmpltpd %zmm1, %zmm5, %k2 +; AVX512F-NEXT: kunpckbw %k0, %k2, %k2 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: retq ; ; AVX512DQ-LABEL: test_cmp_v32f64: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm4, %k0 -; AVX512DQ-NEXT: vcmpltpd %zmm1, %zmm5, %k1 -; AVX512DQ-NEXT: kunpckbw %k0, %k1, %k0 -; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 -; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vcmpltpd %zmm2, %zmm6, %k0 ; AVX512DQ-NEXT: vcmpltpd %zmm3, %zmm7, %k1 ; AVX512DQ-NEXT: kunpckbw %k0, %k1, %k0 +; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm4, %k1 +; AVX512DQ-NEXT: vcmpltpd %zmm1, %zmm5, %k2 +; AVX512DQ-NEXT: kunpckbw %k1, %k2, %k1 +; AVX512DQ-NEXT: vpmovm2d %k1, %zmm0 +; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: test_cmp_v32f64: @@ -6535,40 +6513,32 @@ ; ; AVX512F-LABEL: test_cmp_v32i64: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpcmpgtq %zmm4, %zmm0, %k0 -; AVX512F-NEXT: vpcmpgtq %zmm5, %zmm1, %k1 -; AVX512F-NEXT: kunpckbw %k0, %k1, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vpcmpgtq %zmm6, %zmm2, %k0 ; AVX512F-NEXT: vpcmpgtq %zmm7, %zmm3, %k1 ; AVX512F-NEXT: kunpckbw %k0, %k1, %k1 +; AVX512F-NEXT: vpcmpgtq %zmm4, %zmm0, %k0 +; AVX512F-NEXT: vpcmpgtq %zmm5, %zmm1, %k2 +; AVX512F-NEXT: kunpckbw %k0, %k2, %k2 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: retq ; ; AVX512DQ-LABEL: test_cmp_v32i64: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpcmpgtq %zmm4, %zmm0, %k0 -; AVX512DQ-NEXT: vpcmpgtq %zmm5, %zmm1, %k1 -; AVX512DQ-NEXT: kunpckbw %k0, %k1, %k0 -; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 -; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vpcmpgtq %zmm6, %zmm2, %k0 ; AVX512DQ-NEXT: vpcmpgtq %zmm7, %zmm3, %k1 ; AVX512DQ-NEXT: kunpckbw %k0, %k1, %k0 +; AVX512DQ-NEXT: vpcmpgtq %zmm4, %zmm0, %k1 +; AVX512DQ-NEXT: vpcmpgtq %zmm5, %zmm1, %k2 +; AVX512DQ-NEXT: kunpckbw %k1, %k2, %k1 +; AVX512DQ-NEXT: vpmovm2d %k1, %zmm0 +; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: test_cmp_v32i64: Index: test/CodeGen/X86/vector-shuffle-v1.ll =================================================================== --- test/CodeGen/X86/vector-shuffle-v1.ll +++ test/CodeGen/X86/vector-shuffle-v1.ll @@ -214,20 +214,40 @@ define <32 x i1> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<32 x i1> %a) { ; AVX512F-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[3,6,u,12,3,7,7,0,3,6,1,13,3,u,7,0,u,u,22,u,u,u,u,u,u,u,u,u,u,21,u,u] -; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX512F-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,6,u,u,u,u,u,u,u,u,u,u,5,u,u,19,22,u,28,19,23,23,16,19,22,17,29,19,u,23,16] -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,255,255,255,255,255,255,255,255,255,255,0,255,255,0,0,255,0,0,0,0,0,0,0,0,0,0,255,0,0] -; AVX512F-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1 +; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1 +; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k2 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; AVX512F-NEXT: vmovdqa32 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0] +; AVX512F-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 +; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[3,6,u,12,3,7,7,0,3,6,1,13,3,u,7,0,u,u,22,u,u,u,u,u,u,u,u,u,u,21,u,u] -; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,6,u,u,u,u,u,u,u,u,u,u,5,u,u,19,22,u,28,19,23,23,16,19,22,17,29,19,u,23,16] -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,255,255,255,255,255,255,255,255,255,255,0,255,255,0,0,255,0,0,0,0,0,0,0,0,0,0,255,0,0] -; AVX512VL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpmovsxbd %xmm0, %zmm1 +; AVX512VL-NEXT: vpslld $31, %zmm1, %zmm1 +; AVX512VL-NEXT: vptestmd %zmm1, %zmm1, %k1 +; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512VL-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512VL-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512VL-NEXT: vptestmd %zmm0, %zmm0, %k2 +; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; AVX512VL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; AVX512VL-NEXT: vmovdqa32 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0] +; AVX512VL-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 +; AVX512VL-NEXT: vptestmd %zmm2, %zmm2, %k1 +; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; VL_BW_DQ-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0: @@ -608,65 +628,33 @@ define i64 @shuf64i1_zero(i64 %a) { ; AVX512F-LABEL: shuf64i1_zero: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: .cfi_def_cfa_offset 16 -; AVX512F-NEXT: .cfi_offset %rbp, -16 -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: .cfi_def_cfa_register %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $96, %rsp -; AVX512F-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 +; AVX512F-NEXT: kmovw %edi, %k1 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0 -; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 -; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1 -; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512F-NEXT: vpbroadcastd %xmm0, %zmm0 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kmovw %k0, (%rsp) -; AVX512F-NEXT: movl (%rsp), %ecx +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: kmovw %k0, %ecx +; AVX512F-NEXT: shll $16, %ecx +; AVX512F-NEXT: orl %eax, %ecx ; AVX512F-NEXT: movq %rcx, %rax ; AVX512F-NEXT: shlq $32, %rax ; AVX512F-NEXT: orq %rcx, %rax -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: shuf64i1_zero: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: pushq %rbp -; AVX512VL-NEXT: .cfi_def_cfa_offset 16 -; AVX512VL-NEXT: .cfi_offset %rbp, -16 -; AVX512VL-NEXT: movq %rsp, %rbp -; AVX512VL-NEXT: .cfi_def_cfa_register %rbp -; AVX512VL-NEXT: andq $-32, %rsp -; AVX512VL-NEXT: subq $96, %rsp -; AVX512VL-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; AVX512VL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 +; AVX512VL-NEXT: kmovw %edi, %k1 ; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0 -; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512VL-NEXT: vpmovsxbd %xmm1, %zmm1 -; AVX512VL-NEXT: vpslld $31, %zmm1, %zmm1 -; AVX512VL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512VL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; AVX512VL-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512VL-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512VL-NEXT: vpbroadcastd %xmm0, %zmm0 ; AVX512VL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512VL-NEXT: kmovw %k0, (%rsp) -; AVX512VL-NEXT: movl (%rsp), %ecx +; AVX512VL-NEXT: kmovw %k0, %eax +; AVX512VL-NEXT: kmovw %k0, %ecx +; AVX512VL-NEXT: shll $16, %ecx +; AVX512VL-NEXT: orl %eax, %ecx ; AVX512VL-NEXT: movq %rcx, %rax ; AVX512VL-NEXT: shlq $32, %rax ; AVX512VL-NEXT: orq %rcx, %rax -; AVX512VL-NEXT: movq %rbp, %rsp -; AVX512VL-NEXT: popq %rbp ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq ;