Index: llvm/trunk/lib/Target/X86/X86ISelLowering.h =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.h +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h @@ -1084,6 +1084,14 @@ /// \brief Customize the preferred legalization strategy for certain types. LegalizeTypeAction getPreferredVectorAction(EVT VT) const override; + MVT getRegisterTypeForCallingConv(MVT VT) const override; + + MVT getRegisterTypeForCallingConv(LLVMContext &Context, + EVT VT) const override; + + unsigned getNumRegistersForCallingConv(LLVMContext &Context, + EVT VT) const override; + bool isIntDivCheap(EVT VT, AttributeList Attr) const override; bool supportSwiftError() const override; Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -1720,6 +1720,9 @@ TargetLoweringBase::LegalizeTypeAction X86TargetLowering::getPreferredVectorAction(EVT VT) const { + if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI()) + return TypeSplitVector; + if (ExperimentalVectorWideningLegalization && VT.getVectorNumElements() != 1 && VT.getVectorElementType().getSimpleVT() != MVT::i1) @@ -1728,6 +1731,26 @@ return TargetLoweringBase::getPreferredVectorAction(VT); } +MVT X86TargetLowering::getRegisterTypeForCallingConv(MVT VT) const { + if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI()) + return MVT::v32i8; + return TargetLowering::getRegisterTypeForCallingConv(VT); +} + +MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, + EVT VT) const { + if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI()) + return MVT::v32i8; + return TargetLowering::getRegisterTypeForCallingConv(Context, VT); +} + +unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, + EVT VT) const { + if (VT == MVT::v32i1 && Subtarget.hasAVX512() && !Subtarget.hasBWI()) + return 1; + return TargetLowering::getNumRegistersForCallingConv(Context, VT); +} + EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext& Context, EVT VT) const { Index: llvm/trunk/test/CodeGen/X86/avg-mask.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avg-mask.ll +++ llvm/trunk/test/CodeGen/X86/avg-mask.ll @@ -60,22 +60,16 @@ define <32 x i8> @avg_v32i8_mask(<32 x i8> %a, <32 x i8> %b, <32 x i8> %src, i32 %mask) nounwind { ; AVX512F-LABEL: avg_v32i8_mask: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $32, %rsp -; AVX512F-NEXT: movl %edi, (%rsp) +; AVX512F-NEXT: kmovw %edi, %k1 +; AVX512F-NEXT: shrl $16, %edi ; AVX512F-NEXT: vpavgb %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: kmovw (%rsp), %k1 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 +; AVX512F-NEXT: kmovw %edi, %k2 ; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} ; AVX512F-NEXT: vpmovdb %zmm3, %xmm3 ; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 ; AVX512F-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0 -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp ; AVX512F-NEXT: retq ; ; AVX512BWVL-LABEL: avg_v32i8_mask: @@ -98,22 +92,16 @@ define <32 x i8> @avg_v32i8_maskz(<32 x i8> %a, <32 x i8> %b, i32 %mask) nounwind { ; AVX512F-LABEL: avg_v32i8_maskz: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $32, %rsp -; AVX512F-NEXT: movl %edi, (%rsp) +; AVX512F-NEXT: kmovw %edi, %k1 +; AVX512F-NEXT: shrl $16, %edi ; AVX512F-NEXT: vpavgb %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: kmovw (%rsp), %k1 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 +; AVX512F-NEXT: kmovw %edi, %k2 ; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} ; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 ; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 ; AVX512F-NEXT: vpand %ymm0, %ymm1, %ymm0 -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp ; AVX512F-NEXT: retq ; ; AVX512BWVL-LABEL: avg_v32i8_maskz: @@ -135,33 +123,30 @@ define <64 x i8> @avg_v64i8_mask(<64 x i8> %a, <64 x i8> %b, <64 x i8> %src, i64 %mask) nounwind { ; AVX512F-LABEL: avg_v64i8_mask: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $64, %rsp -; AVX512F-NEXT: movl %edi, (%rsp) -; AVX512F-NEXT: shrq $32, %rdi -; AVX512F-NEXT: movl %edi, {{[0-9]+}}(%rsp) +; AVX512F-NEXT: movq %rdi, %rax +; AVX512F-NEXT: movq %rdi, %rcx +; AVX512F-NEXT: kmovw %edi, %k1 +; AVX512F-NEXT: movl %edi, %edx +; AVX512F-NEXT: shrl $16, %edx +; AVX512F-NEXT: shrq $32, %rax +; AVX512F-NEXT: shrq $48, %rcx ; AVX512F-NEXT: vpavgb %ymm3, %ymm1, %ymm1 ; AVX512F-NEXT: vpavgb %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: kmovw (%rsp), %k1 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 +; AVX512F-NEXT: kmovw %ecx, %k2 +; AVX512F-NEXT: kmovw %eax, %k3 +; AVX512F-NEXT: kmovw %edx, %k4 ; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 -; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} +; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k4} {z} ; AVX512F-NEXT: vpmovdb %zmm3, %xmm3 ; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 ; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm4, %ymm0 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} +; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k3} {z} ; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 ; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} ; AVX512F-NEXT: vpmovdb %zmm3, %xmm3 ; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 ; AVX512F-NEXT: vpblendvb %ymm2, %ymm1, %ymm5, %ymm1 -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp ; AVX512F-NEXT: retq ; ; AVX512BWVL-LABEL: avg_v64i8_mask: @@ -184,33 +169,30 @@ define <64 x i8> @avg_v64i8_maskz(<64 x i8> %a, <64 x i8> %b, i64 %mask) nounwind { ; AVX512F-LABEL: avg_v64i8_maskz: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $64, %rsp -; AVX512F-NEXT: movl %edi, (%rsp) -; AVX512F-NEXT: shrq $32, %rdi -; AVX512F-NEXT: movl %edi, {{[0-9]+}}(%rsp) +; AVX512F-NEXT: movq %rdi, %rax +; AVX512F-NEXT: movq %rdi, %rcx +; AVX512F-NEXT: kmovw %edi, %k1 +; AVX512F-NEXT: movl %edi, %edx +; AVX512F-NEXT: shrl $16, %edx +; AVX512F-NEXT: shrq $32, %rax +; AVX512F-NEXT: shrq $48, %rcx ; AVX512F-NEXT: vpavgb %ymm3, %ymm1, %ymm1 ; AVX512F-NEXT: vpavgb %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: kmovw (%rsp), %k1 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 +; AVX512F-NEXT: kmovw %ecx, %k2 +; AVX512F-NEXT: kmovw %eax, %k3 +; AVX512F-NEXT: kmovw %edx, %k4 ; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 -; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} +; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k4} {z} ; AVX512F-NEXT: vpmovdb %zmm3, %xmm3 ; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 ; AVX512F-NEXT: vpand %ymm0, %ymm2, %ymm0 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} +; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k3} {z} ; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 ; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} ; AVX512F-NEXT: vpmovdb %zmm3, %xmm3 ; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 ; AVX512F-NEXT: vpand %ymm1, %ymm2, %ymm1 -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp ; AVX512F-NEXT: retq ; ; AVX512BWVL-LABEL: avg_v64i8_maskz: @@ -340,29 +322,17 @@ define <32 x i16> @avg_v32i16_mask(<32 x i16> %a, <32 x i16> %b, <32 x i16> %src, i32 %mask) nounwind { ; AVX512F-LABEL: avg_v32i16_mask: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $32, %rsp -; AVX512F-NEXT: movl %edi, (%rsp) -; AVX512F-NEXT: kmovw (%rsp), %k1 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; AVX512F-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k2} {z} -; AVX512F-NEXT: vpmovdb %zmm6, %xmm6 -; AVX512F-NEXT: vpternlogd $255, %zmm7, %zmm7, %zmm7 {%k1} {z} -; AVX512F-NEXT: vpmovdb %zmm7, %xmm7 +; AVX512F-NEXT: kmovw %edi, %k1 +; AVX512F-NEXT: shrl $16, %edi ; AVX512F-NEXT: vpavgw %ymm3, %ymm1, %ymm1 ; AVX512F-NEXT: vpavgw %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero,xmm7[4],zero,xmm7[5],zero,xmm7[6],zero,xmm7[7],zero,xmm7[8],zero,xmm7[9],zero,xmm7[10],zero,xmm7[11],zero,xmm7[12],zero,xmm7[13],zero,xmm7[14],zero,xmm7[15],zero -; AVX512F-NEXT: vpsllw $15, %ymm2, %ymm2 -; AVX512F-NEXT: vpsraw $15, %ymm2, %ymm2 +; AVX512F-NEXT: kmovw %edi, %k2 +; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} +; AVX512F-NEXT: vpmovdw %zmm2, %ymm2 ; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm4, %ymm0 -; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero,xmm6[8],zero,xmm6[9],zero,xmm6[10],zero,xmm6[11],zero,xmm6[12],zero,xmm6[13],zero,xmm6[14],zero,xmm6[15],zero -; AVX512F-NEXT: vpsllw $15, %ymm2, %ymm2 -; AVX512F-NEXT: vpsraw $15, %ymm2, %ymm2 +; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} +; AVX512F-NEXT: vpmovdw %zmm2, %ymm2 ; AVX512F-NEXT: vpblendvb %ymm2, %ymm1, %ymm5, %ymm1 -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp ; AVX512F-NEXT: retq ; ; AVX512BWVL-LABEL: avg_v32i16_mask: @@ -385,29 +355,17 @@ define <32 x i16> @avg_v32i16_maskz(<32 x i16> %a, <32 x i16> %b, i32 %mask) nounwind { ; AVX512F-LABEL: avg_v32i16_maskz: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $32, %rsp -; AVX512F-NEXT: movl %edi, (%rsp) -; AVX512F-NEXT: kmovw (%rsp), %k1 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; AVX512F-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k2} {z} -; AVX512F-NEXT: vpmovdb %zmm4, %xmm4 -; AVX512F-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k1} {z} -; AVX512F-NEXT: vpmovdb %zmm5, %xmm5 +; AVX512F-NEXT: kmovw %edi, %k1 +; AVX512F-NEXT: shrl $16, %edi ; AVX512F-NEXT: vpavgw %ymm3, %ymm1, %ymm1 ; AVX512F-NEXT: vpavgw %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero,xmm5[8],zero,xmm5[9],zero,xmm5[10],zero,xmm5[11],zero,xmm5[12],zero,xmm5[13],zero,xmm5[14],zero,xmm5[15],zero -; AVX512F-NEXT: vpsllw $15, %ymm2, %ymm2 -; AVX512F-NEXT: vpsraw $15, %ymm2, %ymm2 +; AVX512F-NEXT: kmovw %edi, %k2 +; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} +; AVX512F-NEXT: vpmovdw %zmm2, %ymm2 ; AVX512F-NEXT: vpand %ymm0, %ymm2, %ymm0 -; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero -; AVX512F-NEXT: vpsllw $15, %ymm2, %ymm2 -; AVX512F-NEXT: vpsraw $15, %ymm2, %ymm2 +; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} +; AVX512F-NEXT: vpmovdw %zmm2, %ymm2 ; AVX512F-NEXT: vpand %ymm1, %ymm2, %ymm1 -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp ; AVX512F-NEXT: retq ; ; AVX512BWVL-LABEL: avg_v32i16_maskz: Index: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll +++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll @@ -782,39 +782,20 @@ define i32 @test_insertelement_v32i1(i32 %a, i32 %b, <32 x i32> %x , <32 x i32> %y) { ; KNL-LABEL: test_insertelement_v32i1: ; KNL: ## %bb.0: -; KNL-NEXT: pushq %rbp -; KNL-NEXT: .cfi_def_cfa_offset 16 -; KNL-NEXT: .cfi_offset %rbp, -16 -; KNL-NEXT: movq %rsp, %rbp -; KNL-NEXT: .cfi_def_cfa_register %rbp -; KNL-NEXT: andq $-32, %rsp -; KNL-NEXT: subq $32, %rsp -; KNL-NEXT: xorl %eax, %eax ; KNL-NEXT: cmpl %esi, %edi ; KNL-NEXT: setb %al -; KNL-NEXT: vpcmpltud %zmm2, %zmm0, %k1 -; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; KNL-NEXT: vpmovdb %zmm0, %xmm0 -; KNL-NEXT: vpcmpltud %zmm3, %zmm1, %k1 -; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; KNL-NEXT: vpmovdb %zmm1, %xmm1 -; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; KNL-NEXT: vpsllw $7, %ymm0, %ymm0 -; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; KNL-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1 -; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vpslld $31, %zmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, (%rsp) -; KNL-NEXT: movl (%rsp), %eax -; KNL-NEXT: movq %rbp, %rsp -; KNL-NEXT: popq %rbp +; KNL-NEXT: vpcmpltud %zmm3, %zmm1, %k0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: shll $16, %ecx +; KNL-NEXT: vpcmpltud %zmm2, %zmm0, %k0 +; KNL-NEXT: kshiftrw $4, %k0, %k1 +; KNL-NEXT: kmovw %eax, %k2 +; KNL-NEXT: kxorw %k2, %k1, %k1 +; KNL-NEXT: kshiftlw $15, %k1, %k1 +; KNL-NEXT: kshiftrw $11, %k1, %k1 +; KNL-NEXT: kxorw %k0, %k1, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: orl %ecx, %eax ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; @@ -1014,7 +995,10 @@ ; KNL-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; KNL-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; KNL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 -; KNL-NEXT: vpextrb $2, %xmm0, %eax +; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: kshiftrw $2, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: andl $1, %eax ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq @@ -1041,7 +1025,10 @@ ; KNL-NEXT: vpxor %ymm0, %ymm1, %ymm0 ; KNL-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 -; KNL-NEXT: vpextrb $15, %xmm0, %eax +; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: andb $1, %al ; KNL-NEXT: movb $4, %cl ; KNL-NEXT: subb %al, %cl @@ -1074,7 +1061,10 @@ ; KNL-NEXT: vpxor %ymm0, %ymm1, %ymm0 ; KNL-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 -; KNL-NEXT: vpextrb $15, %xmm0, %eax +; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: andb $1, %al ; KNL-NEXT: movb $4, %cl ; KNL-NEXT: subb %al, %cl @@ -1717,26 +1707,25 @@ ; KNL-NEXT: movq %rsp, %rbp ; KNL-NEXT: .cfi_def_cfa_register %rbp ; KNL-NEXT: andq $-32, %rsp -; KNL-NEXT: subq $96, %rsp +; KNL-NEXT: subq $64, %rsp ; KNL-NEXT: ## kill: def %esi killed %esi def %rsi ; KNL-NEXT: vmovdqa {{.*#+}} ymm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; KNL-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; KNL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 ; KNL-NEXT: andl $31, %esi ; KNL-NEXT: testb %dil, %dil -; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp) -; KNL-NEXT: setne 32(%rsp,%rsi) -; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm0 -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1 -; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 +; KNL-NEXT: vmovdqa %ymm0, (%rsp) +; KNL-NEXT: setne (%rsp,%rsi) +; KNL-NEXT: vpmovsxbd (%rsp), %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, (%rsp) -; KNL-NEXT: movl (%rsp), %eax +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: vpmovsxbd {{[0-9]+}}(%rsp), %zmm0 +; KNL-NEXT: vpslld $31, %zmm0, %zmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: shll $16, %eax +; KNL-NEXT: orl %ecx, %eax ; KNL-NEXT: movq %rbp, %rsp ; KNL-NEXT: popq %rbp ; KNL-NEXT: vzeroupper @@ -1782,7 +1771,7 @@ ; KNL-NEXT: movq %rsp, %rbp ; KNL-NEXT: .cfi_def_cfa_register %rbp ; KNL-NEXT: andq $-64, %rsp -; KNL-NEXT: subq $192, %rsp +; KNL-NEXT: subq $128, %rsp ; KNL-NEXT: ## kill: def %esi killed %esi def %rsi ; KNL-NEXT: vmovdqa {{.*#+}} ymm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; KNL-NEXT: vpxor %ymm2, %ymm0, %ymm0 @@ -1792,30 +1781,32 @@ ; KNL-NEXT: andl $63, %esi ; KNL-NEXT: testb %dil, %dil ; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp) -; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp) -; KNL-NEXT: setne 64(%rsp,%rsi) -; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm0 +; KNL-NEXT: vmovdqa %ymm0, (%rsp) +; KNL-NEXT: setne (%rsp,%rsi) +; KNL-NEXT: vmovdqa (%rsp), %ymm0 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm1 -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm2 -; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 +; KNL-NEXT: vpmovsxbd %xmm0, %zmm2 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vextracti128 $1, %ymm1, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: shll $16, %ecx +; KNL-NEXT: orl %eax, %ecx +; KNL-NEXT: vpmovsxbd %xmm1, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vpmovsxbd %xmm1, %zmm0 +; KNL-NEXT: kmovw %k0, %edx +; KNL-NEXT: vextracti128 $1, %ymm1, %xmm0 +; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, (%rsp) -; KNL-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; KNL-NEXT: movl (%rsp), %eax +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: shll $16, %eax +; KNL-NEXT: orl %edx, %eax ; KNL-NEXT: shlq $32, %rax ; KNL-NEXT: orq %rcx, %rax ; KNL-NEXT: movq %rbp, %rsp @@ -1863,7 +1854,7 @@ ; KNL-NEXT: movq %rsp, %rbp ; KNL-NEXT: .cfi_def_cfa_register %rbp ; KNL-NEXT: andq $-128, %rsp -; KNL-NEXT: subq $384, %rsp ## imm = 0x180 +; KNL-NEXT: subq $256, %rsp ## imm = 0x100 ; KNL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; KNL-NEXT: vpinsrb $1, 488(%rbp), %xmm0, %xmm0 ; KNL-NEXT: vpinsrb $2, 496(%rbp), %xmm0, %xmm0 @@ -1977,56 +1968,60 @@ ; KNL-NEXT: vmovdqa %ymm3, {{[0-9]+}}(%rsp) ; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp) ; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp) -; KNL-NEXT: vmovdqa %ymm2, {{[0-9]+}}(%rsp) -; KNL-NEXT: setne 128(%rsp,%rax) -; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm1 -; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm2 +; KNL-NEXT: vmovdqa %ymm2, (%rsp) +; KNL-NEXT: setne (%rsp,%rax) +; KNL-NEXT: vmovdqa (%rsp), %ymm2 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm3 +; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm1 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm0 -; KNL-NEXT: vextracti128 $1, %ymm1, %xmm4 -; KNL-NEXT: vpmovsxbd %xmm4, %zmm4 +; KNL-NEXT: vpmovsxbd %xmm2, %zmm4 ; KNL-NEXT: vpslld $31, %zmm4, %zmm4 ; KNL-NEXT: vptestmd %zmm4, %zmm4, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vextracti128 $1, %ymm2, %xmm1 -; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vpmovsxbd %xmm2, %zmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vextracti128 $1, %ymm3, %xmm1 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2 +; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 +; KNL-NEXT: vpslld $31, %zmm2, %zmm2 +; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: shll $16, %ecx +; KNL-NEXT: orl %eax, %ecx +; KNL-NEXT: vpmovsxbd %xmm3, %zmm2 +; KNL-NEXT: vpslld $31, %zmm2, %zmm2 +; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 +; KNL-NEXT: kmovw %k0, %edx +; KNL-NEXT: vextracti128 $1, %ymm3, %xmm2 +; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 +; KNL-NEXT: vpslld $31, %zmm2, %zmm2 +; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: shll $16, %eax +; KNL-NEXT: orl %edx, %eax +; KNL-NEXT: shlq $32, %rax +; KNL-NEXT: orq %rcx, %rax +; KNL-NEXT: vpmovsxbd %xmm1, %zmm2 +; KNL-NEXT: vpslld $31, %zmm2, %zmm2 +; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: vextracti128 $1, %ymm1, %xmm1 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vpmovsxbd %xmm3, %zmm1 +; KNL-NEXT: kmovw %k0, %esi +; KNL-NEXT: shll $16, %esi +; KNL-NEXT: orl %ecx, %esi +; KNL-NEXT: vpmovsxbd %xmm0, %zmm1 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, (%rsp) -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1 -; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: shlq $32, %rax -; KNL-NEXT: orq %rcx, %rax -; KNL-NEXT: movl (%rsp), %ecx -; KNL-NEXT: movl {{[0-9]+}}(%rsp), %edx +; KNL-NEXT: kmovw %k0, %edx +; KNL-NEXT: shll $16, %edx +; KNL-NEXT: orl %ecx, %edx ; KNL-NEXT: shlq $32, %rdx -; KNL-NEXT: orq %rcx, %rdx +; KNL-NEXT: orq %rsi, %rdx ; KNL-NEXT: movq %rbp, %rsp ; KNL-NEXT: popq %rbp ; KNL-NEXT: vzeroupper @@ -2178,7 +2173,7 @@ ; KNL-NEXT: movq %rsp, %rbp ; KNL-NEXT: .cfi_def_cfa_register %rbp ; KNL-NEXT: andq $-128, %rsp -; KNL-NEXT: subq $384, %rsp ## imm = 0x180 +; KNL-NEXT: subq $256, %rsp ## imm = 0x100 ; KNL-NEXT: ## kill: def %esi killed %esi def %rsi ; KNL-NEXT: vmovdqa {{.*#+}} ymm4 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; KNL-NEXT: vpxor %ymm4, %ymm0, %ymm0 @@ -2194,56 +2189,60 @@ ; KNL-NEXT: vmovdqa %ymm3, {{[0-9]+}}(%rsp) ; KNL-NEXT: vmovdqa %ymm2, {{[0-9]+}}(%rsp) ; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp) -; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp) -; KNL-NEXT: setne 128(%rsp,%rsi) -; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm1 -; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm2 +; KNL-NEXT: vmovdqa %ymm0, (%rsp) +; KNL-NEXT: setne (%rsp,%rsi) +; KNL-NEXT: vmovdqa (%rsp), %ymm2 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm3 +; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm1 ; KNL-NEXT: vmovdqa {{[0-9]+}}(%rsp), %ymm0 -; KNL-NEXT: vextracti128 $1, %ymm1, %xmm4 -; KNL-NEXT: vpmovsxbd %xmm4, %zmm4 +; KNL-NEXT: vpmovsxbd %xmm2, %zmm4 ; KNL-NEXT: vpslld $31, %zmm4, %zmm4 ; KNL-NEXT: vptestmd %zmm4, %zmm4, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vextracti128 $1, %ymm2, %xmm1 -; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vpmovsxbd %xmm2, %zmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vextracti128 $1, %ymm3, %xmm1 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2 +; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 +; KNL-NEXT: vpslld $31, %zmm2, %zmm2 +; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: shll $16, %ecx +; KNL-NEXT: orl %eax, %ecx +; KNL-NEXT: vpmovsxbd %xmm3, %zmm2 +; KNL-NEXT: vpslld $31, %zmm2, %zmm2 +; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 +; KNL-NEXT: kmovw %k0, %edx +; KNL-NEXT: vextracti128 $1, %ymm3, %xmm2 +; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 +; KNL-NEXT: vpslld $31, %zmm2, %zmm2 +; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: shll $16, %eax +; KNL-NEXT: orl %edx, %eax +; KNL-NEXT: shlq $32, %rax +; KNL-NEXT: orq %rcx, %rax +; KNL-NEXT: vpmovsxbd %xmm1, %zmm2 +; KNL-NEXT: vpslld $31, %zmm2, %zmm2 +; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: vextracti128 $1, %ymm1, %xmm1 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vpmovsxbd %xmm3, %zmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, (%rsp) -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1 -; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 +; KNL-NEXT: kmovw %k0, %esi +; KNL-NEXT: shll $16, %esi +; KNL-NEXT: orl %ecx, %esi +; KNL-NEXT: vpmovsxbd %xmm0, %zmm1 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax -; KNL-NEXT: shlq $32, %rax -; KNL-NEXT: orq %rcx, %rax -; KNL-NEXT: movl (%rsp), %ecx -; KNL-NEXT: movl {{[0-9]+}}(%rsp), %edx +; KNL-NEXT: kmovw %k0, %edx +; KNL-NEXT: shll $16, %edx +; KNL-NEXT: orl %ecx, %edx ; KNL-NEXT: shlq $32, %rdx -; KNL-NEXT: orq %rcx, %rdx +; KNL-NEXT: orq %rsi, %rdx ; KNL-NEXT: movq %rbp, %rsp ; KNL-NEXT: popq %rbp ; KNL-NEXT: vzeroupper Index: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll +++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll @@ -975,38 +975,32 @@ ; ; KNL-LABEL: test16: ; KNL: ## %bb.0: -; KNL-NEXT: pushq %rbp -; KNL-NEXT: .cfi_def_cfa_offset 16 -; KNL-NEXT: .cfi_offset %rbp, -16 -; KNL-NEXT: movq %rsp, %rbp -; KNL-NEXT: .cfi_def_cfa_register %rbp -; KNL-NEXT: andq $-32, %rsp -; KNL-NEXT: subq $64, %rsp -; KNL-NEXT: movl %edi, (%rsp) +; KNL-NEXT: movq %rdi, %rax +; KNL-NEXT: movl %edi, %ecx +; KNL-NEXT: kmovw %edi, %k0 ; KNL-NEXT: shrq $32, %rdi -; KNL-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; KNL-NEXT: kmovw (%rsp), %k1 -; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; KNL-NEXT: shrq $48, %rax +; KNL-NEXT: shrl $16, %ecx +; KNL-NEXT: kmovw %ecx, %k1 +; KNL-NEXT: kmovw %eax, %k2 +; KNL-NEXT: kmovw %edi, %k3 +; KNL-NEXT: movb $1, %al +; KNL-NEXT: kmovw %eax, %k4 +; KNL-NEXT: kshiftrw $5, %k0, %k5 +; KNL-NEXT: kxorw %k4, %k5, %k4 +; KNL-NEXT: kshiftlw $15, %k4, %k4 +; KNL-NEXT: kshiftrw $10, %k4, %k4 +; KNL-NEXT: kxorw %k0, %k4, %k4 +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z} ; KNL-NEXT: vpmovdb %zmm0, %xmm0 -; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; KNL-NEXT: vpmovdb %zmm1, %xmm1 -; KNL-NEXT: movl $1, %eax -; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 -; KNL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 -; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} ; KNL-NEXT: vpmovdb %zmm1, %xmm1 -; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} +; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k4} {z} +; KNL-NEXT: vpmovdb %zmm0, %xmm0 +; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; KNL-NEXT: vpmovdb %zmm2, %xmm2 -; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 -; KNL-NEXT: vpsllw $7, %ymm0, %ymm0 -; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0 -; KNL-NEXT: movq %rbp, %rsp -; KNL-NEXT: popq %rbp +; KNL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test16: @@ -1037,38 +1031,32 @@ ; ; AVX512DQ-LABEL: test16: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: pushq %rbp -; AVX512DQ-NEXT: .cfi_def_cfa_offset 16 -; AVX512DQ-NEXT: .cfi_offset %rbp, -16 -; AVX512DQ-NEXT: movq %rsp, %rbp -; AVX512DQ-NEXT: .cfi_def_cfa_register %rbp -; AVX512DQ-NEXT: andq $-32, %rsp -; AVX512DQ-NEXT: subq $64, %rsp -; AVX512DQ-NEXT: movl %edi, (%rsp) +; AVX512DQ-NEXT: movq %rdi, %rax +; AVX512DQ-NEXT: movl %edi, %ecx +; AVX512DQ-NEXT: kmovw %edi, %k0 ; AVX512DQ-NEXT: shrq $32, %rdi -; AVX512DQ-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; AVX512DQ-NEXT: kmovw (%rsp), %k0 -; AVX512DQ-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; AVX512DQ-NEXT: vpmovm2d %k1, %zmm0 +; AVX512DQ-NEXT: shrq $48, %rax +; AVX512DQ-NEXT: shrl $16, %ecx +; AVX512DQ-NEXT: kmovw %ecx, %k1 +; AVX512DQ-NEXT: kmovw %eax, %k2 +; AVX512DQ-NEXT: kmovw %edi, %k3 +; AVX512DQ-NEXT: movb $1, %al +; AVX512DQ-NEXT: kmovw %eax, %k4 +; AVX512DQ-NEXT: kshiftrw $5, %k0, %k5 +; AVX512DQ-NEXT: kxorw %k4, %k5, %k4 +; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4 +; AVX512DQ-NEXT: kshiftrw $10, %k4, %k4 +; AVX512DQ-NEXT: kxorw %k0, %k4, %k0 +; AVX512DQ-NEXT: vpmovm2d %k3, %zmm0 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1 -; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 -; AVX512DQ-NEXT: movl $1, %eax -; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 -; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 -; AVX512DQ-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 -; AVX512DQ-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1 +; AVX512DQ-NEXT: vpmovm2d %k2, %zmm1 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 +; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 +; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 +; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vpmovm2d %k1, %zmm2 ; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2 -; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 -; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; AVX512DQ-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512DQ-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0 -; AVX512DQ-NEXT: movq %rbp, %rsp -; AVX512DQ-NEXT: popq %rbp +; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ; AVX512DQ-NEXT: retq %a = bitcast i64 %x to <64 x i1> %b = insertelement <64 x i1>%a, i1 true, i32 5 @@ -1080,40 +1068,33 @@ ; ; KNL-LABEL: test17: ; KNL: ## %bb.0: -; KNL-NEXT: pushq %rbp -; KNL-NEXT: .cfi_def_cfa_offset 16 -; KNL-NEXT: .cfi_offset %rbp, -16 -; KNL-NEXT: movq %rsp, %rbp -; KNL-NEXT: .cfi_def_cfa_register %rbp -; KNL-NEXT: andq $-32, %rsp -; KNL-NEXT: subq $64, %rsp -; KNL-NEXT: movl %edi, (%rsp) +; KNL-NEXT: movq %rdi, %rax +; KNL-NEXT: movl %edi, %ecx +; KNL-NEXT: kmovw %edi, %k0 ; KNL-NEXT: shrq $32, %rdi -; KNL-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; KNL-NEXT: kmovw (%rsp), %k1 -; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} -; KNL-NEXT: vpmovdb %zmm0, %xmm0 -; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; KNL-NEXT: vpmovdb %zmm1, %xmm1 -; KNL-NEXT: xorl %eax, %eax +; KNL-NEXT: shrq $48, %rax +; KNL-NEXT: shrl $16, %ecx +; KNL-NEXT: kmovw %ecx, %k1 +; KNL-NEXT: kmovw %eax, %k2 +; KNL-NEXT: kmovw %edi, %k3 ; KNL-NEXT: cmpl %edx, %esi ; KNL-NEXT: setg %al -; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 -; KNL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 -; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; KNL-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; KNL-NEXT: kmovw %eax, %k4 +; KNL-NEXT: kshiftrw $5, %k0, %k5 +; KNL-NEXT: kxorw %k4, %k5, %k4 +; KNL-NEXT: kshiftlw $15, %k4, %k4 +; KNL-NEXT: kshiftrw $10, %k4, %k4 +; KNL-NEXT: kxorw %k0, %k4, %k4 +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z} +; KNL-NEXT: vpmovdb %zmm0, %xmm0 +; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} ; KNL-NEXT: vpmovdb %zmm1, %xmm1 -; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} +; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 +; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k4} {z} +; KNL-NEXT: vpmovdb %zmm0, %xmm0 +; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; KNL-NEXT: vpmovdb %zmm2, %xmm2 -; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 -; KNL-NEXT: vpsllw $7, %ymm0, %ymm0 -; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; KNL-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0 -; KNL-NEXT: movq %rbp, %rsp -; KNL-NEXT: popq %rbp +; KNL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test17: @@ -1146,40 +1127,33 @@ ; ; AVX512DQ-LABEL: test17: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: pushq %rbp -; AVX512DQ-NEXT: .cfi_def_cfa_offset 16 -; AVX512DQ-NEXT: .cfi_offset %rbp, -16 -; AVX512DQ-NEXT: movq %rsp, %rbp -; AVX512DQ-NEXT: .cfi_def_cfa_register %rbp -; AVX512DQ-NEXT: andq $-32, %rsp -; AVX512DQ-NEXT: subq $64, %rsp -; AVX512DQ-NEXT: movl %edi, (%rsp) +; AVX512DQ-NEXT: movq %rdi, %rax +; AVX512DQ-NEXT: movl %edi, %ecx +; AVX512DQ-NEXT: kmovw %edi, %k0 ; AVX512DQ-NEXT: shrq $32, %rdi -; AVX512DQ-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; AVX512DQ-NEXT: kmovw (%rsp), %k0 -; AVX512DQ-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; AVX512DQ-NEXT: vpmovm2d %k1, %zmm0 -; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1 -; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 -; AVX512DQ-NEXT: xorl %eax, %eax +; AVX512DQ-NEXT: shrq $48, %rax +; AVX512DQ-NEXT: shrl $16, %ecx +; AVX512DQ-NEXT: kmovw %ecx, %k1 +; AVX512DQ-NEXT: kmovw %eax, %k2 +; AVX512DQ-NEXT: kmovw %edi, %k3 ; AVX512DQ-NEXT: cmpl %edx, %esi ; AVX512DQ-NEXT: setg %al -; AVX512DQ-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 -; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 -; AVX512DQ-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 -; AVX512DQ-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1 +; AVX512DQ-NEXT: kmovw %eax, %k4 +; AVX512DQ-NEXT: kshiftrw $5, %k0, %k5 +; AVX512DQ-NEXT: kxorw %k4, %k5, %k4 +; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4 +; AVX512DQ-NEXT: kshiftrw $10, %k4, %k4 +; AVX512DQ-NEXT: kxorw %k0, %k4, %k0 +; AVX512DQ-NEXT: vpmovm2d %k3, %zmm0 +; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512DQ-NEXT: vpmovm2d %k2, %zmm1 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 +; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 +; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 +; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vpmovm2d %k1, %zmm2 ; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2 -; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 -; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; AVX512DQ-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512DQ-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0 -; AVX512DQ-NEXT: movq %rbp, %rsp -; AVX512DQ-NEXT: popq %rbp +; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ; AVX512DQ-NEXT: retq %a = bitcast i64 %x to <64 x i1> %b = icmp sgt i32 %y, %z @@ -1815,51 +1789,29 @@ ; ; KNL-LABEL: ktest_2: ; KNL: ## %bb.0: -; KNL-NEXT: pushq %rbp -; KNL-NEXT: .cfi_def_cfa_offset 16 -; KNL-NEXT: .cfi_offset %rbp, -16 -; KNL-NEXT: movq %rsp, %rbp -; KNL-NEXT: .cfi_def_cfa_register %rbp -; KNL-NEXT: andq $-32, %rsp -; KNL-NEXT: subq $32, %rsp ; KNL-NEXT: vmovups (%rdi), %zmm2 ; KNL-NEXT: vmovups 64(%rdi), %zmm3 -; KNL-NEXT: vcmpltps %zmm0, %zmm2, %k1 -; KNL-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; KNL-NEXT: vpmovdb %zmm2, %xmm2 -; KNL-NEXT: vcmpltps %zmm1, %zmm3, %k2 -; KNL-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} -; KNL-NEXT: vpmovdb %zmm3, %xmm3 -; KNL-NEXT: vmovups 68(%rdi), %zmm4 {%k2} {z} -; KNL-NEXT: vmovups 4(%rdi), %zmm5 {%k1} {z} -; KNL-NEXT: vcmpltps %zmm5, %zmm0, %k1 -; KNL-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k1} {z} -; KNL-NEXT: vpmovdb %zmm5, %xmm5 -; KNL-NEXT: vpor %xmm5, %xmm2, %xmm2 -; KNL-NEXT: vcmpltps %zmm4, %zmm1, %k1 -; KNL-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z} -; KNL-NEXT: vpmovdb %zmm4, %xmm4 -; KNL-NEXT: vpor %xmm4, %xmm3, %xmm3 -; KNL-NEXT: vpmovsxbd %xmm3, %zmm3 -; KNL-NEXT: vpslld $31, %zmm3, %zmm3 -; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 -; KNL-NEXT: vpslld $31, %zmm2, %zmm2 -; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 -; KNL-NEXT: kmovw %k0, (%rsp) -; KNL-NEXT: cmpl $0, (%rsp) +; KNL-NEXT: vcmpltps %zmm1, %zmm3, %k1 +; KNL-NEXT: vcmpltps %zmm0, %zmm2, %k2 +; KNL-NEXT: vmovups 4(%rdi), %zmm2 {%k2} {z} +; KNL-NEXT: vmovups 68(%rdi), %zmm3 {%k1} {z} +; KNL-NEXT: vcmpltps %zmm3, %zmm1, %k0 +; KNL-NEXT: vcmpltps %zmm2, %zmm0, %k3 +; KNL-NEXT: korw %k3, %k2, %k2 +; KNL-NEXT: kmovw %k2, %eax +; KNL-NEXT: korw %k0, %k1, %k0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: shll $16, %ecx +; KNL-NEXT: orl %eax, %ecx ; KNL-NEXT: je LBB42_2 ; KNL-NEXT: ## %bb.1: ## %L1 ; KNL-NEXT: vmovaps %zmm0, (%rdi) ; KNL-NEXT: vmovaps %zmm1, 64(%rdi) -; KNL-NEXT: jmp LBB42_3 +; KNL-NEXT: vzeroupper +; KNL-NEXT: retq ; KNL-NEXT: LBB42_2: ## %L2 ; KNL-NEXT: vmovaps %zmm0, 4(%rdi) ; KNL-NEXT: vmovaps %zmm1, 68(%rdi) -; KNL-NEXT: LBB42_3: ## %End -; KNL-NEXT: movq %rbp, %rsp -; KNL-NEXT: popq %rbp ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; @@ -1917,51 +1869,29 @@ ; ; AVX512DQ-LABEL: ktest_2: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: pushq %rbp -; AVX512DQ-NEXT: .cfi_def_cfa_offset 16 -; AVX512DQ-NEXT: .cfi_offset %rbp, -16 -; AVX512DQ-NEXT: movq %rsp, %rbp -; AVX512DQ-NEXT: .cfi_def_cfa_register %rbp -; AVX512DQ-NEXT: andq $-32, %rsp -; AVX512DQ-NEXT: subq $32, %rsp ; AVX512DQ-NEXT: vmovups (%rdi), %zmm2 ; AVX512DQ-NEXT: vmovups 64(%rdi), %zmm3 -; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm2, %k1 -; AVX512DQ-NEXT: vpmovm2d %k1, %zmm2 -; AVX512DQ-NEXT: vpmovdb %zmm2, %xmm2 -; AVX512DQ-NEXT: vcmpltps %zmm1, %zmm3, %k2 -; AVX512DQ-NEXT: vpmovm2d %k2, %zmm3 -; AVX512DQ-NEXT: vpmovdb %zmm3, %xmm3 -; AVX512DQ-NEXT: vmovups 68(%rdi), %zmm4 {%k2} {z} -; AVX512DQ-NEXT: vmovups 4(%rdi), %zmm5 {%k1} {z} -; AVX512DQ-NEXT: vcmpltps %zmm5, %zmm0, %k0 -; AVX512DQ-NEXT: vpmovm2d %k0, %zmm5 -; AVX512DQ-NEXT: vpmovdb %zmm5, %xmm5 -; AVX512DQ-NEXT: vpor %xmm5, %xmm2, %xmm2 -; AVX512DQ-NEXT: vcmpltps %zmm4, %zmm1, %k0 -; AVX512DQ-NEXT: vpmovm2d %k0, %zmm4 -; AVX512DQ-NEXT: vpmovdb %zmm4, %xmm4 -; AVX512DQ-NEXT: vpor %xmm4, %xmm3, %xmm3 -; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3 -; AVX512DQ-NEXT: vpslld $31, %zmm3, %zmm3 -; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0 -; AVX512DQ-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2 -; AVX512DQ-NEXT: vpslld $31, %zmm2, %zmm2 -; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512DQ-NEXT: kmovw %k0, (%rsp) -; AVX512DQ-NEXT: cmpl $0, (%rsp) +; AVX512DQ-NEXT: vcmpltps %zmm1, %zmm3, %k1 +; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm2, %k2 +; AVX512DQ-NEXT: vmovups 4(%rdi), %zmm2 {%k2} {z} +; AVX512DQ-NEXT: vmovups 68(%rdi), %zmm3 {%k1} {z} +; AVX512DQ-NEXT: vcmpltps %zmm3, %zmm1, %k0 +; AVX512DQ-NEXT: vcmpltps %zmm2, %zmm0, %k3 +; AVX512DQ-NEXT: korw %k3, %k2, %k2 +; AVX512DQ-NEXT: kmovw %k2, %eax +; AVX512DQ-NEXT: korw %k0, %k1, %k0 +; AVX512DQ-NEXT: kmovw %k0, %ecx +; AVX512DQ-NEXT: shll $16, %ecx +; AVX512DQ-NEXT: orl %eax, %ecx ; AVX512DQ-NEXT: je LBB42_2 ; AVX512DQ-NEXT: ## %bb.1: ## %L1 ; AVX512DQ-NEXT: vmovaps %zmm0, (%rdi) ; AVX512DQ-NEXT: vmovaps %zmm1, 64(%rdi) -; AVX512DQ-NEXT: jmp LBB42_3 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq ; AVX512DQ-NEXT: LBB42_2: ## %L2 ; AVX512DQ-NEXT: vmovaps %zmm0, 4(%rdi) ; AVX512DQ-NEXT: vmovaps %zmm1, 68(%rdi) -; AVX512DQ-NEXT: LBB42_3: ## %End -; AVX512DQ-NEXT: movq %rbp, %rsp -; AVX512DQ-NEXT: popq %rbp ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq %addr1 = getelementptr float, float * %base, i64 0 @@ -2334,14 +2264,14 @@ define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) { ; KNL-LABEL: store_32i1: ; KNL: ## %bb.0: -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1 -; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 +; KNL-NEXT: vpmovsxbd %xmm0, %zmm1 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, 2(%rdi) +; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: kmovw %k1, 2(%rdi) ; KNL-NEXT: kmovw %k0, (%rdi) ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq @@ -2364,14 +2294,14 @@ ; ; AVX512DQ-LABEL: store_32i1: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1 +; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm1 ; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1 ; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512DQ-NEXT: kmovw %k0, 2(%rdi) +; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k1 +; AVX512DQ-NEXT: kmovw %k1, 2(%rdi) ; AVX512DQ-NEXT: kmovw %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq @@ -2383,12 +2313,12 @@ ; KNL-LABEL: store_32i1_1: ; KNL: ## %bb.0: ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 -; KNL-NEXT: vpmovsxwd %ymm1, %zmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, 2(%rdi) ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: vpmovsxwd %ymm1, %zmm0 +; KNL-NEXT: vpslld $31, %zmm0, %zmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: kmovw %k1, 2(%rdi) ; KNL-NEXT: kmovw %k0, (%rdi) ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq @@ -2412,12 +2342,12 @@ ; AVX512DQ-LABEL: store_32i1_1: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm1 -; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1 -; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512DQ-NEXT: kmovw %k0, 2(%rdi) ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm0 +; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k1 +; AVX512DQ-NEXT: kmovw %k1, 2(%rdi) ; AVX512DQ-NEXT: kmovw %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq @@ -2431,21 +2361,21 @@ ; ; KNL-LABEL: store_64i1: ; KNL: ## %bb.0: -; KNL-NEXT: vpmovsxbd %xmm3, %zmm3 -; KNL-NEXT: vpslld $31, %zmm3, %zmm3 -; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0 -; KNL-NEXT: kmovw %k0, 6(%rdi) -; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 -; KNL-NEXT: vpslld $31, %zmm2, %zmm2 -; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 -; KNL-NEXT: kmovw %k0, 4(%rdi) -; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, 2(%rdi) ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: vpmovsxbd %xmm1, %zmm0 +; KNL-NEXT: vpslld $31, %zmm0, %zmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: vpmovsxbd %xmm2, %zmm0 +; KNL-NEXT: vpslld $31, %zmm0, %zmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k2 +; KNL-NEXT: vpmovsxbd %xmm3, %zmm0 +; KNL-NEXT: vpslld $31, %zmm0, %zmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k3 +; KNL-NEXT: kmovw %k3, 6(%rdi) +; KNL-NEXT: kmovw %k2, 4(%rdi) +; KNL-NEXT: kmovw %k1, 2(%rdi) ; KNL-NEXT: kmovw %k0, (%rdi) ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq @@ -2468,21 +2398,21 @@ ; ; AVX512DQ-LABEL: store_64i1: ; AVX512DQ: ## %bb.0: -; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3 -; AVX512DQ-NEXT: vpslld $31, %zmm3, %zmm3 -; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0 -; AVX512DQ-NEXT: kmovw %k0, 6(%rdi) -; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2 -; AVX512DQ-NEXT: vpslld $31, %zmm2, %zmm2 -; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512DQ-NEXT: kmovw %k0, 4(%rdi) -; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1 -; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1 -; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512DQ-NEXT: kmovw %k0, 2(%rdi) ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm0 +; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k1 +; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm0 +; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k2 +; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm0 +; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k3 +; AVX512DQ-NEXT: kmovw %k3, 6(%rdi) +; AVX512DQ-NEXT: kmovw %k2, 4(%rdi) +; AVX512DQ-NEXT: kmovw %k1, 2(%rdi) ; AVX512DQ-NEXT: kmovw %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq Index: llvm/trunk/test/CodeGen/X86/avx512-masked-memop-64-32.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-masked-memop-64-32.ll +++ llvm/trunk/test/CodeGen/X86/avx512-masked-memop-64-32.ll @@ -240,18 +240,18 @@ define <32 x double> @test_load_32f64(<32 x double>* %ptrs, <32 x i1> %mask, <32 x double> %src0) { ; AVX512F-LABEL: test_load_32f64: ; AVX512F: ## %bb.0: -; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm5 -; AVX512F-NEXT: vpmovsxbd %xmm5, %zmm5 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm5 ; AVX512F-NEXT: vpslld $31, %zmm5, %zmm5 ; AVX512F-NEXT: vptestmd %zmm5, %zmm5, %k1 -; AVX512F-NEXT: vblendmpd 128(%rdi), %zmm3, %zmm5 {%k1} +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k2 -; AVX512F-NEXT: vblendmpd (%rdi), %zmm1, %zmm0 {%k2} +; AVX512F-NEXT: vblendmpd 128(%rdi), %zmm3, %zmm5 {%k2} +; AVX512F-NEXT: vblendmpd (%rdi), %zmm1, %zmm0 {%k1} +; AVX512F-NEXT: kshiftrw $8, %k2, %k2 +; AVX512F-NEXT: vblendmpd 192(%rdi), %zmm4, %zmm3 {%k2} ; AVX512F-NEXT: kshiftrw $8, %k1, %k1 -; AVX512F-NEXT: vblendmpd 192(%rdi), %zmm4, %zmm3 {%k1} -; AVX512F-NEXT: kshiftrw $8, %k2, %k1 ; AVX512F-NEXT: vblendmpd 64(%rdi), %zmm2, %zmm1 {%k1} ; AVX512F-NEXT: vmovapd %zmm5, %zmm2 ; AVX512F-NEXT: retq Index: llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll +++ llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll @@ -206,21 +206,12 @@ define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind { ; KNL-LABEL: test12_v32i32: ; KNL: ## %bb.0: -; KNL-NEXT: pushq %rbp -; KNL-NEXT: movq %rsp, %rbp -; KNL-NEXT: andq $-32, %rsp -; KNL-NEXT: subq $32, %rsp -; KNL-NEXT: vpcmpeqd %zmm3, %zmm1, %k1 -; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vpcmpeqd %zmm2, %zmm0, %k1 -; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, (%rsp) -; KNL-NEXT: movl (%rsp), %eax -; KNL-NEXT: movq %rbp, %rsp -; KNL-NEXT: popq %rbp +; KNL-NEXT: vpcmpeqd %zmm2, %zmm0, %k0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: vpcmpeqd %zmm3, %zmm1, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: shll $16, %eax +; KNL-NEXT: orl %ecx, %eax ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; @@ -249,32 +240,28 @@ define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind { ; KNL-LABEL: test12_v64i16: ; KNL: ## %bb.0: -; KNL-NEXT: pushq %rbp -; KNL-NEXT: movq %rsp, %rbp -; KNL-NEXT: andq $-32, %rsp -; KNL-NEXT: subq $64, %rsp -; KNL-NEXT: vpcmpeqw %ymm5, %ymm1, %ymm1 -; KNL-NEXT: vpmovsxwd %ymm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; KNL-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, (%rsp) -; KNL-NEXT: vpcmpeqw %ymm7, %ymm3, %ymm0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpcmpeqw %ymm5, %ymm1, %ymm0 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: shll $16, %ecx +; KNL-NEXT: orl %eax, %ecx ; KNL-NEXT: vpcmpeqw %ymm6, %ymm2, %ymm0 ; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: movl (%rsp), %ecx -; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: kmovw %k0, %edx +; KNL-NEXT: vpcmpeqw %ymm7, %ymm3, %ymm0 +; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: shll $16, %eax +; KNL-NEXT: orl %edx, %eax ; KNL-NEXT: shlq $32, %rax ; KNL-NEXT: orq %rcx, %rax -; KNL-NEXT: movq %rbp, %rsp -; KNL-NEXT: popq %rbp ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; Index: llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll +++ llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll @@ -11,22 +11,10 @@ ; ; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -47,22 +35,10 @@ ; ; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -85,25 +61,11 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -127,25 +89,11 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -170,27 +118,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -211,27 +143,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -254,30 +170,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -301,30 +199,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -350,30 +230,16 @@ ; ; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 +; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -395,30 +261,16 @@ ; ; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 +; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -442,41 +294,19 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 -; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm3, %xmm3 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -501,41 +331,19 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi ; NoVLX-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 -; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2 -; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 +; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -675,49 +483,10 @@ ; ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -738,49 +507,10 @@ ; ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -803,50 +533,11 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -870,50 +561,11 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -938,54 +590,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -1006,54 +615,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -1076,55 +642,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -1148,55 +671,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -1222,22 +702,10 @@ ; ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -1259,22 +727,10 @@ ; ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -1298,25 +754,11 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -1341,25 +783,11 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -1385,27 +813,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -1427,27 +839,11 @@ ; ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -1471,30 +867,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -1519,30 +897,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -1568,30 +928,43 @@ ; ; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 -; NoVLX-NEXT: vmovq %xmm3, %rax +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 +; NoVLX-NEXT: vmovq %xmm2, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm2 +; NoVLX-NEXT: vmovd %eax, %xmm3 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5 -; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8 -; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4 -; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7 -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4 +; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8 +; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5 +; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7 +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4 +; NoVLX-NEXT: vpextrq $1, %xmm2, %rax +; NoVLX-NEXT: shrq $48, %rcx +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2 +; NoVLX-NEXT: movl %eax, %ecx +; NoVLX-NEXT: shrl $16, %ecx +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: movq %rax, %rcx +; NoVLX-NEXT: shrq $32, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm3, %rcx +; NoVLX-NEXT: shrq $48, %rax +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9 +; NoVLX-NEXT: movl %ecx, %eax +; NoVLX-NEXT: shrl $16, %eax +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: movq %rcx, %rax +; NoVLX-NEXT: shrq $32, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 @@ -1599,109 +972,109 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm0, %rcx +; NoVLX-NEXT: vmovq %xmm6, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm5 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm6, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vmovq %xmm2, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm5 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm2, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: vmovq %xmm7, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm7, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm6, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vmovq %xmm5, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm6, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm5, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm1, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vmovq %xmm8, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm5 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpextrq $1, %xmm8, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vmovq %xmm4, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5 +; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 @@ -1709,7 +1082,11 @@ ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rax +; NoVLX-NEXT: vpextrq $1, %xmm1, %rax +; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1 +; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0 +; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3 +; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1 ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx @@ -1717,51 +1094,20 @@ ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx +; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm8, %rcx -; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm8, %rax -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 -; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3 -; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm3, %ymm1 -; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 +; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm0 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -1783,67 +1129,60 @@ ; ; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2 -; NoVLX-NEXT: vmovq %xmm2, %rax +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1 +; NoVLX-NEXT: vmovq %xmm1, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm1 +; NoVLX-NEXT: vmovd %eax, %xmm2 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1 -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm2, %rax +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm0, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vmovq %xmm4, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm3, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 @@ -1851,7 +1190,7 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm1, %rcx +; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax @@ -1861,37 +1200,30 @@ ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax +; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpeqw 32(%rdi), %ymm1, %ymm1 -; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 ; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vpcmpeqw 32(%rdi), %ymm1, %ymm0 +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -1915,51 +1247,23 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 -; NoVLX-NEXT: vmovq %xmm2, %rax +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 +; NoVLX-NEXT: vmovq %xmm3, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm3 +; NoVLX-NEXT: vmovd %eax, %xmm2 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4 -; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8 -; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5 -; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5 +; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8 +; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4 +; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7 +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm2, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm3, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5 ; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 @@ -1967,109 +1271,109 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm6, %rcx +; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm5 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm6, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm0, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vmovq %xmm2, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm5 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpextrq $1, %xmm2, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm7, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm7, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm5, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm6, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm5, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm6, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm8, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm5 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm8, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5 -; NoVLX-NEXT: vmovq %xmm1, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 @@ -2077,17 +1381,7 @@ ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax -; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1 -; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3 -; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm4, %xmm4 -; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1 -; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx @@ -2095,35 +1389,47 @@ ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm8, %rcx +; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 -; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: movl %ecx, %eax +; NoVLX-NEXT: shrl $16, %eax +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: movq %rcx, %rax +; NoVLX-NEXT: shrq $32, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpextrq $1, %xmm8, %rax +; NoVLX-NEXT: shrq $48, %rcx +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4 +; NoVLX-NEXT: movl %eax, %ecx +; NoVLX-NEXT: shrl $16, %ecx +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4 +; NoVLX-NEXT: movq %rax, %rcx +; NoVLX-NEXT: shrq $32, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4 +; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 +; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3 +; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1 +; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi +; NoVLX-NEXT: shrq $48, %rax +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm1 +; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 +; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm3, %ymm1 +; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -2148,68 +1454,60 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1 -; NoVLX-NEXT: vmovq %xmm1, %rax +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2 +; NoVLX-NEXT: vmovq %xmm2, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm2 +; NoVLX-NEXT: vmovd %eax, %xmm1 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1 +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm2, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vmovq %xmm4, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm3, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm3, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 @@ -2217,7 +1515,7 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm0, %rcx +; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax @@ -2227,51 +1525,33 @@ ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax +; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm4, %xmm4 -; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 -; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi +; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm1, %ymm1 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0 +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -2678,37 +1958,12 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -2729,37 +1984,12 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -2782,38 +2012,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -2838,38 +2043,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -2895,37 +2075,12 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -2949,38 +2104,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -3007,42 +2137,13 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -3063,42 +2164,13 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -3121,43 +2193,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -3182,43 +2225,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -3244,42 +2258,13 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -3303,43 +2288,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -3561,49 +2517,12 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -3625,49 +2544,12 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -3691,50 +2573,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -3759,50 +2604,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -3828,49 +2636,12 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -3895,50 +2666,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -3965,54 +2699,13 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4034,54 +2727,13 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4105,57 +2757,16 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp -; NoVLX-NEXT: vzeroupper -; NoVLX-NEXT: retq +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax +; NoVLX-NEXT: vzeroupper +; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> @@ -4178,55 +2789,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4252,54 +2822,13 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4324,55 +2853,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4399,22 +2887,8 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4436,22 +2910,8 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4475,23 +2935,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4516,23 +2962,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4558,22 +2990,8 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4598,23 +3016,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4641,27 +3045,9 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4683,27 +3069,9 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4727,28 +3095,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4773,28 +3123,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4820,27 +3152,9 @@ ; ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -4865,28 +3179,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5485,31 +3781,12 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5530,31 +3807,12 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5577,32 +3835,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5627,32 +3866,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5678,31 +3898,12 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5726,32 +3927,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5778,36 +3960,13 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5828,36 +3987,13 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5880,37 +4016,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5935,37 +4048,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -5991,36 +4081,13 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -6044,37 +4111,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -6496,37 +4540,12 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -6548,37 +4567,12 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -6602,38 +4596,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -6659,38 +4628,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -6717,37 +4661,12 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -6772,38 +4691,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -6831,42 +4725,13 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -6888,42 +4753,13 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -6947,43 +4783,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -7009,43 +4816,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -7072,42 +4850,13 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -7132,43 +4881,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -7366,47 +5086,8 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -7428,47 +5109,8 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -7492,48 +5134,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -7558,48 +5161,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -7625,47 +5189,8 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -7690,48 +5215,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -7758,52 +5244,9 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -7825,52 +5268,9 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -7894,53 +5294,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -7965,53 +5322,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8037,52 +5351,9 @@ ; ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8107,53 +5378,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8179,22 +5407,10 @@ ; ; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8215,22 +5431,10 @@ ; ; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8253,25 +5457,11 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8295,25 +5485,11 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8338,27 +5514,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8379,27 +5539,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8422,30 +5566,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8469,30 +5595,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8518,30 +5626,16 @@ ; ; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 +; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8563,30 +5657,16 @@ ; ; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 +; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8610,41 +5690,19 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 -; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm3, %xmm3 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi ; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8669,41 +5727,19 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi ; NoVLX-NEXT: vpcmpgtb (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 -; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2 -; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 +; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8843,49 +5879,10 @@ ; ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8906,49 +5903,10 @@ ; ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -8971,50 +5929,11 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -9038,50 +5957,11 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -9106,54 +5986,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -9174,54 +6011,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -9244,55 +6038,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -9316,55 +6067,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -9390,22 +6098,10 @@ ; ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -9427,22 +6123,10 @@ ; ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -9466,25 +6150,11 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -9509,25 +6179,11 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -9553,27 +6209,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -9595,27 +6235,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -9639,30 +6263,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -9687,30 +6293,12 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -9736,30 +6324,63 @@ ; ; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 -; NoVLX-NEXT: vmovq %xmm3, %rax +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 +; NoVLX-NEXT: vmovq %xmm2, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm2 +; NoVLX-NEXT: vmovd %eax, %xmm3 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5 -; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8 -; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4 -; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7 -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4 +; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8 +; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5 +; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7 +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4 +; NoVLX-NEXT: vpextrq $1, %xmm2, %rax +; NoVLX-NEXT: shrq $48, %rcx +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2 +; NoVLX-NEXT: movl %eax, %ecx +; NoVLX-NEXT: shrl $16, %ecx +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: movq %rax, %rcx +; NoVLX-NEXT: shrq $32, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm3, %rcx +; NoVLX-NEXT: shrq $48, %rax +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9 +; NoVLX-NEXT: movl %ecx, %eax +; NoVLX-NEXT: shrl $16, %eax +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: movq %rcx, %rax +; NoVLX-NEXT: shrq $32, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 +; NoVLX-NEXT: movl %eax, %ecx +; NoVLX-NEXT: shrl $16, %ecx +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: movq %rax, %rcx +; NoVLX-NEXT: shrq $32, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vmovq %xmm6, %rcx +; NoVLX-NEXT: shrq $48, %rax +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4 +; NoVLX-NEXT: movl %ecx, %eax +; NoVLX-NEXT: shrl $16, %eax +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: movq %rcx, %rax +; NoVLX-NEXT: shrq $32, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm6, %rax +; NoVLX-NEXT: shrq $48, %rcx +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 @@ -9769,17 +6390,17 @@ ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm5 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 @@ -9787,69 +6408,69 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vmovq %xmm2, %rcx +; NoVLX-NEXT: vmovq %xmm7, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm5 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm2, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm7, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm7, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vmovq %xmm5, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm7, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm5, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm6, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vmovq %xmm8, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm5 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm6, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpextrq $1, %xmm8, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5 ; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 @@ -9858,26 +6479,10 @@ ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vmovq %xmm4, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rax +; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1 +; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0 +; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3 +; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1 ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx @@ -9885,51 +6490,20 @@ ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx +; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm8, %rcx -; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm8, %rax -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 -; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3 -; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1 -; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 +; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm0 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -9951,67 +6525,60 @@ ; ; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2 -; NoVLX-NEXT: vmovq %xmm2, %rax +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1 +; NoVLX-NEXT: vmovq %xmm1, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm1 +; NoVLX-NEXT: vmovd %eax, %xmm2 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1 -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm2, %rax +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm0, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vmovq %xmm4, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm3, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 @@ -10019,7 +6586,7 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm1, %rcx +; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax @@ -10029,37 +6596,30 @@ ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax +; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtw 32(%rdi), %ymm1, %ymm1 -; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 ; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vpcmpgtw 32(%rdi), %ymm1, %ymm0 +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -10083,51 +6643,23 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 -; NoVLX-NEXT: vmovq %xmm2, %rax +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 +; NoVLX-NEXT: vmovq %xmm3, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm3 +; NoVLX-NEXT: vmovd %eax, %xmm2 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4 -; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8 -; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5 -; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5 +; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8 +; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4 +; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7 +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm2, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm3, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5 ; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 @@ -10135,109 +6667,109 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm6, %rcx +; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm5 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm6, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm0, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vmovq %xmm2, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm5 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpextrq $1, %xmm2, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm7, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm7, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm5, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm6, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm5, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm6, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm8, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm5 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm8, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5 -; NoVLX-NEXT: vmovq %xmm1, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 @@ -10245,17 +6777,7 @@ ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax -; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1 -; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3 -; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm4, %xmm4 -; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1 -; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx @@ -10263,35 +6785,47 @@ ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm8, %rcx +; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 -; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0 +; NoVLX-NEXT: movl %ecx, %eax +; NoVLX-NEXT: shrl $16, %eax +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: movq %rcx, %rax +; NoVLX-NEXT: shrq $32, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpextrq $1, %xmm8, %rax +; NoVLX-NEXT: shrq $48, %rcx +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4 +; NoVLX-NEXT: movl %eax, %ecx +; NoVLX-NEXT: shrl $16, %ecx +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4 +; NoVLX-NEXT: movq %rax, %rcx +; NoVLX-NEXT: shrq $32, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4 +; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 +; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3 +; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1 +; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi +; NoVLX-NEXT: shrq $48, %rax +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm1 +; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 +; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -10316,68 +6850,60 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1 -; NoVLX-NEXT: vmovq %xmm1, %rax +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2 +; NoVLX-NEXT: vmovq %xmm2, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm2 +; NoVLX-NEXT: vmovd %eax, %xmm1 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1 +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm2, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vmovq %xmm4, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm3, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm3, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 @@ -10385,7 +6911,7 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm0, %rcx +; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax @@ -10395,51 +6921,33 @@ ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax +; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm4, %xmm4 -; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 -; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi +; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm1, %ymm1 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0 +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -10846,37 +7354,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -10897,37 +7380,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -10950,38 +7408,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -11006,38 +7439,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -11063,37 +7471,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -11117,38 +7500,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -11175,42 +7533,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -11231,42 +7560,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -11289,43 +7589,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -11350,43 +7621,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -11412,42 +7654,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -11471,43 +7684,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -11729,49 +7913,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -11793,49 +7940,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -11859,50 +7969,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -11927,50 +8000,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -11996,49 +8032,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12063,50 +8062,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12133,54 +8095,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12202,54 +8123,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12273,55 +8153,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12346,55 +8185,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12420,54 +8218,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12492,55 +8249,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12567,22 +8283,8 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12604,22 +8306,8 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12643,23 +8331,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12684,23 +8358,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12726,22 +8386,8 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12766,23 +8412,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12809,27 +8441,9 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12851,27 +8465,9 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12895,28 +8491,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12941,28 +8519,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -12988,27 +8548,9 @@ ; ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -13033,28 +8575,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -13653,31 +9177,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -13698,31 +9203,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -13745,32 +9231,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -13795,32 +9262,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -13846,31 +9294,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -13894,32 +9323,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -13946,36 +9356,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -13996,36 +9383,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14048,37 +9412,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14103,37 +9444,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14159,36 +9477,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14212,37 +9507,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14664,37 +9936,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14716,37 +9963,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14770,38 +9992,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14827,38 +10024,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14885,37 +10057,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14940,38 +10087,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -14999,42 +10121,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -15056,42 +10149,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -15115,43 +10179,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -15177,43 +10212,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -15240,42 +10246,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -15300,43 +10277,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -15534,47 +10482,8 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -15596,47 +10505,8 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -15660,48 +10530,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -15726,48 +10557,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -15793,47 +10585,8 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -15858,48 +10611,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -15926,52 +10640,9 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -15993,52 +10664,9 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16062,53 +10690,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16133,53 +10718,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16205,52 +10747,9 @@ ; ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16275,53 +10774,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16347,25 +10803,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16386,26 +10830,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16428,28 +10860,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16473,13 +10891,6 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 @@ -16487,15 +10898,8 @@ ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16520,30 +10924,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16564,31 +10952,15 @@ ; ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16611,33 +10983,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16661,13 +11015,6 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 @@ -16675,20 +11022,9 @@ ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16714,34 +11050,20 @@ ; ; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 +; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16763,35 +11085,21 @@ ; ; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 +; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16815,43 +11123,23 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 -; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm3, %xmm3 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -16876,44 +11164,24 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 -; NoVLX-NEXT: vmovdqa (%rsi), %ymm3 -; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm3, %ymm0 -; NoVLX-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3 -; NoVLX-NEXT: vpxor %ymm3, %ymm0, %ymm0 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 -; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2 -; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi +; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 +; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 +; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 +; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 +; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -17067,52 +11335,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -17133,13 +11362,6 @@ ; ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 @@ -17147,39 +11369,7 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -17202,13 +11392,6 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 @@ -17216,39 +11399,7 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -17272,13 +11423,6 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 @@ -17287,39 +11431,7 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -17344,57 +11456,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -17415,13 +11484,6 @@ ; ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 @@ -17429,44 +11491,8 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -17489,13 +11515,6 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 @@ -17503,44 +11522,8 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -17564,13 +11547,6 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 @@ -17579,44 +11555,8 @@ ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -17642,25 +11582,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -17682,26 +11610,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -17725,28 +11641,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -17771,13 +11673,6 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 @@ -17785,15 +11680,8 @@ ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -17819,30 +11707,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -17864,31 +11736,15 @@ ; ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -17912,33 +11768,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -17963,13 +11801,6 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 @@ -17977,20 +11808,9 @@ ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -18016,205 +11836,191 @@ ; ; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 -; NoVLX-NEXT: vmovq %xmm3, %rax +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 +; NoVLX-NEXT: vmovq %xmm2, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm2 +; NoVLX-NEXT: vmovd %eax, %xmm3 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5 -; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8 -; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4 -; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7 -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4 +; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8 +; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5 +; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7 +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm3, %rax +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4 +; NoVLX-NEXT: vpextrq $1, %xmm2, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm0, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm3, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm5 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vmovq %xmm2, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vmovq %xmm6, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm5 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm2, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm6, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm7, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm7, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm6, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vmovq %xmm7, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm6, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm7, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm1, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vmovq %xmm5, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm5, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vmovq %xmm4, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vmovq %xmm8, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm5 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpextrq $1, %xmm8, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm8, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5 +; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm8, %rax -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 -; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3 -; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm1, %rax +; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1 +; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0 +; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3 +; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1 ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 +; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm0 +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -18236,67 +12042,60 @@ ; ; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2 -; NoVLX-NEXT: vmovq %xmm2, %rax +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1 +; NoVLX-NEXT: vmovq %xmm1, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm1 +; NoVLX-NEXT: vmovd %eax, %xmm2 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1 -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm2, %rax +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm0, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vmovq %xmm4, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm3, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 @@ -18304,7 +12103,7 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm1, %rcx +; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax @@ -18314,44 +12113,37 @@ ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax +; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm2 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 ; NoVLX-NEXT: vmovdqa 32(%rdi), %ymm2 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 ; NoVLX-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm0 +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 +; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -18375,51 +12167,23 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 -; NoVLX-NEXT: vmovq %xmm2, %rax +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 +; NoVLX-NEXT: vmovq %xmm3, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm3 +; NoVLX-NEXT: vmovd %eax, %xmm2 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4 -; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8 -; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5 -; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5 +; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8 +; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4 +; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7 +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm2, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm3, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5 ; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 @@ -18427,109 +12191,109 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm6, %rcx +; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm5 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm6, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm0, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vmovq %xmm2, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm5 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpextrq $1, %xmm2, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm7, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm7, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm5, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm6, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm5, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm6, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm8, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm5 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm8, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5 -; NoVLX-NEXT: vmovq %xmm1, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 @@ -18537,17 +12301,7 @@ ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax -; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1 -; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3 -; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm4, %xmm4 -; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1 -; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx @@ -18555,38 +12309,52 @@ ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm8, %rcx +; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 -; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 +; NoVLX-NEXT: movl %ecx, %eax +; NoVLX-NEXT: shrl $16, %eax +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: movq %rcx, %rax +; NoVLX-NEXT: shrq $32, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpextrq $1, %xmm8, %rax +; NoVLX-NEXT: shrq $48, %rcx +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4 +; NoVLX-NEXT: movl %eax, %ecx +; NoVLX-NEXT: shrl $16, %ecx +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4 +; NoVLX-NEXT: movq %rax, %rcx +; NoVLX-NEXT: shrq $32, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4 +; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 +; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3 +; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1 +; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi +; NoVLX-NEXT: shrq $48, %rax +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm1 +; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 +; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -18611,68 +12379,60 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1 -; NoVLX-NEXT: vmovq %xmm1, %rax +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2 +; NoVLX-NEXT: vmovq %xmm2, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm2 +; NoVLX-NEXT: vmovd %eax, %xmm1 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1 +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm2, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vmovq %xmm4, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm3, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm3, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 @@ -18680,7 +12440,7 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm0, %rcx +; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax @@ -18690,56 +12450,40 @@ ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax +; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm4, %xmm4 -; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 -; NoVLX-NEXT: vmovdqa (%rsi), %ymm3 -; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm3, %ymm0 -; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm3 -; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1 -; NoVLX-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3 -; NoVLX-NEXT: vpxor %ymm3, %ymm0, %ymm0 -; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %ymm3, %ymm1, %ymm1 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi +; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; NoVLX-NEXT: vmovdqa (%rsi), %ymm2 +; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 +; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm2 +; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 +; NoVLX-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 +; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm2, %xmm1, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 +; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -19146,37 +12890,12 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -19197,37 +12916,12 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -19250,38 +12944,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -19306,38 +12975,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -19363,37 +13007,12 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -19417,38 +13036,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -19475,42 +13069,13 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -19531,42 +13096,13 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -19589,43 +13125,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -19650,43 +13157,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -19712,42 +13190,13 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -19771,43 +13220,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -20029,49 +13449,12 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -20093,49 +13476,12 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -20159,50 +13505,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -20227,50 +13536,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -20296,49 +13568,12 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -20363,50 +13598,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -20433,54 +13631,13 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -20502,54 +13659,13 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -20573,55 +13689,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -20646,55 +13721,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -20720,54 +13754,13 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -20792,55 +13785,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -20867,22 +13819,8 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -20904,22 +13842,8 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -20943,23 +13867,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -20984,23 +13894,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -21026,22 +13922,8 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -21066,23 +13948,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -21109,27 +13977,9 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -21151,27 +14001,9 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -21195,28 +14027,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -21241,28 +14055,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -21288,27 +14084,9 @@ ; ; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -21333,28 +14111,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -21953,31 +14713,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -21998,31 +14739,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -22045,32 +14767,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -22095,32 +14798,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -22146,31 +14830,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -22194,32 +14859,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -22246,36 +14892,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -22296,36 +14919,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -22348,37 +14948,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -22403,37 +14980,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -22459,36 +15013,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -22512,37 +15043,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -22964,37 +15472,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23016,37 +15499,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23070,38 +15528,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23127,38 +15560,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23185,37 +15593,12 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23240,38 +15623,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23299,42 +15657,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23356,42 +15685,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23415,43 +15715,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23477,43 +15748,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23540,42 +15782,13 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23600,43 +15813,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23834,47 +16018,8 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23896,47 +16041,8 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -23960,48 +16066,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24026,48 +16093,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24093,47 +16121,8 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24158,48 +16147,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24226,52 +16176,9 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24293,52 +16200,9 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24362,53 +16226,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24433,53 +16254,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24505,52 +16283,9 @@ ; ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24575,53 +16310,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24647,25 +16339,13 @@ ; ; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24686,25 +16366,13 @@ ; ; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24727,28 +16395,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24772,28 +16426,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24818,30 +16458,14 @@ ; ; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24862,30 +16486,14 @@ ; ; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24908,33 +16516,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -24958,33 +16548,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -25010,33 +16582,19 @@ ; ; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 +; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -25058,33 +16616,19 @@ ; ; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 +; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -25108,44 +16652,22 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 -; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm3, %xmm3 -; NoVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; NoVLX-NEXT: vpxor %ymm4, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm4, %ymm1, %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi +; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 +; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -25170,44 +16692,22 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 -; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 -; NoVLX-NEXT: vmovdqa {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; NoVLX-NEXT: vpxor %ymm3, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rsi), %ymm3, %ymm3 -; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm3, %ymm0 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 -; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2 -; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 -; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi +; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 +; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1 +; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 +; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -25359,52 +16859,13 @@ ; ; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -25425,52 +16886,13 @@ ; ; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -25493,13 +16915,6 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 @@ -25507,39 +16922,7 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -25563,13 +16946,6 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 @@ -25577,39 +16953,7 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -25634,57 +16978,14 @@ ; ; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -25705,57 +17006,14 @@ ; ; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -25778,13 +17036,6 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 @@ -25792,44 +17043,8 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -25853,13 +17068,6 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 @@ -25867,44 +17075,8 @@ ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -25930,25 +17102,13 @@ ; ; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -25970,25 +17130,13 @@ ; ; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -26012,28 +17160,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -26058,28 +17192,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -26105,30 +17225,14 @@ ; ; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -26150,30 +17254,14 @@ ; ; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -26197,33 +17285,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -26248,33 +17318,15 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -26300,30 +17352,43 @@ ; ; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 -; NoVLX-NEXT: vmovq %xmm3, %rax +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 +; NoVLX-NEXT: vmovq %xmm2, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm2 +; NoVLX-NEXT: vmovd %eax, %xmm3 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5 -; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8 -; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4 -; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7 -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4 +; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8 +; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5 +; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7 +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4 +; NoVLX-NEXT: vpextrq $1, %xmm2, %rax +; NoVLX-NEXT: shrq $48, %rcx +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2 +; NoVLX-NEXT: movl %eax, %ecx +; NoVLX-NEXT: shrl $16, %ecx +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: movq %rax, %rcx +; NoVLX-NEXT: shrq $32, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm3, %rcx +; NoVLX-NEXT: shrq $48, %rax +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9 +; NoVLX-NEXT: movl %ecx, %eax +; NoVLX-NEXT: shrl $16, %eax +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: movq %rcx, %rax +; NoVLX-NEXT: shrq $32, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 @@ -26331,109 +17396,109 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm0, %rcx +; NoVLX-NEXT: vmovq %xmm6, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm5 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm6, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vmovq %xmm2, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm5 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm2, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: vmovq %xmm7, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm7, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm6, %rcx +; NoVLX-NEXT: shrq $32, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vmovq %xmm5, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm6, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm5, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm1, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vmovq %xmm8, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm5 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpextrq $1, %xmm8, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vmovq %xmm4, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5 +; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 @@ -26441,7 +17506,10 @@ ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rax +; NoVLX-NEXT: vpextrq $1, %xmm1, %rax +; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1 +; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0 +; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3 ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx @@ -26449,56 +17517,26 @@ ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx +; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm8, %rcx -; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm8, %rax -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 -; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3 -; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1 -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4 -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2 +; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] -; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3 -; NoVLX-NEXT: vpxor %ymm4, %ymm2, %ymm2 -; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2 -; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 -; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpxor %ymm4, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm4, %ymm1, %ymm1 +; NoVLX-NEXT: vpxor %ymm4, %ymm2, %ymm2 +; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vpxor %ymm4, %ymm1, %ymm0 +; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -26520,67 +17558,60 @@ ; ; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2 -; NoVLX-NEXT: vmovq %xmm2, %rax +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1 +; NoVLX-NEXT: vmovq %xmm1, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm1 +; NoVLX-NEXT: vmovd %eax, %xmm2 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1 -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm2, %rax +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm0, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vmovq %xmm4, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm3, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 @@ -26588,7 +17619,7 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm1, %rcx +; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax @@ -26598,42 +17629,35 @@ ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax +; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] -; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor 32(%rdi), %ymm2, %ymm3 -; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1 -; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rdi), %ymm2, %ymm1 +; NoVLX-NEXT: vpxor (%rdi), %ymm2, %ymm3 +; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm3, %ymm0 +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm0 +; NoVLX-NEXT: vpxor 32(%rdi), %ymm2, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -26657,51 +17681,23 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 -; NoVLX-NEXT: vmovq %xmm2, %rax +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 +; NoVLX-NEXT: vmovq %xmm3, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm3 +; NoVLX-NEXT: vmovd %eax, %xmm2 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4 -; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8 -; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5 -; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5 +; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8 +; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4 +; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7 +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm2, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm3, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5 ; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 @@ -26709,109 +17705,109 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm6, %rcx +; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm5 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm6, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm0, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vmovq %xmm2, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm5 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpextrq $1, %xmm2, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm7, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm7, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm5, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm6, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm5, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm6, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm8, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm5 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vmovd %ecx, %xmm2 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm8, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5 -; NoVLX-NEXT: vmovq %xmm1, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 +; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 @@ -26819,16 +17815,7 @@ ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax -; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0 -; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm4, %xmm4 -; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3 -; NoVLX-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm6, %xmm6 +; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx @@ -26836,41 +17823,52 @@ ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vmovq %xmm8, %rcx +; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 -; NoVLX-NEXT: vmovdqa {{.*#+}} ymm5 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] -; NoVLX-NEXT: vpxor %ymm5, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor %ymm5, %ymm2, %ymm2 -; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 +; NoVLX-NEXT: movl %ecx, %eax +; NoVLX-NEXT: shrl $16, %eax +; NoVLX-NEXT: vmovd %ecx, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: movq %rcx, %rax +; NoVLX-NEXT: shrq $32, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpextrq $1, %xmm8, %rax +; NoVLX-NEXT: shrq $48, %rcx +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4 +; NoVLX-NEXT: movl %eax, %ecx +; NoVLX-NEXT: shrl $16, %ecx +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4 +; NoVLX-NEXT: movq %rax, %rcx +; NoVLX-NEXT: shrq $32, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4 +; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 +; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3 +; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi +; NoVLX-NEXT: shrq $48, %rax +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2 +; NoVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] +; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3 +; NoVLX-NEXT: vpxor %ymm4, %ymm2, %ymm2 +; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2 +; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 +; NoVLX-NEXT: vpxor %ymm4, %ymm0, %ymm0 +; NoVLX-NEXT: vpxor %ymm4, %ymm1, %ymm1 +; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0 -; NoVLX-NEXT: vpxor %ymm5, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor %ymm5, %ymm3, %ymm2 -; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 -; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm6, %xmm1, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -26895,28 +17893,20 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1 +; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vmovq %xmm1, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx ; NoVLX-NEXT: vmovd %eax, %xmm2 ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm3 +; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm2 +; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4 ; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 @@ -26924,39 +17914,39 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vmovq %xmm4, %rcx +; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm3, %rcx +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 +; NoVLX-NEXT: vmovd %ecx, %xmm3 +; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm3, %rax +; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 +; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 @@ -26964,7 +17954,7 @@ ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm0, %rcx +; NoVLX-NEXT: vmovq %xmm2, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax @@ -26974,56 +17964,38 @@ ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax +; NoVLX-NEXT: vpextrq $1, %xmm2, %rax ; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0 +; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 +; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z} -; NoVLX-NEXT: vpmovdb %zmm4, %xmm4 -; NoVLX-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k2} {z} -; NoVLX-NEXT: vpmovdb %zmm5, %xmm5 +; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 +; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: shrl $16, %edi +; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm1 ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] -; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 -; NoVLX-NEXT: vpxor (%rsi), %ymm2, %ymm3 -; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm3, %ymm0 -; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 -; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpxor 32(%rsi), %ymm2, %ymm2 -; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 +; NoVLX-NEXT: vpxor 32(%rsi), %ymm2, %ymm3 +; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 -; NoVLX-NEXT: vpand %xmm5, %xmm1, %xmm1 -; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 -; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: shlq $32, %rax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 +; NoVLX-NEXT: vpxor (%rsi), %ymm2, %ymm2 +; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 +; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 +; NoVLX-NEXT: kmovw %edi, %k2 +; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: shll $16, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -27430,37 +18402,12 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -27481,37 +18428,12 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -27534,38 +18456,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -27590,38 +18487,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -27647,37 +18519,12 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -27701,38 +18548,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -27759,42 +18581,13 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -27815,42 +18608,13 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -27873,43 +18637,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -27934,43 +18669,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -27996,42 +18702,13 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -28055,43 +18732,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -28313,49 +18961,12 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -28377,49 +18988,12 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -28443,50 +19017,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -28511,50 +19048,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -28580,49 +19080,12 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -28647,50 +19110,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -28717,54 +19143,13 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -28786,54 +19171,13 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -28857,55 +19201,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -28930,55 +19233,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -29004,54 +19266,13 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -29076,55 +19297,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -29151,22 +19331,8 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -29188,22 +19354,8 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -29227,23 +19379,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -29268,23 +19406,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -29310,22 +19434,8 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -29350,23 +19460,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -29393,27 +19489,9 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -29435,27 +19513,9 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -29479,28 +19539,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -29525,28 +19567,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -29572,27 +19596,9 @@ ; ; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -29617,28 +19623,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -30237,31 +20225,12 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -30282,31 +20251,12 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -30329,32 +20279,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -30379,32 +20310,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -30430,31 +20342,12 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -30478,32 +20371,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -30530,36 +20404,13 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -30580,36 +20431,13 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -30632,37 +20460,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -30687,37 +20492,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -30743,36 +20525,13 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -30796,37 +20555,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -31248,37 +20984,12 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -31300,37 +21011,12 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -31354,38 +21040,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -31411,38 +21072,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -31469,37 +21105,12 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -31524,38 +21135,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -31583,42 +21169,13 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -31640,42 +21197,13 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -31699,43 +21227,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -31761,43 +21260,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -31824,42 +21294,13 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -31884,43 +21325,14 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -32118,47 +21530,8 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -32180,47 +21553,8 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -32244,48 +21578,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -32310,48 +21605,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -32377,47 +21633,8 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -32442,48 +21659,9 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -32510,52 +21688,9 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -32577,52 +21712,9 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -32646,53 +21738,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -32717,53 +21766,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -32789,52 +21795,9 @@ ; ; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -32859,53 +21822,10 @@ ; ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -33308,37 +22228,12 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -33359,37 +22254,12 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovaps (%rdi), %xmm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -33411,37 +22281,12 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -33465,38 +22310,13 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -33520,38 +22340,13 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vmovaps (%rsi), %xmm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -33576,38 +22371,13 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -33634,42 +22404,13 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -33690,42 +22431,13 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovaps (%rdi), %xmm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -33747,42 +22459,13 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -33806,43 +22489,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -33866,43 +22520,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vmovaps (%rsi), %xmm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -33927,43 +22552,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -34185,49 +22781,12 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -34249,49 +22808,12 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovaps (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -34314,49 +22836,12 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -34381,50 +22866,13 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -34449,50 +22897,13 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovaps (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -34518,50 +22929,13 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vbroadcastss (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -34589,54 +22963,13 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -34658,54 +22991,13 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovaps (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -34728,54 +23020,13 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -34800,55 +23051,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -34873,55 +23083,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovaps (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -34947,55 +23116,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vbroadcastss (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $8, %k0, %k0 +; NoVLX-NEXT: kshiftrw $8, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -35023,22 +23151,8 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -35060,22 +23174,8 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -35098,22 +23198,8 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -35138,23 +23224,9 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -35179,23 +23251,9 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -35221,23 +23279,9 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -35306,27 +23350,9 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -35348,27 +23374,9 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -35391,27 +23399,9 @@ ; ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -35436,28 +23426,10 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -35482,28 +23454,10 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -35529,28 +23483,10 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k1 {%k1} -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %edi, %k1 +; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -36194,31 +24130,12 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -36239,31 +24156,12 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovapd (%rdi), %xmm1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -36285,31 +24183,12 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -36333,32 +24212,13 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -36382,32 +24242,13 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vmovapd (%rsi), %xmm1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -36432,32 +24273,13 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -36484,36 +24306,13 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -36534,36 +24333,13 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovapd (%rdi), %xmm1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -36585,36 +24361,13 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -36638,37 +24391,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1 ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -36692,37 +24422,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vmovapd (%rsi), %xmm1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -36747,37 +24454,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $14, %k0, %k0 +; NoVLX-NEXT: kshiftrw $14, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -37193,37 +24877,12 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -37245,37 +24904,12 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovapd (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -37298,37 +24932,12 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -37353,38 +24962,13 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -37409,38 +24993,13 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vmovapd (%rsi), %ymm1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -37466,38 +25025,13 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -37525,42 +25059,13 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -37582,42 +25087,13 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vmovapd (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -37640,42 +25116,13 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -37700,43 +25147,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1 ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -37761,43 +25179,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vmovapd (%rsi), %ymm1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -37823,43 +25212,14 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edx -; NoVLX-NEXT: kmovw %k0, %esi -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kshiftlw $12, %k0, %k0 +; NoVLX-NEXT: kshiftrw $12, %k0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -38113,47 +25473,8 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -38175,47 +25496,8 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -38238,47 +25520,8 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -38303,48 +25546,9 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -38369,48 +25573,9 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -38436,48 +25601,9 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -38555,52 +25681,9 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -38622,52 +25705,9 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -38690,52 +25730,9 @@ ; ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -38760,53 +25757,10 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -38831,53 +25785,10 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -38903,53 +25814,10 @@ ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} -; NoVLX-NEXT: kshiftrw $7, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r8d -; NoVLX-NEXT: kshiftrw $6, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r9d -; NoVLX-NEXT: kshiftrw $5, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %r10d -; NoVLX-NEXT: kshiftrw $4, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %esi -; NoVLX-NEXT: kshiftrw $3, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %edi -; NoVLX-NEXT: kshiftrw $2, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %eax -; NoVLX-NEXT: kshiftrw $1, %k0, %k1 -; NoVLX-NEXT: kmovw %k1, %ecx -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0 -; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 -; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx -; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: orq %rcx, %rax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -39028,22 +25896,8 @@ ; ; NoVLX-LABEL: test_cmpm_rnd_zero: ; NoVLX: # %bb.0: -; NoVLX-NEXT: pushq %rbp -; NoVLX-NEXT: .cfi_def_cfa_offset 16 -; NoVLX-NEXT: .cfi_offset %rbp, -16 -; NoVLX-NEXT: movq %rsp, %rbp -; NoVLX-NEXT: .cfi_def_cfa_register %rbp -; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k1 -; NoVLX-NEXT: kxorw %k0, %k0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, (%rsp) -; NoVLX-NEXT: movl (%rsp), %eax -; NoVLX-NEXT: movq %rbp, %rsp -; NoVLX-NEXT: popq %rbp +; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 +; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8) Index: llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-256.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-256.ll +++ llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-256.ll @@ -411,28 +411,22 @@ ; ; AVX512F-LABEL: v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: .cfi_def_cfa_offset 16 -; AVX512F-NEXT: .cfi_offset %rbp, -16 -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: .cfi_def_cfa_register %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $32, %rsp ; AVX512F-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm2, %ymm1 -; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 -; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1 -; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kmovw %k0, (%rsp) -; AVX512F-NEXT: movl (%rsp), %eax -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k2 +; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm2, %ymm0 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k2} +; AVX512F-NEXT: kmovw %k0, %ecx +; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k1} +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: shll $16, %eax +; AVX512F-NEXT: orl %ecx, %eax ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; Index: llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-512.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-512.ll +++ llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-512.ll @@ -280,38 +280,22 @@ ; ; AVX512F-LABEL: v32i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: .cfi_def_cfa_offset 16 -; AVX512F-NEXT: .cfi_offset %rbp, -16 -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: .cfi_def_cfa_register %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $32, %rsp +; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1 +; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1 +; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1 ; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k2 +; AVX512F-NEXT: vpcmpgtw %ymm7, %ymm5, %ymm0 +; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 +; AVX512F-NEXT: vpcmpgtw %ymm6, %ymm4, %ymm1 ; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1 -; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 -; AVX512F-NEXT: vpcmpgtw %ymm6, %ymm4, %ymm2 -; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2 -; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 -; AVX512F-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vpcmpgtw %ymm7, %ymm5, %ymm2 -; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2 -; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 -; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 -; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1 -; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kmovw %k0, (%rsp) -; AVX512F-NEXT: movl (%rsp), %eax -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp +; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2} +; AVX512F-NEXT: kmovw %k0, %ecx +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: shll $16, %eax +; AVX512F-NEXT: orl %ecx, %eax ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; @@ -1332,43 +1316,40 @@ ; ; AVX512F-LABEL: v64i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: .cfi_def_cfa_offset 16 -; AVX512F-NEXT: .cfi_offset %rbp, -16 -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: .cfi_def_cfa_register %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $64, %rsp ; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3 +; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k1 +; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 +; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k2 ; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpcmpgtb %ymm7, %ymm5, %ymm2 -; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1 -; AVX512F-NEXT: vpcmpgtb %ymm6, %ymm4, %ymm2 -; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2 -; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2 -; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2 -; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 +; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k3 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kmovw %k0, (%rsp) -; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k4 +; AVX512F-NEXT: vpcmpgtb %ymm7, %ymm5, %ymm0 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm0 -; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; AVX512F-NEXT: movl (%rsp), %ecx -; AVX512F-NEXT: movl {{[0-9]+}}(%rsp), %eax +; AVX512F-NEXT: vpcmpgtb %ymm6, %ymm4, %ymm2 +; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm3 +; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3 +; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2 +; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0 {%k4} +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0 {%k3} +; AVX512F-NEXT: kmovw %k0, %ecx +; AVX512F-NEXT: shll $16, %ecx +; AVX512F-NEXT: orl %eax, %ecx +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k2} +; AVX512F-NEXT: kmovw %k0, %edx +; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k1} +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: shll $16, %eax +; AVX512F-NEXT: orl %edx, %eax ; AVX512F-NEXT: shlq $32, %rax ; AVX512F-NEXT: orq %rcx, %rax -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; Index: llvm/trunk/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll +++ llvm/trunk/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll @@ -505,24 +505,15 @@ ; ; AVX512F-LABEL: ext_i32_32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: .cfi_def_cfa_offset 16 -; AVX512F-NEXT: .cfi_offset %rbp, -16 -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: .cfi_def_cfa_register %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $32, %rsp -; AVX512F-NEXT: movl %edi, (%rsp) +; AVX512F-NEXT: kmovw %edi, %k1 +; AVX512F-NEXT: shrl $16, %edi +; AVX512F-NEXT: kmovw %edi, %k2 ; AVX512F-NEXT: movl {{.*}}(%rip), %eax -; AVX512F-NEXT: kmovw (%rsp), %k1 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; AVX512F-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k2} {z} ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp ; AVX512F-NEXT: retq ; ; AVX512VLBW-LABEL: ext_i32_32i8: @@ -792,27 +783,14 @@ ; ; AVX512F-LABEL: ext_i32_32i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: .cfi_def_cfa_offset 16 -; AVX512F-NEXT: .cfi_offset %rbp, -16 -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: .cfi_def_cfa_register %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $32, %rsp -; AVX512F-NEXT: movl %edi, (%rsp) -; AVX512F-NEXT: kmovw (%rsp), %k1 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} -; AVX512F-NEXT: vpmovdb %zmm0, %xmm1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] -; AVX512F-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp +; AVX512F-NEXT: kmovw %edi, %k1 +; AVX512F-NEXT: shrl $16, %edi +; AVX512F-NEXT: kmovw %edi, %k2 +; AVX512F-NEXT: movl {{.*}}(%rip), %eax +; AVX512F-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k2} {z} +; AVX512F-NEXT: vpmovdw %zmm1, %ymm1 ; AVX512F-NEXT: retq ; ; AVX512VLBW-LABEL: ext_i32_32i16: @@ -950,33 +928,27 @@ ; ; AVX512F-LABEL: ext_i64_64i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: .cfi_def_cfa_offset 16 -; AVX512F-NEXT: .cfi_offset %rbp, -16 -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: .cfi_def_cfa_register %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $64, %rsp -; AVX512F-NEXT: movl %edi, (%rsp) -; AVX512F-NEXT: shrq $32, %rdi -; AVX512F-NEXT: movl %edi, {{[0-9]+}}(%rsp) +; AVX512F-NEXT: movq %rdi, %rax +; AVX512F-NEXT: movq %rdi, %rcx +; AVX512F-NEXT: kmovw %edi, %k1 +; AVX512F-NEXT: movl %edi, %edx +; AVX512F-NEXT: shrl $16, %edx +; AVX512F-NEXT: shrq $32, %rax +; AVX512F-NEXT: shrq $48, %rcx +; AVX512F-NEXT: kmovw %ecx, %k2 +; AVX512F-NEXT: kmovw %eax, %k3 +; AVX512F-NEXT: kmovw %edx, %k4 ; AVX512F-NEXT: movl {{.*}}(%rip), %eax -; AVX512F-NEXT: kmovw (%rsp), %k1 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; AVX512F-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k2} {z} +; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k4} {z} ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 -; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z} +; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k3} {z} ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512F-NEXT: vpbroadcastd %eax, %zmm2 {%k2} {z} ; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 ; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp ; AVX512F-NEXT: retq ; ; AVX512VLBW-LABEL: ext_i64_64i8: Index: llvm/trunk/test/CodeGen/X86/bitcast-setcc-256.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/bitcast-setcc-256.ll +++ llvm/trunk/test/CodeGen/X86/bitcast-setcc-256.ll @@ -183,24 +183,16 @@ ; ; AVX512F-LABEL: v32i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: .cfi_def_cfa_offset 16 -; AVX512F-NEXT: .cfi_offset %rbp, -16 -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: .cfi_def_cfa_register %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $32, %rsp ; AVX512F-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; AVX512F-NEXT: kmovw %k0, %ecx +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kmovw %k0, (%rsp) -; AVX512F-NEXT: movl (%rsp), %eax -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: shll $16, %eax +; AVX512F-NEXT: orl %ecx, %eax ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; Index: llvm/trunk/test/CodeGen/X86/bitcast-setcc-512.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/bitcast-setcc-512.ll +++ llvm/trunk/test/CodeGen/X86/bitcast-setcc-512.ll @@ -51,24 +51,16 @@ ; ; AVX512F-LABEL: v32i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: .cfi_def_cfa_offset 16 -; AVX512F-NEXT: .cfi_offset %rbp, -16 -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: .cfi_def_cfa_register %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $32, %rsp -; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1 -; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kmovw %k0, (%rsp) -; AVX512F-NEXT: movl (%rsp), %eax -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp +; AVX512F-NEXT: kmovw %k0, %ecx +; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm0 +; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: shll $16, %eax +; AVX512F-NEXT: orl %ecx, %eax ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; @@ -1006,35 +998,28 @@ ; ; AVX512F-LABEL: v64i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: .cfi_def_cfa_offset 16 -; AVX512F-NEXT: .cfi_offset %rbp, -16 -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: .cfi_def_cfa_register %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $64, %rsp ; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2 -; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm2 ; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kmovw %k0, (%rsp) +; AVX512F-NEXT: kmovw %k0, %ecx +; AVX512F-NEXT: shll $16, %ecx +; AVX512F-NEXT: orl %eax, %ecx ; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm0 -; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; AVX512F-NEXT: kmovw %k0, %edx +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; AVX512F-NEXT: movl (%rsp), %ecx -; AVX512F-NEXT: movl {{[0-9]+}}(%rsp), %eax +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: shll $16, %eax +; AVX512F-NEXT: orl %edx, %eax ; AVX512F-NEXT: shlq $32, %rax ; AVX512F-NEXT: orq %rcx, %rax -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; Index: llvm/trunk/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll +++ llvm/trunk/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll @@ -132,22 +132,70 @@ ; AVX256VL: # %bb.0: ; AVX256VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX256VL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 -; AVX256VL-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[3,6,u,12,3,7,7,0,3,6,1,13,3,u,7,0,u,u,22,u,u,u,u,u,u,u,u,u,u,21,u,u] -; AVX256VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX256VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,6,u,u,u,u,u,u,u,u,u,u,5,u,u,19,22,u,28,19,23,23,16,19,22,17,29,19,u,23,16] -; AVX256VL-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,255,255,255,255,255,255,255,255,255,255,0,255,255,0,0,255,0,0,0,0,0,0,0,0,0,0,255,0,0] -; AVX256VL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; AVX256VL-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX256VL-NEXT: vpmovsxbw %xmm1, %ymm1 +; AVX256VL-NEXT: vpmovsxwd %xmm1, %ymm1 +; AVX256VL-NEXT: vptestmd %ymm1, %ymm1, %k1 +; AVX256VL-NEXT: vpmovsxbw %xmm0, %ymm0 +; AVX256VL-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX256VL-NEXT: vpmovsxwd %xmm1, %ymm1 +; AVX256VL-NEXT: vptestmd %ymm1, %ymm1, %k2 +; AVX256VL-NEXT: vpmovsxwd %xmm0, %ymm0 +; AVX256VL-NEXT: vptestmd %ymm0, %ymm0, %k3 +; AVX256VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 +; AVX256VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k3} {z} +; AVX256VL-NEXT: vpmovdw %ymm1, %xmm1 +; AVX256VL-NEXT: vmovdqa32 %ymm0, %ymm2 {%k2} {z} +; AVX256VL-NEXT: vpmovdw %ymm2, %xmm2 +; AVX256VL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 +; AVX256VL-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,0,1] +; AVX256VL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1],ymm2[2],ymm1[3],ymm2[4,5],ymm1[6],ymm2[7] +; AVX256VL-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[6,7,12,13,u,u,8,9,6,7,14,15,14,15,0,1,22,23,28,29,18,19,26,27,22,23,u,u,30,31,16,17] +; AVX256VL-NEXT: vmovdqa32 %ymm0, %ymm2 {%k1} {z} +; AVX256VL-NEXT: vpmovdw %ymm2, %xmm2 +; AVX256VL-NEXT: kshiftrw $8, %k1, %k1 +; AVX256VL-NEXT: vmovdqa32 %ymm0, %ymm3 {%k1} {z} +; AVX256VL-NEXT: vpmovdw %ymm3, %xmm3 +; AVX256VL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 +; AVX256VL-NEXT: vpermq {{.*#+}} ymm2 = ymm2[1,1,2,1] +; AVX256VL-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255] +; AVX256VL-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1 +; AVX256VL-NEXT: vpmovsxwd %xmm1, %ymm2 +; AVX256VL-NEXT: vpslld $31, %ymm2, %ymm2 +; AVX256VL-NEXT: vptestmd %ymm2, %ymm2, %k1 +; AVX256VL-NEXT: vextracti128 $1, %ymm1, %xmm1 +; AVX256VL-NEXT: vpmovsxwd %xmm1, %ymm1 +; AVX256VL-NEXT: vpslld $31, %ymm1, %ymm1 +; AVX256VL-NEXT: vptestmd %ymm1, %ymm1, %k0 +; AVX256VL-NEXT: kunpckbw %k1, %k0, %k0 +; AVX256VL-NEXT: kshiftrw $8, %k0, %k2 +; AVX256VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k2} {z} +; AVX256VL-NEXT: vpmovdw %ymm1, %xmm1 +; AVX256VL-NEXT: vpacksswb %xmm0, %xmm1, %xmm1 +; AVX256VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} +; AVX256VL-NEXT: vpmovdw %ymm0, %xmm0 +; AVX256VL-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 +; AVX256VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX256VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 ; AVX256VL-NEXT: retq ; ; AVX512NOBW-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0: ; AVX512NOBW: # %bb.0: ; AVX512NOBW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512NOBW-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 -; AVX512NOBW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[3,6,u,12,3,7,7,0,3,6,1,13,3,u,7,0,u,u,22,u,u,u,u,u,u,u,u,u,u,21,u,u] -; AVX512NOBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX512NOBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,6,u,u,u,u,u,u,u,u,u,u,5,u,u,19,22,u,28,19,23,23,16,19,22,17,29,19,u,23,16] -; AVX512NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,255,255,255,255,255,255,255,255,255,255,0,255,255,0,0,255,0,0,0,0,0,0,0,0,0,0,255,0,0] -; AVX512NOBW-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; AVX512NOBW-NEXT: vpmovsxbd %xmm0, %zmm1 +; AVX512NOBW-NEXT: vptestmd %zmm1, %zmm1, %k1 +; AVX512NOBW-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512NOBW-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512NOBW-NEXT: vptestmd %zmm0, %zmm0, %k2 +; AVX512NOBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; AVX512NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; AVX512NOBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0] +; AVX512NOBW-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 +; AVX512NOBW-NEXT: vptestmd %zmm2, %zmm2, %k1 +; AVX512NOBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512NOBW-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512NOBW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 ; AVX512NOBW-NEXT: retq ; ; AVX256VLBW-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0: Index: llvm/trunk/test/CodeGen/X86/vector-compare-results.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-compare-results.ll +++ llvm/trunk/test/CodeGen/X86/vector-compare-results.ll @@ -2257,23 +2257,25 @@ ; ; AVX512F-LABEL: test_cmp_v64i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm4 ; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 -; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm3 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm4 +; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm2 +; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm3 ; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0 -; AVX512F-NEXT: vmovdqa %xmm4, %xmm2 +; AVX512F-NEXT: vmovdqa %xmm4, %xmm1 +; AVX512F-NEXT: # kill: def %xmm2 killed %xmm2 killed %ymm2 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512DQ-LABEL: test_cmp_v64i8: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm4 ; AVX512DQ-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0 -; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512DQ-NEXT: vextracti128 $1, %ymm4, %xmm3 +; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm4 +; AVX512DQ-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm2 +; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm3 ; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0 -; AVX512DQ-NEXT: vmovdqa %xmm4, %xmm2 +; AVX512DQ-NEXT: vmovdqa %xmm4, %xmm1 +; AVX512DQ-NEXT: # kill: def %xmm2 killed %xmm2 killed %ymm2 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; @@ -2711,32 +2713,24 @@ ; ; AVX512F-LABEL: test_cmp_v32f32: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vcmpltps %zmm0, %zmm2, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vcmpltps %zmm1, %zmm3, %k1 +; AVX512F-NEXT: vcmpltps %zmm0, %zmm2, %k2 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: retq ; ; AVX512DQ-LABEL: test_cmp_v32f32: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm2, %k0 -; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 -; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vcmpltps %zmm1, %zmm3, %k0 +; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm2, %k1 +; AVX512DQ-NEXT: vpmovm2d %k1, %zmm0 +; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: test_cmp_v32f32: @@ -3262,32 +3256,24 @@ ; ; AVX512F-LABEL: test_cmp_v32i32: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm0, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vpcmpgtd %zmm3, %zmm1, %k1 +; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm0, %k2 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: retq ; ; AVX512DQ-LABEL: test_cmp_v32i32: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpcmpgtd %zmm2, %zmm0, %k0 -; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 -; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vpcmpgtd %zmm3, %zmm1, %k0 +; AVX512DQ-NEXT: vpcmpgtd %zmm2, %zmm0, %k1 +; AVX512DQ-NEXT: vpmovm2d %k1, %zmm0 +; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: test_cmp_v32i32: @@ -6386,36 +6372,36 @@ ; AVX512F-LABEL: test_cmp_v128i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpcmpgtb %ymm4, %ymm0, %ymm0 -; AVX512F-NEXT: vpcmpgtb %ymm5, %ymm1, %ymm1 -; AVX512F-NEXT: vpcmpgtb %ymm6, %ymm2, %ymm2 -; AVX512F-NEXT: vpcmpgtb %ymm7, %ymm3, %ymm3 -; AVX512F-NEXT: vextracti128 $1, %ymm3, %xmm4 -; AVX512F-NEXT: vpmovsxbd %xmm4, %zmm4 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm4 ; AVX512F-NEXT: vptestmd %zmm4, %zmm4, %k0 -; AVX512F-NEXT: kmovw %k0, 14(%rdi) -; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3 -; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0 -; AVX512F-NEXT: kmovw %k0, 12(%rdi) -; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm3 -; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3 -; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0 -; AVX512F-NEXT: kmovw %k0, 10(%rdi) -; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2 -; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512F-NEXT: kmovw %k0, 8(%rdi) -; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2 -; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512F-NEXT: kmovw %k0, 6(%rdi) -; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 -; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: kmovw %k0, 4(%rdi) -; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 -; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: kmovw %k0, 2(%rdi) +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 +; AVX512F-NEXT: vpcmpgtb %ymm5, %ymm1, %ymm0 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1 +; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k2 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k3 +; AVX512F-NEXT: vpcmpgtb %ymm6, %ymm2, %ymm0 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1 +; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k4 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k5 +; AVX512F-NEXT: vpcmpgtb %ymm7, %ymm3, %ymm0 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1 +; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k6 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k7 +; AVX512F-NEXT: kmovw %k7, 14(%rdi) +; AVX512F-NEXT: kmovw %k6, 12(%rdi) +; AVX512F-NEXT: kmovw %k5, 10(%rdi) +; AVX512F-NEXT: kmovw %k4, 8(%rdi) +; AVX512F-NEXT: kmovw %k3, 6(%rdi) +; AVX512F-NEXT: kmovw %k2, 4(%rdi) +; AVX512F-NEXT: kmovw %k1, 2(%rdi) ; AVX512F-NEXT: kmovw %k0, (%rdi) ; AVX512F-NEXT: movq %rdi, %rax ; AVX512F-NEXT: vzeroupper @@ -6424,36 +6410,36 @@ ; AVX512DQ-LABEL: test_cmp_v128i8: ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vpcmpgtb %ymm4, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpcmpgtb %ymm5, %ymm1, %ymm1 -; AVX512DQ-NEXT: vpcmpgtb %ymm6, %ymm2, %ymm2 -; AVX512DQ-NEXT: vpcmpgtb %ymm7, %ymm3, %ymm3 -; AVX512DQ-NEXT: vextracti128 $1, %ymm3, %xmm4 -; AVX512DQ-NEXT: vpmovsxbd %xmm4, %zmm4 +; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm4 ; AVX512DQ-NEXT: vptestmd %zmm4, %zmm4, %k0 -; AVX512DQ-NEXT: kmovw %k0, 14(%rdi) -; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3 -; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0 -; AVX512DQ-NEXT: kmovw %k0, 12(%rdi) -; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm3 -; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3 -; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0 -; AVX512DQ-NEXT: kmovw %k0, 10(%rdi) -; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2 -; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512DQ-NEXT: kmovw %k0, 8(%rdi) -; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2 -; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0 -; AVX512DQ-NEXT: kmovw %k0, 6(%rdi) -; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1 -; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512DQ-NEXT: kmovw %k0, 4(%rdi) -; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1 -; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512DQ-NEXT: kmovw %k0, 2(%rdi) +; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k1 +; AVX512DQ-NEXT: vpcmpgtb %ymm5, %ymm1, %ymm0 +; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm1 +; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k2 +; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k3 +; AVX512DQ-NEXT: vpcmpgtb %ymm6, %ymm2, %ymm0 +; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm1 +; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k4 +; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k5 +; AVX512DQ-NEXT: vpcmpgtb %ymm7, %ymm3, %ymm0 +; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm1 +; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k6 +; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k7 +; AVX512DQ-NEXT: kmovw %k7, 14(%rdi) +; AVX512DQ-NEXT: kmovw %k6, 12(%rdi) +; AVX512DQ-NEXT: kmovw %k5, 10(%rdi) +; AVX512DQ-NEXT: kmovw %k4, 8(%rdi) +; AVX512DQ-NEXT: kmovw %k3, 6(%rdi) +; AVX512DQ-NEXT: kmovw %k2, 4(%rdi) +; AVX512DQ-NEXT: kmovw %k1, 2(%rdi) ; AVX512DQ-NEXT: kmovw %k0, (%rdi) ; AVX512DQ-NEXT: movq %rdi, %rax ; AVX512DQ-NEXT: vzeroupper @@ -6910,40 +6896,32 @@ ; ; AVX512F-LABEL: test_cmp_v32f64: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vcmpltpd %zmm0, %zmm4, %k0 -; AVX512F-NEXT: vcmpltpd %zmm1, %zmm5, %k1 -; AVX512F-NEXT: kunpckbw %k0, %k1, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vcmpltpd %zmm2, %zmm6, %k0 ; AVX512F-NEXT: vcmpltpd %zmm3, %zmm7, %k1 ; AVX512F-NEXT: kunpckbw %k0, %k1, %k1 +; AVX512F-NEXT: vcmpltpd %zmm0, %zmm4, %k0 +; AVX512F-NEXT: vcmpltpd %zmm1, %zmm5, %k2 +; AVX512F-NEXT: kunpckbw %k0, %k2, %k2 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: retq ; ; AVX512DQ-LABEL: test_cmp_v32f64: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm4, %k0 -; AVX512DQ-NEXT: vcmpltpd %zmm1, %zmm5, %k1 -; AVX512DQ-NEXT: kunpckbw %k0, %k1, %k0 -; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 -; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vcmpltpd %zmm2, %zmm6, %k0 ; AVX512DQ-NEXT: vcmpltpd %zmm3, %zmm7, %k1 ; AVX512DQ-NEXT: kunpckbw %k0, %k1, %k0 +; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm4, %k1 +; AVX512DQ-NEXT: vcmpltpd %zmm1, %zmm5, %k2 +; AVX512DQ-NEXT: kunpckbw %k1, %k2, %k1 +; AVX512DQ-NEXT: vpmovm2d %k1, %zmm0 +; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: test_cmp_v32f64: @@ -7561,40 +7539,32 @@ ; ; AVX512F-LABEL: test_cmp_v32i64: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpcmpgtq %zmm4, %zmm0, %k0 -; AVX512F-NEXT: vpcmpgtq %zmm5, %zmm1, %k1 -; AVX512F-NEXT: kunpckbw %k0, %k1, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vpcmpgtq %zmm6, %zmm2, %k0 ; AVX512F-NEXT: vpcmpgtq %zmm7, %zmm3, %k1 ; AVX512F-NEXT: kunpckbw %k0, %k1, %k1 +; AVX512F-NEXT: vpcmpgtq %zmm4, %zmm0, %k0 +; AVX512F-NEXT: vpcmpgtq %zmm5, %zmm1, %k2 +; AVX512F-NEXT: kunpckbw %k0, %k2, %k2 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512F-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; AVX512F-NEXT: retq ; ; AVX512DQ-LABEL: test_cmp_v32i64: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpcmpgtq %zmm4, %zmm0, %k0 -; AVX512DQ-NEXT: vpcmpgtq %zmm5, %zmm1, %k1 -; AVX512DQ-NEXT: kunpckbw %k0, %k1, %k0 -; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 -; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vpcmpgtq %zmm6, %zmm2, %k0 ; AVX512DQ-NEXT: vpcmpgtq %zmm7, %zmm3, %k1 ; AVX512DQ-NEXT: kunpckbw %k0, %k1, %k0 +; AVX512DQ-NEXT: vpcmpgtq %zmm4, %zmm0, %k1 +; AVX512DQ-NEXT: vpcmpgtq %zmm5, %zmm1, %k2 +; AVX512DQ-NEXT: kunpckbw %k1, %k2, %k1 +; AVX512DQ-NEXT: vpmovm2d %k1, %zmm0 +; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1 ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: test_cmp_v32i64: Index: llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll @@ -214,20 +214,40 @@ define <32 x i1> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<32 x i1> %a) { ; AVX512F-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[3,6,u,12,3,7,7,0,3,6,1,13,3,u,7,0,u,u,22,u,u,u,u,u,u,u,u,u,u,21,u,u] -; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX512F-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,6,u,u,u,u,u,u,u,u,u,u,5,u,u,19,22,u,28,19,23,23,16,19,22,17,29,19,u,23,16] -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,255,255,255,255,255,255,255,255,255,255,0,255,255,0,0,255,0,0,0,0,0,0,0,0,0,0,255,0,0] -; AVX512F-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1 +; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1 +; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k2 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0] +; AVX512F-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 +; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[3,6,u,12,3,7,7,0,3,6,1,13,3,u,7,0,u,u,22,u,u,u,u,u,u,u,u,u,u,21,u,u] -; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,6,u,u,u,u,u,u,u,u,u,u,5,u,u,19,22,u,28,19,23,23,16,19,22,17,29,19,u,23,16] -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,255,255,255,255,255,255,255,255,255,255,0,255,255,0,0,255,0,0,0,0,0,0,0,0,0,0,255,0,0] -; AVX512VL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpmovsxbd %xmm0, %zmm1 +; AVX512VL-NEXT: vpslld $31, %zmm1, %zmm1 +; AVX512VL-NEXT: vptestmd %zmm1, %zmm1, %k1 +; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512VL-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512VL-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512VL-NEXT: vptestmd %zmm0, %zmm0, %k2 +; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; AVX512VL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0] +; AVX512VL-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 +; AVX512VL-NEXT: vptestmd %zmm2, %zmm2, %k1 +; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; VL_BW_DQ-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0: @@ -250,25 +270,19 @@ ; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6 ; AVX512F-NEXT: vpcmpeqw %ymm6, %ymm0, %ymm0 ; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512F-NEXT: vpcmpeqw %ymm6, %ymm1, %ymm1 -; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1 -; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 -; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[3,6,u,12,3,7,7,0,3,6,1,13,3,u,7,0,u,u,22,u,u,u,u,u,u,u,u,u,u,21,u,u] -; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX512F-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,6,u,u,u,u,u,u,u,u,u,u,5,u,u,19,22,u,28,19,23,23,16,19,22,17,29,19,u,23,16] -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [255,255,0,255,255,255,255,255,255,255,255,255,255,0,255,255,0,0,255,0,0,0,0,0,0,0,0,0,0,255,0,0] -; AVX512F-NEXT: vpblendvb %ymm6, %ymm1, %ymm0, %ymm1 -; AVX512F-NEXT: vpmovsxbw %xmm1, %ymm0 -; AVX512F-NEXT: vpandn %ymm4, %ymm0, %ymm4 -; AVX512F-NEXT: vpand %ymm0, %ymm2, %ymm0 -; AVX512F-NEXT: vpor %ymm4, %ymm0, %ymm0 -; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm1 -; AVX512F-NEXT: vpmovsxbw %xmm1, %ymm1 -; AVX512F-NEXT: vpandn %ymm5, %ymm1, %ymm2 -; AVX512F-NEXT: vpand %ymm1, %ymm3, %ymm1 -; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 +; AVX512F-NEXT: vpcmpeqw %ymm6, %ymm1, %ymm0 +; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k2 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm6 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0] +; AVX512F-NEXT: vpermi2d %zmm0, %zmm1, %zmm6 +; AVX512F-NEXT: vptestmd %zmm6, %zmm6, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpmovdw %zmm0, %ymm1 +; AVX512F-NEXT: vpblendvb %ymm1, %ymm2, %ymm4, %ymm0 +; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm5, %ymm1 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_icmp_v32i16: @@ -276,25 +290,19 @@ ; AVX512VL-NEXT: vpxor %xmm6, %xmm6, %xmm6 ; AVX512VL-NEXT: vpcmpeqw %ymm6, %ymm0, %ymm0 ; AVX512VL-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512VL-NEXT: vpcmpeqw %ymm6, %ymm1, %ymm1 -; AVX512VL-NEXT: vpmovsxwd %ymm1, %zmm1 -; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1 -; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512VL-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[3,6,u,12,3,7,7,0,3,6,1,13,3,u,7,0,u,u,22,u,u,u,u,u,u,u,u,u,u,21,u,u] -; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,6,u,u,u,u,u,u,u,u,u,u,5,u,u,19,22,u,28,19,23,23,16,19,22,17,29,19,u,23,16] -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [255,255,0,255,255,255,255,255,255,255,255,255,255,0,255,255,0,0,255,0,0,0,0,0,0,0,0,0,0,255,0,0] -; AVX512VL-NEXT: vpblendvb %ymm6, %ymm1, %ymm0, %ymm1 -; AVX512VL-NEXT: vpmovsxbw %xmm1, %ymm0 -; AVX512VL-NEXT: vpandn %ymm4, %ymm0, %ymm4 -; AVX512VL-NEXT: vpand %ymm0, %ymm2, %ymm0 -; AVX512VL-NEXT: vpor %ymm4, %ymm0, %ymm0 -; AVX512VL-NEXT: vextracti128 $1, %ymm1, %xmm1 -; AVX512VL-NEXT: vpmovsxbw %xmm1, %ymm1 -; AVX512VL-NEXT: vpandn %ymm5, %ymm1, %ymm2 -; AVX512VL-NEXT: vpand %ymm1, %ymm3, %ymm1 -; AVX512VL-NEXT: vpor %ymm2, %ymm1, %ymm1 +; AVX512VL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; AVX512VL-NEXT: vpcmpeqw %ymm6, %ymm1, %ymm0 +; AVX512VL-NEXT: vpmovsxwd %ymm0, %zmm0 +; AVX512VL-NEXT: vptestmd %zmm0, %zmm0, %k2 +; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; AVX512VL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm6 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0] +; AVX512VL-NEXT: vpermi2d %zmm0, %zmm1, %zmm6 +; AVX512VL-NEXT: vptestmd %zmm6, %zmm6, %k1 +; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512VL-NEXT: vpmovdw %zmm0, %ymm1 +; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm4, %ymm0 +; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm5, %ymm1 ; AVX512VL-NEXT: retq ; ; VL_BW_DQ-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_icmp_v32i16: @@ -318,11 +326,19 @@ ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb {{.*#+}} ymm3 = ymm0[3,6,u,12,3,7,7,0,3,6,1,13,3,u,7,0,u,u,22,u,u,u,u,u,u,u,u,u,u,21,u,u] -; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX512F-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,6,u,u,u,u,u,u,u,u,u,u,5,u,u,19,22,u,28,19,23,23,16,19,22,17,29,19,u,23,16] -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,0,255,255,255,255,255,255,255,255,255,255,0,255,255,0,0,255,0,0,0,0,0,0,0,0,0,0,255,0,0] -; AVX512F-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm3 +; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k1 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k2 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z} +; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm4 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0] +; AVX512F-NEXT: vpermi2d %zmm0, %zmm3, %zmm4 +; AVX512F-NEXT: vptestmd %zmm4, %zmm4, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 ; AVX512F-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 ; AVX512F-NEXT: retq ; @@ -330,11 +346,19 @@ ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 ; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0 -; AVX512VL-NEXT: vpshufb {{.*#+}} ymm3 = ymm0[3,6,u,12,3,7,7,0,3,6,1,13,3,u,7,0,u,u,22,u,u,u,u,u,u,u,u,u,u,21,u,u] -; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,6,u,u,u,u,u,u,u,u,u,u,5,u,u,19,22,u,28,19,23,23,16,19,22,17,29,19,u,23,16] -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,0,255,255,255,255,255,255,255,255,255,255,0,255,255,0,0,255,0,0,0,0,0,0,0,0,0,0,255,0,0] -; AVX512VL-NEXT: vpblendvb %ymm4, %ymm3, %ymm0, %ymm0 +; AVX512VL-NEXT: vpmovsxbd %xmm0, %zmm3 +; AVX512VL-NEXT: vptestmd %zmm3, %zmm3, %k1 +; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512VL-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512VL-NEXT: vptestmd %zmm0, %zmm0, %k2 +; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; AVX512VL-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z} +; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm4 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0] +; AVX512VL-NEXT: vpermi2d %zmm0, %zmm3, %zmm4 +; AVX512VL-NEXT: vptestmd %zmm4, %zmm4, %k1 +; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 ; AVX512VL-NEXT: vpblendvb %ymm0, %ymm1, %ymm2, %ymm0 ; AVX512VL-NEXT: retq ; @@ -360,24 +384,14 @@ ; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6 ; AVX512F-NEXT: vpcmpeqd %zmm6, %zmm0, %k1 ; AVX512F-NEXT: vpcmpeqd %zmm6, %zmm1, %k2 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm6 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0] +; AVX512F-NEXT: vpermi2d %zmm0, %zmm1, %zmm6 +; AVX512F-NEXT: vptestmd %zmm6, %zmm6, %k1 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} -; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 -; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[3,6,u,12,3,7,7,0,3,6,1,13,3,u,7,0,u,u,22,u,u,u,u,u,u,u,u,u,u,21,u,u] -; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX512F-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,6,u,u,u,u,u,u,u,u,u,u,5,u,u,19,22,u,28,19,23,23,16,19,22,17,29,19,u,23,16] -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [255,255,0,255,255,255,255,255,255,255,255,255,255,0,255,255,0,0,255,0,0,0,0,0,0,0,0,0,0,255,0,0] -; AVX512F-NEXT: vpblendvb %ymm6, %ymm1, %ymm0, %ymm1 -; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512F-NEXT: vpsllw $15, %ymm0, %ymm0 -; AVX512F-NEXT: vpsraw $15, %ymm0, %ymm0 -; AVX512F-NEXT: vpblendvb %ymm0, %ymm2, %ymm4, %ymm0 -; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm1 -; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512F-NEXT: vpsllw $15, %ymm1, %ymm1 -; AVX512F-NEXT: vpsraw $15, %ymm1, %ymm1 +; AVX512F-NEXT: vpmovdw %zmm0, %ymm1 +; AVX512F-NEXT: vpblendvb %ymm1, %ymm2, %ymm4, %ymm0 ; AVX512F-NEXT: vpblendvb %ymm1, %ymm3, %ymm5, %ymm1 ; AVX512F-NEXT: retq ; @@ -386,24 +400,14 @@ ; AVX512VL-NEXT: vpxor %xmm6, %xmm6, %xmm6 ; AVX512VL-NEXT: vpcmpeqd %zmm6, %zmm0, %k1 ; AVX512VL-NEXT: vpcmpeqd %zmm6, %zmm1, %k2 +; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; AVX512VL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm6 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0] +; AVX512VL-NEXT: vpermi2d %zmm0, %zmm1, %zmm6 +; AVX512VL-NEXT: vptestmd %zmm6, %zmm6, %k1 ; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512VL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} -; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1 -; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512VL-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[3,6,u,12,3,7,7,0,3,6,1,13,3,u,7,0,u,u,22,u,u,u,u,u,u,u,u,u,u,21,u,u] -; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,6,u,u,u,u,u,u,u,u,u,u,5,u,u,19,22,u,28,19,23,23,16,19,22,17,29,19,u,23,16] -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [255,255,0,255,255,255,255,255,255,255,255,255,255,0,255,255,0,0,255,0,0,0,0,0,0,0,0,0,0,255,0,0] -; AVX512VL-NEXT: vpblendvb %ymm6, %ymm1, %ymm0, %ymm1 -; AVX512VL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512VL-NEXT: vpsllw $15, %ymm0, %ymm0 -; AVX512VL-NEXT: vpsraw $15, %ymm0, %ymm0 -; AVX512VL-NEXT: vpblendvb %ymm0, %ymm2, %ymm4, %ymm0 -; AVX512VL-NEXT: vextracti128 $1, %ymm1, %xmm1 -; AVX512VL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512VL-NEXT: vpsllw $15, %ymm1, %ymm1 -; AVX512VL-NEXT: vpsraw $15, %ymm1, %ymm1 +; AVX512VL-NEXT: vpmovdw %zmm0, %ymm1 +; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm4, %ymm0 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm5, %ymm1 ; AVX512VL-NEXT: retq ; @@ -433,18 +437,14 @@ ; AVX512F-NEXT: vpxor %xmm4, %xmm4, %xmm4 ; AVX512F-NEXT: vpcmpeqd %zmm4, %zmm0, %k1 ; AVX512F-NEXT: vpcmpeqd %zmm4, %zmm1, %k2 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm4 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0] +; AVX512F-NEXT: vpermi2d %zmm0, %zmm1, %zmm4 +; AVX512F-NEXT: vptestmd %zmm4, %zmm4, %k1 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} -; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 -; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[3,6,u,12,3,7,7,0,3,6,1,13,3,u,7,0,u,u,22,u,u,u,u,u,u,u,u,u,u,21,u,u] -; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX512F-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,6,u,u,u,u,u,u,u,u,u,u,5,u,u,19,22,u,28,19,23,23,16,19,22,17,29,19,u,23,16] -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,0,255,255,255,255,255,255,255,255,255,255,0,255,255,0,0,255,0,0,0,0,0,0,0,0,0,0,255,0,0] -; AVX512F-NEXT: vpblendvb %ymm4, %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 +; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 ; AVX512F-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm0 ; AVX512F-NEXT: retq ; @@ -453,18 +453,14 @@ ; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4 ; AVX512VL-NEXT: vpcmpeqd %zmm4, %zmm0, %k1 ; AVX512VL-NEXT: vpcmpeqd %zmm4, %zmm1, %k2 +; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; AVX512VL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} +; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm4 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0] +; AVX512VL-NEXT: vpermi2d %zmm0, %zmm1, %zmm4 +; AVX512VL-NEXT: vptestmd %zmm4, %zmm4, %k1 ; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512VL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} -; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1 -; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512VL-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[3,6,u,12,3,7,7,0,3,6,1,13,3,u,7,0,u,u,22,u,u,u,u,u,u,u,u,u,u,21,u,u] -; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1] -; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,6,u,u,u,u,u,u,u,u,u,u,5,u,u,19,22,u,28,19,23,23,16,19,22,17,29,19,u,23,16] -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,0,255,255,255,255,255,255,255,255,255,255,0,255,255,0,0,255,0,0,0,0,0,0,0,0,0,0,255,0,0] -; AVX512VL-NEXT: vpblendvb %ymm4, %ymm1, %ymm0, %ymm0 -; AVX512VL-NEXT: vpsllw $7, %ymm0, %ymm0 -; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 +; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 ; AVX512VL-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm0 ; AVX512VL-NEXT: retq ; @@ -852,65 +848,33 @@ define i64 @shuf64i1_zero(i64 %a) { ; AVX512F-LABEL: shuf64i1_zero: ; AVX512F: # %bb.0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: .cfi_def_cfa_offset 16 -; AVX512F-NEXT: .cfi_offset %rbp, -16 -; AVX512F-NEXT: movq %rsp, %rbp -; AVX512F-NEXT: .cfi_def_cfa_register %rbp -; AVX512F-NEXT: andq $-32, %rsp -; AVX512F-NEXT: subq $96, %rsp -; AVX512F-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 +; AVX512F-NEXT: kmovw %edi, %k1 ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0 -; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1 -; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1 -; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512F-NEXT: vpbroadcastd %xmm0, %zmm0 ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kmovw %k0, (%rsp) -; AVX512F-NEXT: movl (%rsp), %ecx +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: kmovw %k0, %ecx +; AVX512F-NEXT: shll $16, %ecx +; AVX512F-NEXT: orl %eax, %ecx ; AVX512F-NEXT: movq %rcx, %rax ; AVX512F-NEXT: shlq $32, %rax ; AVX512F-NEXT: orq %rcx, %rax -; AVX512F-NEXT: movq %rbp, %rsp -; AVX512F-NEXT: popq %rbp ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: shuf64i1_zero: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: pushq %rbp -; AVX512VL-NEXT: .cfi_def_cfa_offset 16 -; AVX512VL-NEXT: .cfi_offset %rbp, -16 -; AVX512VL-NEXT: movq %rsp, %rbp -; AVX512VL-NEXT: .cfi_def_cfa_register %rbp -; AVX512VL-NEXT: andq $-32, %rsp -; AVX512VL-NEXT: subq $96, %rsp -; AVX512VL-NEXT: movl %edi, {{[0-9]+}}(%rsp) -; AVX512VL-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 +; AVX512VL-NEXT: kmovw %edi, %k1 ; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0 -; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512VL-NEXT: vpmovsxbd %xmm1, %zmm1 -; AVX512VL-NEXT: vpslld $31, %zmm1, %zmm1 -; AVX512VL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512VL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; AVX512VL-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512VL-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512VL-NEXT: vpbroadcastd %xmm0, %zmm0 ; AVX512VL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512VL-NEXT: kmovw %k0, (%rsp) -; AVX512VL-NEXT: movl (%rsp), %ecx +; AVX512VL-NEXT: kmovw %k0, %eax +; AVX512VL-NEXT: kmovw %k0, %ecx +; AVX512VL-NEXT: shll $16, %ecx +; AVX512VL-NEXT: orl %eax, %ecx ; AVX512VL-NEXT: movq %rcx, %rax ; AVX512VL-NEXT: shlq $32, %rax ; AVX512VL-NEXT: orq %rcx, %rax -; AVX512VL-NEXT: movq %rbp, %rsp -; AVX512VL-NEXT: popq %rbp ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq ;