Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -1365,6 +1365,12 @@ setOperationPromotedToType(ISD::LOAD, VT, MVT::v8i64); setOperationPromotedToType(ISD::SELECT, VT, MVT::v8i64); } + + // Need to custom split v32i16/v64i8 bitcasts. + if (!Subtarget.hasBWI()) { + setOperationAction(ISD::BITCAST, MVT::v32i16, Custom); + setOperationAction(ISD::BITCAST, MVT::v64i8, Custom); + } }// has AVX-512 // This block controls legalization for operations that don't have @@ -23688,6 +23694,19 @@ return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lo, Hi); } + // Custom splitting for BWI types when AVX512F is available but BWI isn't. + if ((SrcVT == MVT::v32i16 || SrcVT == MVT::v64i8) && DstVT.isVector() && + DAG.getTargetLoweringInfo().isTypeLegal(DstVT)) { + SDLoc dl(Op); + SDValue Lo, Hi; + std::tie(Lo, Hi) = DAG.SplitVector(Op.getOperand(0), dl); + EVT CastVT = MVT::getVectorVT(DstVT.getVectorElementType(), + DstVT.getVectorNumElements() / 2); + Lo = DAG.getBitcast(CastVT, Lo); + Hi = DAG.getBitcast(CastVT, Hi); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, DstVT, Lo, Hi); + } + if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 || SrcVT == MVT::i64) { assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); @@ -25077,6 +25096,19 @@ return; } + // Custom splitting for BWI types when AVX512F is available but BWI isn't. + if ((DstVT == MVT::v32i16 || DstVT == MVT::v64i8) && + SrcVT.isVector() && isTypeLegal(SrcVT)) { + SDValue Lo, Hi; + std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); + MVT CastVT = (DstVT == MVT::v32i16) ? MVT::v16i16 : MVT::v32i8; + Lo = DAG.getBitcast(CastVT, Lo); + Hi = DAG.getBitcast(CastVT, Hi); + SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, DstVT, Lo, Hi); + Results.push_back(Res); + return; + } + if (SrcVT != MVT::f64 || (DstVT != MVT::v2i32 && DstVT != MVT::v4i16 && DstVT != MVT::v8i8)) return; Index: test/CodeGen/X86/avx512vl-vec-masked-cmp.ll =================================================================== --- test/CodeGen/X86/avx512vl-vec-masked-cmp.ll +++ test/CodeGen/X86/avx512vl-vec-masked-cmp.ll @@ -926,182 +926,14 @@ ; ; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 -; NoVLX-NEXT: vmovq %xmm2, %rax -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm3 -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4 -; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8 -; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5 -; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3 -; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm2, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm3, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq 
$32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm3, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm6, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm6, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm0, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vmovq %xmm7, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm7, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm5, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm5, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm8, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, 
%xmm5 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm8, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5 -; NoVLX-NEXT: vmovq %xmm1, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax -; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1 -; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0 -; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3 -; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1 -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 -; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0 +; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3 +; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm2, %ymm2 +; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm0 +; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: shll $16, %eax @@ -1127,91 +959,7 @@ ; ; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1 -; NoVLX-NEXT: vmovq %xmm1, %rax -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm2 -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4 -; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vmovq %xmm4, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, 
%eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm3, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm3, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm0, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 +; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -1245,190 +993,22 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm9 -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm3 -; NoVLX-NEXT: vmovq %xmm3, %rax -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm4 -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm5 -; NoVLX-NEXT: vmovq %xmm9, %rax -; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm4 -; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm2 -; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm3, %rdx -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3 -; NoVLX-NEXT: movl %edx, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rdx, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm2, %rcx -; NoVLX-NEXT: shrq $48, %rdx -; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm10 -; NoVLX-NEXT: movl %ecx, %edx -; NoVLX-NEXT: shrl $16, %edx -; NoVLX-NEXT: vmovd %ecx, %xmm5 -; NoVLX-NEXT: vpinsrw $1, %edx, %xmm5, %xmm5 -; NoVLX-NEXT: movq %rcx, %rdx -; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2 -; NoVLX-NEXT: movl %edx, %ecx -; 
NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rdx, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm6, %rcx -; NoVLX-NEXT: shrq $48, %rdx -; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm5 -; NoVLX-NEXT: movl %ecx, %edx -; NoVLX-NEXT: shrl $16, %edx -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rcx, %rdx -; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm6, %rdx -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: movl %edx, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rdx, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm0, %rcx -; NoVLX-NEXT: shrq $48, %rdx -; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm6 -; NoVLX-NEXT: movl %ecx, %edx -; NoVLX-NEXT: shrl $16, %edx -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rcx, %rdx -; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rdx -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm0 -; NoVLX-NEXT: movl %edx, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: movq %rdx, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vmovq %xmm7, %rcx -; NoVLX-NEXT: shrq $48, %rdx -; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm8 -; NoVLX-NEXT: movl %ecx, %edx -; NoVLX-NEXT: shrl $16, %edx -; NoVLX-NEXT: vmovd %ecx, %xmm0 -; NoVLX-NEXT: vpinsrw $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: movq %rcx, %rdx -; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrq $1, %xmm7, %rdx -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: movl %edx, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: movq %rdx, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vmovq %xmm4, %rcx -; NoVLX-NEXT: shrq $48, %rdx -; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: movl %ecx, %edx -; NoVLX-NEXT: shrl $16, %edx -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rcx, %rdx -; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm7 -; NoVLX-NEXT: movl %eax, %edx -; NoVLX-NEXT: shrl $16, %edx -; NoVLX-NEXT: vmovd %eax, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm9, %rdx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: movl %edx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rdx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm1, %rax -; NoVLX-NEXT: shrq $48, %rdx -; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: movl %eax, %edx -; NoVLX-NEXT: 
shrl $16, %edx -; NoVLX-NEXT: vmovd %eax, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rdx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $3, %eax, %xmm3, %xmm1 -; NoVLX-NEXT: movl %edx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: movq %rdx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: shrq $48, %rdx -; NoVLX-NEXT: vpinsrw $7, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rax -; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm8, %ymm3 -; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm3, %ymm1 -; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm7, %xmm1 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: andl %edi, %ecx +; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm2 +; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 +; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: shrl $16, %edi -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vinserti128 $1, %xmm10, %ymm5, %ymm2 -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpeqw %ymm0, %ymm2, %ymm0 +; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 +; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 +; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: andl %edi, %edx -; NoVLX-NEXT: shll $16, %edx -; NoVLX-NEXT: movzwl %cx, %eax -; NoVLX-NEXT: orl %edx, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: andl %edi, %ecx +; NoVLX-NEXT: shll $16, %ecx +; NoVLX-NEXT: movzwl %ax, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -1453,97 +1033,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 -; NoVLX-NEXT: vmovq %xmm2, %rax -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm3 -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm1, %rax -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4 -; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2 -; NoVLX-NEXT: movl %edx, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rdx, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm4, %rcx -; NoVLX-NEXT: shrq $48, %rdx -; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: movl %ecx, %edx -; NoVLX-NEXT: shrl $16, %edx -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rcx, %rdx -; 
NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rdx -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movl %edx, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rdx, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vmovd %eax, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm1 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vmovq %xmm0, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vmovd %eax, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm0 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; NoVLX-NEXT: shrq $48, %rdx -; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm1 -; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm1 +; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: shrl $16, %edi -; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm0 +; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6319,182 +5815,14 @@ ; ; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 -; NoVLX-NEXT: vmovq %xmm2, %rax -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm3 -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4 -; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8 -; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5 -; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3 -; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm2, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; 
NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm3, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm3, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm6, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm6, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm0, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vmovq %xmm7, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm7, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm5, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm5, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; 
NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm8, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm5 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm8, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5 -; NoVLX-NEXT: vmovq %xmm1, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax -; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1 -; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0 -; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3 -; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1 -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 -; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0 +; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3 +; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2 +; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm0 +; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: shll $16, %eax @@ -6520,91 +5848,7 @@ ; ; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1 -; NoVLX-NEXT: vmovq %xmm1, %rax -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm2 -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4 -; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vmovq %xmm4, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; 
NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm3, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm3, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm0, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 +; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -6638,190 +5882,22 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm9 -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm3 -; NoVLX-NEXT: vmovq %xmm3, %rax -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm4 -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm5 -; NoVLX-NEXT: vmovq %xmm9, %rax -; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm4 -; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm2 -; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm3, %rdx -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3 -; NoVLX-NEXT: movl %edx, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rdx, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm2, %rcx -; NoVLX-NEXT: shrq $48, %rdx -; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm10 -; NoVLX-NEXT: movl %ecx, %edx -; 
NoVLX-NEXT: shrl $16, %edx -; NoVLX-NEXT: vmovd %ecx, %xmm5 -; NoVLX-NEXT: vpinsrw $1, %edx, %xmm5, %xmm5 -; NoVLX-NEXT: movq %rcx, %rdx -; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2 -; NoVLX-NEXT: movl %edx, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rdx, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm6, %rcx -; NoVLX-NEXT: shrq $48, %rdx -; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm5 -; NoVLX-NEXT: movl %ecx, %edx -; NoVLX-NEXT: shrl $16, %edx -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rcx, %rdx -; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm6, %rdx -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: movl %edx, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rdx, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm0, %rcx -; NoVLX-NEXT: shrq $48, %rdx -; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm6 -; NoVLX-NEXT: movl %ecx, %edx -; NoVLX-NEXT: shrl $16, %edx -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rcx, %rdx -; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rdx -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm0 -; NoVLX-NEXT: movl %edx, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: movq %rdx, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vmovq %xmm7, %rcx -; NoVLX-NEXT: shrq $48, %rdx -; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm8 -; NoVLX-NEXT: movl %ecx, %edx -; NoVLX-NEXT: shrl $16, %edx -; NoVLX-NEXT: vmovd %ecx, %xmm0 -; NoVLX-NEXT: vpinsrw $1, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: movq %rcx, %rdx -; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpextrq $1, %xmm7, %rdx -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: movl %edx, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: movq %rdx, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vmovq %xmm4, %rcx -; NoVLX-NEXT: shrq $48, %rdx -; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0 -; NoVLX-NEXT: movl %ecx, %edx -; NoVLX-NEXT: shrl $16, %edx -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rcx, %rdx -; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm7 -; NoVLX-NEXT: movl %eax, %edx -; NoVLX-NEXT: shrl $16, %edx -; NoVLX-NEXT: vmovd %eax, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm9, %rdx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: movl %edx, %eax -; NoVLX-NEXT: shrl $16, 
%eax -; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rdx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm1, %rax -; NoVLX-NEXT: shrq $48, %rdx -; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: movl %eax, %edx -; NoVLX-NEXT: shrl $16, %edx -; NoVLX-NEXT: vmovd %eax, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rdx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $3, %eax, %xmm3, %xmm1 -; NoVLX-NEXT: movl %edx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $4, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: movq %rdx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: shrq $48, %rdx -; NoVLX-NEXT: vpinsrw $7, %edx, %xmm1, %xmm1 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rax -; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm8, %ymm3 -; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 -; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1 -; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 -; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm7, %xmm1 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: andl %edi, %ecx +; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm2 +; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 +; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 +; NoVLX-NEXT: kmovw %k0, %eax +; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: shrl $16, %edi -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vinserti128 $1, %xmm10, %ymm5, %ymm2 -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 -; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 +; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 +; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 +; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, %edx -; NoVLX-NEXT: andl %edi, %edx -; NoVLX-NEXT: shll $16, %edx -; NoVLX-NEXT: movzwl %cx, %eax -; NoVLX-NEXT: orl %edx, %eax +; NoVLX-NEXT: kmovw %k0, %ecx +; NoVLX-NEXT: andl %edi, %ecx +; NoVLX-NEXT: shll $16, %ecx +; NoVLX-NEXT: movzwl %ax, %eax +; NoVLX-NEXT: orl %ecx, %eax ; NoVLX-NEXT: vzeroupper ; NoVLX-NEXT: retq entry: @@ -6846,97 +5922,13 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 -; NoVLX-NEXT: vmovq %xmm2, %rax -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm3 -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm1, %rax -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4 -; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2 -; NoVLX-NEXT: movl %edx, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rdx, %rcx -; 
NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm4, %rcx -; NoVLX-NEXT: shrq $48, %rdx -; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: movl %ecx, %edx -; NoVLX-NEXT: shrl $16, %edx -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rcx, %rdx -; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rdx -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movl %edx, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rdx, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vmovd %eax, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm1 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vmovq %xmm0, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vmovd %eax, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm0 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; NoVLX-NEXT: shrq $48, %rdx -; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm1 -; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0 -; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 -; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm1 +; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 +; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: andl %edi, %eax ; NoVLX-NEXT: shrl $16, %edi -; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm0 +; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 @@ -11796,185 +10788,17 @@ ; ; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 -; NoVLX-NEXT: vmovq %xmm2, %rax -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm3 -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4 -; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8 -; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5 -; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3 -; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw 
$2, %edx, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm2, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm3, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm3, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm6, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm6, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm0, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vmovq %xmm7, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm7, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm5, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm3 -; NoVLX-NEXT: vpinsrw $1, 
%eax, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpextrq $1, %xmm5, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm8, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm5 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpextrq $1, %xmm8, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5 -; NoVLX-NEXT: vmovq %xmm1, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm1, %rax -; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1 -; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0 -; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3 -; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1 -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 -; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 +; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3 +; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2 +; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %ecx -; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1 -; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm0 +; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2 +; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -12001,95 +10825,11 @@ ; ; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1 -; NoVLX-NEXT: vmovq %xmm1, %rax -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm2 -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3 -; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4 -; NoVLX-NEXT: shrq $32, %rdx -; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm1, 
%rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 -; NoVLX-NEXT: vmovq %xmm4, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm2 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpextrq $1, %xmm4, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 -; NoVLX-NEXT: vmovq %xmm3, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm3, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 -; NoVLX-NEXT: vmovq %xmm0, %rcx -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 -; NoVLX-NEXT: movl %ecx, %eax -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vmovd %ecx, %xmm4 -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: movq %rcx, %rax -; NoVLX-NEXT: shrq $32, %rax -; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: vpextrq $1, %xmm0, %rax -; NoVLX-NEXT: shrq $48, %rcx -; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0 -; NoVLX-NEXT: movl %eax, %ecx -; NoVLX-NEXT: shrl $16, %ecx -; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: shrq $32, %rcx -; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 -; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 -; NoVLX-NEXT: shrq $48, %rax -; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 -; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 +; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm2 +; NoVLX-NEXT: vmovdqa 32(%rdi), %ymm3 +; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 -; NoVLX-NEXT: vmovdqa 32(%rdi), %ymm2 -; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1 ; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 @@ -12125,194 +10865,26 @@ ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm9 -; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm3 -; NoVLX-NEXT: vmovq %xmm3, %rax -; NoVLX-NEXT: movq %rax, %rcx -; NoVLX-NEXT: movq %rax, %rdx -; NoVLX-NEXT: vmovd %eax, %xmm4 -; NoVLX-NEXT: shrl $16, %eax -; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 -; NoVLX-NEXT: 
vmovq %xmm9, %rax
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm2, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm10
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm6, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm4
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm6
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm0
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm7, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm8
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm5, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm7
-; NoVLX-NEXT: movl %eax, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %eax, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm9, %rdx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movl %edx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %eax, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rdx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm3, %xmm1
-; NoVLX-NEXT: movl %edx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rdx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm1, %xmm1
-; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm8, %ymm3
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
-; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm7, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm2
+; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: shrl $16, %edi
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm10, %ymm4, %ymm2
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
-; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: andl %edi, %edx
-; NoVLX-NEXT: shll $16, %edx
-; NoVLX-NEXT: movzwl %cx, %eax
-; NoVLX-NEXT: orl %edx, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: shll $16, %ecx
+; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -12337,100 +10909,16 @@
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm3
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm4, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vmovd %eax, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm1
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vmovd %eax, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm0
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
-; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm1
-; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
-; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm1
+; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: shrl $16, %edi
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
@@ -17309,185 +15797,17 @@
;
; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm3
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm3, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm6, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm7, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm5, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm8, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: vmovq %xmm1, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
-; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
-; NoVLX-NEXT: vpmaxuw %ymm2, %ymm0, %ymm2
-; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm3
+; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
+; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: vpmaxuw %ymm3, %ymm1, %ymm0
-; NoVLX-NEXT: vpcmpeqw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpmaxuw %ymm3, %ymm2, %ymm0
+; NoVLX-NEXT: vpcmpeqw %ymm0, %ymm2, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
@@ -17516,91 +15836,7 @@
;
; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm2
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm4, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm3, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; NoVLX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm2
; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
@@ -17640,196 +15876,28 @@
;
; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm9
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm3
-; NoVLX-NEXT: vmovq %xmm3, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm4
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vmovq %xmm9, %rax
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm2, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm10
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm6, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm4
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm6
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm0
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm7, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm8
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm5, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm7
-; NoVLX-NEXT: movl %eax, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %eax, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm9, %rdx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movl %edx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %eax, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %eax, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rdx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm3, %xmm1
-; NoVLX-NEXT: movl %edx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rdx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm1, %xmm1
-; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm8, %ymm3
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; NoVLX-NEXT: vpmaxuw %ymm1, %ymm3, %ymm1
-; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm3, %ymm1
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm7, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm2
+; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2
+; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: shrl $16, %edi
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm10, %ymm4, %ymm1
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0
-; NoVLX-NEXT: vpmaxuw %ymm0, %ymm1, %ymm0
-; NoVLX-NEXT: vpcmpeqw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
+; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
+; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: andl %edi, %edx
-; NoVLX-NEXT: shll $16, %edx
-; NoVLX-NEXT: movzwl %cx, %eax
-; NoVLX-NEXT: orl %edx, %eax
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: andl %edi, %ecx
+; NoVLX-NEXT: shll $16, %ecx
+; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -17854,100 +15922,16 @@
;
; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm3
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm4, %rcx
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: movl %ecx, %edx
-; NoVLX-NEXT: shrl $16, %edx
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rcx, %rdx
-; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rdx
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %edx, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movq %rdx, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vmovd %eax, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm1
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm0, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vmovd %eax, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm4, %xmm0
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmaxuw (%rsi), %ymm0, %ymm1
-; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: shrq $48, %rdx
-; NoVLX-NEXT: vpinsrw $7, %edx, %xmm3, %xmm1
-; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
-; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm1
+; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: shrl $16, %edi
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm0
+; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT: vpmaxuw 32(%rsi), %ymm0, %ymm1
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0