Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -29266,16 +29266,22 @@ // truncating the result of the compare to 128-bits. break; case MVT::v32i1: - // TODO: Handle pre-AVX2 cases by splitting to two v16i1's. - if (!Subtarget.hasInt256()) - return SDValue(); SExtVT = MVT::v32i8; break; }; SDLoc DL(BitCast); SDValue V = DAG.getSExtOrTrunc(N0, DL, SExtVT); - if (SExtVT == MVT::v8i16) { + if (SExtVT == MVT::v32i8 && !Subtarget.hasInt256()) { + // Handle pre-AVX2 cases by splitting to two v16i1's. + SDValue Lo = extract128BitVector(V, 0, DAG, DL); + SDValue Hi = extract128BitVector(V, 16, DAG, DL); + Lo = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Lo); + Hi = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Hi); + Hi = DAG.getNode(ISD::SHL, DL, MVT::i32, Hi, DAG.getIntPtrConstant(16, DL)); + V = DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi); + return DAG.getZExtOrTrunc(V, DL, VT); + } else if (SExtVT == MVT::v8i16) { V = DAG.getBitcast(MVT::v16i8, V); V = DAG.getVectorShuffle( MVT::v16i8, DL, V, DAG.getUNDEF(MVT::v16i8), Index: test/CodeGen/X86/bitcast-and-setcc-256.ll =================================================================== --- test/CodeGen/X86/bitcast-and-setcc-256.ll +++ test/CodeGen/X86/bitcast-and-setcc-256.ll @@ -456,233 +456,28 @@ ; SSE2-SSSE3-NEXT: pand %xmm0, %xmm4 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm7, %xmm5 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm5 -; SSE2-SSSE3-NEXT: movdqa %xmm5, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movdqa %xmm4, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-SSSE3-NEXT: andb $1, %cl -; SSE2-SSSE3-NEXT: movb %cl, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-SSSE3-NEXT: andb $1, %cl -; SSE2-SSSE3-NEXT: movb %cl, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movl -{{[0-9]+}}(%rsp), %ecx -; SSE2-SSSE3-NEXT: shll $16, %ecx -; SSE2-SSSE3-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax +; SSE2-SSSE3-NEXT: pmovmskb %xmm4, %ecx +; SSE2-SSSE3-NEXT: pmovmskb %xmm5, %eax +; SSE2-SSSE3-NEXT: shll $16, %eax ; SSE2-SSSE3-NEXT: orl %ecx, %eax ; SSE2-SSSE3-NEXT: retq ; ; AVX1-LABEL: v32i8: ; AVX1: # BB#0: -; AVX1-NEXT: pushq %rbp -; AVX1-NEXT: .Lcfi0: -; AVX1-NEXT: .cfi_def_cfa_offset 16 -; AVX1-NEXT: .Lcfi1: -; AVX1-NEXT: .cfi_offset %rbp, -16 -; AVX1-NEXT: movq %rsp, %rbp -; AVX1-NEXT: .Lcfi2: -; AVX1-NEXT: .cfi_def_cfa_register %rbp -; AVX1-NEXT: andq $-32, %rsp -; AVX1-NEXT: subq $32, %rsp ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5 ; AVX1-NEXT: vpcmpgtb %xmm4, %xmm5, %xmm4 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm1 -; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 -; AVX1-NEXT: vpcmpgtb %xmm1, %xmm4, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5 +; AVX1-NEXT: vpcmpgtb %xmm1, %xmm5, %xmm1 +; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm1 ; AVX1-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 -; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpextrb $15, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $14, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $13, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $12, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $11, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $10, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $9, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $8, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $7, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $6, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $5, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $4, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $3, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $2, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $1, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $0, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $15, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $14, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $13, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $12, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $11, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $10, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $9, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $8, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $7, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $6, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $5, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $4, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $3, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $2, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $1, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $0, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: movl (%rsp), %eax -; AVX1-NEXT: movq %rbp, %rsp -; AVX1-NEXT: popq %rbp +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpmovmskb %xmm0, %ecx +; AVX1-NEXT: vpmovmskb %xmm1, %eax +; AVX1-NEXT: shll $16, %eax +; AVX1-NEXT: orl %ecx, %eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; Index: test/CodeGen/X86/bitcast-and-setcc-512.ll =================================================================== --- test/CodeGen/X86/bitcast-and-setcc-512.ll +++ test/CodeGen/X86/bitcast-and-setcc-512.ll @@ -2,8 +2,8 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512,AVX512F -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512,AVX512BW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BW define i8 @v8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i64> %d) { ; SSE-LABEL: v8i64: @@ -248,9 +248,9 @@ define i32 @v32i16(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c, <32 x i16> %d) { ; SSE-LABEL: v32i16: ; SSE: # BB#0: +; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 ; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9 ; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm10 -; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 ; SSE-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11 ; SSE-NEXT: pcmpgtw %xmm5, %xmm1 ; SSE-NEXT: movdqa {{.*#+}} xmm5 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> @@ -258,137 +258,48 @@ ; SSE-NEXT: pcmpgtw %xmm4, %xmm0 ; SSE-NEXT: pshufb %xmm5, %xmm0 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: psllw $7, %xmm0 +; SSE-NEXT: movdqa {{.*#+}} xmm12 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; SSE-NEXT: pand %xmm12, %xmm0 +; SSE-NEXT: pxor %xmm1, %xmm1 +; SSE-NEXT: pxor %xmm4, %xmm4 +; SSE-NEXT: pcmpgtb %xmm0, %xmm4 ; SSE-NEXT: pcmpgtw %xmm7, %xmm3 ; SSE-NEXT: pshufb %xmm5, %xmm3 ; SSE-NEXT: pcmpgtw %xmm6, %xmm2 ; SSE-NEXT: pshufb %xmm5, %xmm2 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; SSE-NEXT: psllw $7, %xmm2 +; SSE-NEXT: pand %xmm12, %xmm2 +; SSE-NEXT: pxor %xmm0, %xmm0 +; SSE-NEXT: pcmpgtb %xmm2, %xmm0 ; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm11 ; SSE-NEXT: pshufb %xmm5, %xmm11 -; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm8 -; SSE-NEXT: pshufb %xmm5, %xmm8 -; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm11[0] -; SSE-NEXT: pand %xmm0, %xmm8 ; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm10 ; SSE-NEXT: pshufb %xmm5, %xmm10 +; SSE-NEXT: punpcklqdq {{.*#+}} xmm10 = xmm10[0],xmm11[0] +; SSE-NEXT: psllw $7, %xmm10 +; SSE-NEXT: pand %xmm12, %xmm10 +; SSE-NEXT: pxor %xmm2, %xmm2 +; SSE-NEXT: pcmpgtb %xmm10, %xmm2 +; SSE-NEXT: pand %xmm4, %xmm2 ; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm9 ; SSE-NEXT: pshufb %xmm5, %xmm9 -; SSE-NEXT: punpcklqdq {{.*#+}} xmm9 = xmm9[0],xmm10[0] -; SSE-NEXT: pand %xmm2, %xmm9 -; SSE-NEXT: pextrb $15, %xmm9, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $14, %xmm9, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $13, %xmm9, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $12, %xmm9, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $11, %xmm9, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $10, %xmm9, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $9, %xmm9, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $8, %xmm9, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $7, %xmm9, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $6, %xmm9, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $5, %xmm9, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $4, %xmm9, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $3, %xmm9, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $2, %xmm9, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $1, %xmm9, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $0, %xmm9, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $15, %xmm8, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $14, %xmm8, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $13, %xmm8, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $12, %xmm8, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $11, %xmm8, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $10, %xmm8, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $9, %xmm8, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $8, %xmm8, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $7, %xmm8, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $6, %xmm8, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $5, %xmm8, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $4, %xmm8, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $3, %xmm8, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $2, %xmm8, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $1, %xmm8, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $0, %xmm8, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx -; SSE-NEXT: shll $16, %ecx -; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax +; SSE-NEXT: pcmpgtw {{[0-9]+}}(%rsp), %xmm8 +; SSE-NEXT: pshufb %xmm5, %xmm8 +; SSE-NEXT: punpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm9[0] +; SSE-NEXT: psllw $7, %xmm8 +; SSE-NEXT: pand %xmm12, %xmm8 +; SSE-NEXT: pcmpgtb %xmm8, %xmm1 +; SSE-NEXT: pand %xmm0, %xmm1 +; SSE-NEXT: pmovmskb %xmm2, %ecx +; SSE-NEXT: pmovmskb %xmm1, %eax +; SSE-NEXT: shll $16, %eax ; SSE-NEXT: orl %ecx, %eax ; SSE-NEXT: retq ; ; AVX1-LABEL: v32i16: ; AVX1: # BB#0: -; AVX1-NEXT: pushq %rbp -; AVX1-NEXT: .Lcfi0: -; AVX1-NEXT: .cfi_def_cfa_offset 16 -; AVX1-NEXT: .Lcfi1: -; AVX1-NEXT: .cfi_offset %rbp, -16 -; AVX1-NEXT: movq %rsp, %rbp -; AVX1-NEXT: .Lcfi2: -; AVX1-NEXT: .cfi_def_cfa_register %rbp -; AVX1-NEXT: andq $-32, %rsp -; AVX1-NEXT: subq $32, %rsp ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm8 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9 ; AVX1-NEXT: vpcmpgtw %xmm8, %xmm9, %xmm8 @@ -399,119 +310,30 @@ ; AVX1-NEXT: vpcmpgtw %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm8, %ymm0, %ymm0 ; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm1 ; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm2 ; AVX1-NEXT: vpcmpgtw %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vpcmpgtw %xmm7, %xmm5, %xmm2 ; AVX1-NEXT: vpacksswb %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpand %xmm1, %xmm8, %xmm1 ; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3 ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpcmpgtw %xmm6, %xmm4, %xmm3 ; AVX1-NEXT: vpacksswb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 -; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vpextrb $15, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $14, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $13, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $12, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $11, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $10, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $9, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $8, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $7, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $6, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $5, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $4, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $3, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $2, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $1, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $0, %xmm1, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $15, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $14, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $13, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $12, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $11, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $10, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $9, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $8, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $7, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $6, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $5, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $4, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $3, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $2, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $1, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $0, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: movl (%rsp), %eax -; AVX1-NEXT: movq %rbp, %rsp -; AVX1-NEXT: popq %rbp +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: vpcmpgtb %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vpmovmskb %xmm0, %ecx +; AVX1-NEXT: vpsllw $7, %xmm1, %xmm0 +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpgtb %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vpmovmskb %xmm0, %eax +; AVX1-NEXT: shll $16, %eax +; AVX1-NEXT: orl %ecx, %eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; @@ -1353,12 +1175,12 @@ ; AVX1-LABEL: v64i8: ; AVX1: # BB#0: ; AVX1-NEXT: pushq %rbp -; AVX1-NEXT: .Lcfi3: +; AVX1-NEXT: .Lcfi0: ; AVX1-NEXT: .cfi_def_cfa_offset 16 -; AVX1-NEXT: .Lcfi4: +; AVX1-NEXT: .Lcfi1: ; AVX1-NEXT: .cfi_offset %rbp, -16 ; AVX1-NEXT: movq %rsp, %rbp -; AVX1-NEXT: .Lcfi5: +; AVX1-NEXT: .Lcfi2: ; AVX1-NEXT: .cfi_def_cfa_register %rbp ; AVX1-NEXT: andq $-32, %rsp ; AVX1-NEXT: subq $64, %rsp Index: test/CodeGen/X86/bitcast-setcc-256.ll =================================================================== --- test/CodeGen/X86/bitcast-setcc-256.ll +++ test/CodeGen/X86/bitcast-setcc-256.ll @@ -160,227 +160,24 @@ define i32 @v32i8(<32 x i8> %a, <32 x i8> %b) { ; SSE2-SSSE3-LABEL: v32i8: ; SSE2-SSSE3: # BB#0: -; SSE2-SSSE3-NEXT: pcmpgtb %xmm3, %xmm1 -; SSE2-SSSE3-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) ; SSE2-SSSE3-NEXT: pcmpgtb %xmm2, %xmm0 -; SSE2-SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-SSSE3-NEXT: andb $1, %cl -; SSE2-SSSE3-NEXT: movb %cl, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al -; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %cl -; SSE2-SSSE3-NEXT: andb $1, %cl -; SSE2-SSSE3-NEXT: movb %cl, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: andb $1, %al -; SSE2-SSSE3-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movl -{{[0-9]+}}(%rsp), %ecx -; SSE2-SSSE3-NEXT: shll $16, %ecx -; SSE2-SSSE3-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax +; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %ecx +; SSE2-SSSE3-NEXT: pcmpgtb %xmm3, %xmm1 +; SSE2-SSSE3-NEXT: pmovmskb %xmm1, %eax +; SSE2-SSSE3-NEXT: shll $16, %eax ; SSE2-SSSE3-NEXT: orl %ecx, %eax ; SSE2-SSSE3-NEXT: retq ; ; AVX1-LABEL: v32i8: ; AVX1: # BB#0: -; AVX1-NEXT: pushq %rbp -; AVX1-NEXT: .Lcfi0: -; AVX1-NEXT: .cfi_def_cfa_offset 16 -; AVX1-NEXT: .Lcfi1: -; AVX1-NEXT: .cfi_offset %rbp, -16 -; AVX1-NEXT: movq %rsp, %rbp -; AVX1-NEXT: .Lcfi2: -; AVX1-NEXT: .cfi_def_cfa_register %rbp -; AVX1-NEXT: andq $-32, %rsp -; AVX1-NEXT: subq $32, %rsp -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 -; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpextrb $15, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $14, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $13, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $12, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $11, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $10, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $9, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $8, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $7, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $6, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $5, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $4, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $3, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $2, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $1, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $0, %xmm2, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) +; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vpmovmskb %xmm2, %ecx +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $15, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $14, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $13, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $12, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $11, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $10, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $9, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $8, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $7, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $6, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $5, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $4, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $3, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $2, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $1, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $0, %xmm0, %eax -; AVX1-NEXT: andb $1, %al -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: movl (%rsp), %eax -; AVX1-NEXT: movq %rbp, %rsp -; AVX1-NEXT: popq %rbp +; AVX1-NEXT: vpmovmskb %xmm0, %eax +; AVX1-NEXT: shll $16, %eax +; AVX1-NEXT: orl %ecx, %eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; Index: test/CodeGen/X86/bitcast-setcc-512.ll =================================================================== --- test/CodeGen/X86/bitcast-setcc-512.ll +++ test/CodeGen/X86/bitcast-setcc-512.ll @@ -2,237 +2,40 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512,AVX512F -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512,AVX512BW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BW define i32 @v32i16(<32 x i16> %a, <32 x i16> %b) { ; SSE-LABEL: v32i16: ; SSE: # BB#0: -; SSE-NEXT: pcmpgtw %xmm7, %xmm3 -; SSE-NEXT: pextrb $14, %xmm3, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $12, %xmm3, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $10, %xmm3, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $8, %xmm3, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $6, %xmm3, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $4, %xmm3, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $2, %xmm3, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $0, %xmm3, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pcmpgtw %xmm6, %xmm2 -; SSE-NEXT: pextrb $14, %xmm2, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $12, %xmm2, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $10, %xmm2, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $8, %xmm2, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $6, %xmm2, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $4, %xmm2, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $2, %xmm2, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $0, %xmm2, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; SSE-NEXT: pcmpgtw %xmm5, %xmm1 -; SSE-NEXT: pextrb $14, %xmm1, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $12, %xmm1, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $10, %xmm1, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $8, %xmm1, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $6, %xmm1, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $4, %xmm1, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $2, %xmm1, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $0, %xmm1, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; SSE-NEXT: pcmpgtw %xmm4, %xmm0 -; SSE-NEXT: pextrb $14, %xmm0, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $12, %xmm0, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $10, %xmm0, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $8, %xmm0, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $6, %xmm0, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $4, %xmm0, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $2, %xmm0, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: pextrb $0, %xmm0, %eax -; SSE-NEXT: andb $1, %al -; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movl -{{[0-9]+}}(%rsp), %ecx -; SSE-NEXT: shll $16, %ecx -; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax +; SSE-NEXT: packsswb %xmm1, %xmm0 +; SSE-NEXT: pmovmskb %xmm0, %ecx +; SSE-NEXT: pcmpgtw %xmm7, %xmm3 +; SSE-NEXT: pcmpgtw %xmm6, %xmm2 +; SSE-NEXT: packsswb %xmm3, %xmm2 +; SSE-NEXT: pmovmskb %xmm2, %eax +; SSE-NEXT: shll $16, %eax ; SSE-NEXT: orl %ecx, %eax ; SSE-NEXT: retq ; ; AVX1-LABEL: v32i16: ; AVX1: # BB#0: -; AVX1-NEXT: pushq %rbp -; AVX1-NEXT: .Lcfi0: -; AVX1-NEXT: .cfi_def_cfa_offset 16 -; AVX1-NEXT: .Lcfi1: -; AVX1-NEXT: .cfi_offset %rbp, -16 -; AVX1-NEXT: movq %rsp, %rbp -; AVX1-NEXT: .Lcfi2: -; AVX1-NEXT: .cfi_def_cfa_register %rbp -; AVX1-NEXT: andq $-32, %rsp -; AVX1-NEXT: subq $32, %rsp -; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5 +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5 ; AVX1-NEXT: vpcmpgtw %xmm4, %xmm5, %xmm4 -; AVX1-NEXT: vpextrb $14, %xmm4, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $12, %xmm4, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $10, %xmm4, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $8, %xmm4, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $6, %xmm4, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $4, %xmm4, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $2, %xmm4, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $0, %xmm4, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpcmpgtw %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vpextrb $14, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $12, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $10, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $8, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $6, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $4, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $2, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $0, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 -; AVX1-NEXT: vpcmpgtw %xmm1, %xmm3, %xmm1 -; AVX1-NEXT: vpextrb $14, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $12, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $10, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $8, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $6, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $4, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $2, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $0, %xmm1, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) ; AVX1-NEXT: vpcmpgtw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $14, %xmm0, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $12, %xmm0, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $10, %xmm0, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $8, %xmm0, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $6, %xmm0, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $4, %xmm0, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $2, %xmm0, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: vpextrb $0, %xmm0, %eax -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: movb %al, (%rsp) -; AVX1-NEXT: movl (%rsp), %eax -; AVX1-NEXT: movq %rbp, %rsp -; AVX1-NEXT: popq %rbp +; AVX1-NEXT: vpacksswb %xmm4, %xmm0, %xmm0 +; AVX1-NEXT: vpmovmskb %xmm0, %ecx +; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm0 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpcmpgtw %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: vpcmpgtw %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vpacksswb %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vpmovmskb %xmm0, %eax +; AVX1-NEXT: shll $16, %eax +; AVX1-NEXT: orl %ecx, %eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; @@ -757,12 +560,12 @@ ; AVX1-LABEL: v64i8: ; AVX1: # BB#0: ; AVX1-NEXT: pushq %rbp -; AVX1-NEXT: .Lcfi3: +; AVX1-NEXT: .Lcfi0: ; AVX1-NEXT: .cfi_def_cfa_offset 16 -; AVX1-NEXT: .Lcfi4: +; AVX1-NEXT: .Lcfi1: ; AVX1-NEXT: .cfi_offset %rbp, -16 ; AVX1-NEXT: movq %rsp, %rbp -; AVX1-NEXT: .Lcfi5: +; AVX1-NEXT: .Lcfi2: ; AVX1-NEXT: .cfi_def_cfa_register %rbp ; AVX1-NEXT: andq $-32, %rsp ; AVX1-NEXT: subq $64, %rsp