diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -17830,6 +17830,14 @@ return SDValue(); if (VT.getSizeInBits() == 8) { + // If IdxVal is 0, it's cheaper to do a move instead of a pextrb, unless + // we're going to zero extend the register or fold the store. + if (llvm::isNullConstant(Idx) && !MayFoldIntoZeroExtend(Op) && + !MayFoldIntoStore(Op)) + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, + DAG.getBitcast(MVT::v4i32, Vec), Idx)); + SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32, Vec, Idx); return DAG.getNode(ISD::TRUNCATE, dl, VT, Extract); } diff --git a/llvm/test/CodeGen/X86/avg.ll b/llvm/test/CodeGen/X86/avg.ll --- a/llvm/test/CodeGen/X86/avg.ll +++ b/llvm/test/CodeGen/X86/avg.ll @@ -2598,17 +2598,17 @@ ; AVX512BW-NEXT: vextracti128 $1, %ymm2, %xmm2 ; AVX512BW-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero ; AVX512BW-NEXT: vmovq %xmm2, %rdx -; AVX512BW-NEXT: vpextrq $1, %xmm2, %r10 +; AVX512BW-NEXT: vpextrq $1, %xmm2, %r15 ; AVX512BW-NEXT: vextracti128 $1, %ymm2, %xmm2 ; AVX512BW-NEXT: vmovq %xmm2, %r8 ; AVX512BW-NEXT: vpextrq $1, %xmm2, %r9 ; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm1 ; AVX512BW-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero ; AVX512BW-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero -; AVX512BW-NEXT: vmovq %xmm2, %r14 -; AVX512BW-NEXT: vpextrq $1, %xmm2, %r12 -; AVX512BW-NEXT: vextracti128 $1, %ymm2, %xmm2 ; AVX512BW-NEXT: vmovq %xmm2, %r11 +; AVX512BW-NEXT: vpextrq $1, %xmm2, %r10 +; AVX512BW-NEXT: vextracti128 $1, %ymm2, %xmm2 +; AVX512BW-NEXT: vmovq %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill ; AVX512BW-NEXT: vpextrq $1, %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill ; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm1 ; AVX512BW-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero @@ -2617,23 +2617,24 @@ ; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm1 ; AVX512BW-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512BW-NEXT: vpmovzxdq {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero -; AVX512BW-NEXT: vmovq %xmm3, %rax -; AVX512BW-NEXT: addq %rbx, %rax -; AVX512BW-NEXT: movq %rax, %rbx -; AVX512BW-NEXT: vpextrq $1, %xmm3, %r13 -; AVX512BW-NEXT: addq %rbp, %r13 -; AVX512BW-NEXT: vextracti128 $1, %ymm3, %xmm3 ; AVX512BW-NEXT: vmovq %xmm3, %rcx -; AVX512BW-NEXT: addq %rdi, %rcx +; AVX512BW-NEXT: addq %rbx, %rcx +; AVX512BW-NEXT: vpextrq $1, %xmm3, %rax +; AVX512BW-NEXT: addq %rbp, %rax +; AVX512BW-NEXT: movq %rax, %rbp +; AVX512BW-NEXT: vextracti128 $1, %ymm3, %xmm3 +; AVX512BW-NEXT: vmovq %xmm3, %r14 +; AVX512BW-NEXT: addq %rdi, %r14 ; AVX512BW-NEXT: vpextrq $1, %xmm3, %rax ; AVX512BW-NEXT: addq %rsi, %rax ; AVX512BW-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; AVX512BW-NEXT: vextracti128 $1, %ymm2, %xmm2 ; AVX512BW-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero -; AVX512BW-NEXT: vmovq %xmm2, %r15 -; AVX512BW-NEXT: addq %rdx, %r15 -; AVX512BW-NEXT: vpextrq $1, %xmm2, %rbp -; AVX512BW-NEXT: addq %r10, %rbp +; AVX512BW-NEXT: vmovq %xmm2, %rax +; AVX512BW-NEXT: addq %rdx, %rax +; AVX512BW-NEXT: movq %rax, %rdx +; AVX512BW-NEXT: vpextrq $1, %xmm2, %r12 +; AVX512BW-NEXT: addq %r15, %r12 ; AVX512BW-NEXT: vextracti128 $1, %ymm2, %xmm2 ; AVX512BW-NEXT: vmovq %xmm2, %rax ; AVX512BW-NEXT: addq %r8, %rax @@ -2645,54 +2646,54 @@ ; AVX512BW-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512BW-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; AVX512BW-NEXT: vmovq %xmm2, %rax -; AVX512BW-NEXT: addq %r14, %rax +; AVX512BW-NEXT: addq %r11, %rax ; AVX512BW-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; AVX512BW-NEXT: vpextrq $1, %xmm2, %rax -; AVX512BW-NEXT: addq %r12, %rax +; AVX512BW-NEXT: addq %r10, %rax ; AVX512BW-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; AVX512BW-NEXT: vextracti128 $1, %ymm2, %xmm2 -; AVX512BW-NEXT: vmovq %xmm2, %r12 -; AVX512BW-NEXT: addq %r11, %r12 -; AVX512BW-NEXT: vpextrq $1, %xmm2, %r11 -; AVX512BW-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Folded Reload +; AVX512BW-NEXT: vmovq %xmm2, %r13 +; AVX512BW-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload +; AVX512BW-NEXT: vpextrq $1, %xmm2, %rbx +; AVX512BW-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload ; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX512BW-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; AVX512BW-NEXT: vmovq %xmm0, %r9 +; AVX512BW-NEXT: vmovq %xmm0, %r10 +; AVX512BW-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload +; AVX512BW-NEXT: vpextrq $1, %xmm0, %r9 ; AVX512BW-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload -; AVX512BW-NEXT: vpextrq $1, %xmm0, %r8 -; AVX512BW-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload ; AVX512BW-NEXT: vmovq %xmm1, %rax ; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm0 -; AVX512BW-NEXT: vmovq %xmm0, %rsi -; AVX512BW-NEXT: addq %rax, %rsi +; AVX512BW-NEXT: vmovq %xmm0, %r8 +; AVX512BW-NEXT: addq %rax, %r8 ; AVX512BW-NEXT: vpextrq $1, %xmm1, %rdi -; AVX512BW-NEXT: vpextrq $1, %xmm0, %rdx -; AVX512BW-NEXT: addq %rdi, %rdx -; AVX512BW-NEXT: addq $-1, %rbx -; AVX512BW-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512BW-NEXT: movl $0, %r10d -; AVX512BW-NEXT: adcq $-1, %r10 -; AVX512BW-NEXT: addq $-1, %r13 -; AVX512BW-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512BW-NEXT: movl $0, %eax -; AVX512BW-NEXT: adcq $-1, %rax -; AVX512BW-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512BW-NEXT: vpextrq $1, %xmm0, %rsi +; AVX512BW-NEXT: addq %rdi, %rsi ; AVX512BW-NEXT: addq $-1, %rcx -; AVX512BW-NEXT: movq %rcx, (%rsp) # 8-byte Spill -; AVX512BW-NEXT: movl $0, %r14d -; AVX512BW-NEXT: adcq $-1, %r14 -; AVX512BW-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill -; AVX512BW-NEXT: movl $0, %ebx -; AVX512BW-NEXT: adcq $-1, %rbx -; AVX512BW-NEXT: addq $-1, %r15 -; AVX512BW-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512BW-NEXT: movl $0, %r13d -; AVX512BW-NEXT: adcq $-1, %r13 +; AVX512BW-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512BW-NEXT: movl $0, %r11d +; AVX512BW-NEXT: adcq $-1, %r11 ; AVX512BW-NEXT: addq $-1, %rbp ; AVX512BW-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512BW-NEXT: movl $0, %edi +; AVX512BW-NEXT: adcq $-1, %rdi +; AVX512BW-NEXT: addq $-1, %r14 +; AVX512BW-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; AVX512BW-NEXT: movl $0, %r15d ; AVX512BW-NEXT: adcq $-1, %r15 ; AVX512BW-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill +; AVX512BW-NEXT: movl $0, %r14d +; AVX512BW-NEXT: adcq $-1, %r14 +; AVX512BW-NEXT: addq $-1, %rdx +; AVX512BW-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512BW-NEXT: movl $0, %eax +; AVX512BW-NEXT: adcq $-1, %rax +; AVX512BW-NEXT: movq %rax, (%rsp) # 8-byte Spill +; AVX512BW-NEXT: addq $-1, %r12 +; AVX512BW-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512BW-NEXT: movl $0, %r12d +; AVX512BW-NEXT: adcq $-1, %r12 +; AVX512BW-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill ; AVX512BW-NEXT: movl $0, %eax ; AVX512BW-NEXT: adcq $-1, %rax ; AVX512BW-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill @@ -2708,14 +2709,17 @@ ; AVX512BW-NEXT: movl $0, %eax ; AVX512BW-NEXT: adcq $-1, %rax ; AVX512BW-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512BW-NEXT: addq $-1, %r12 +; AVX512BW-NEXT: addq $-1, %r13 ; AVX512BW-NEXT: movl $0, %eax ; AVX512BW-NEXT: adcq $-1, %rax ; AVX512BW-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; AVX512BW-NEXT: addq $-1, %r11 +; AVX512BW-NEXT: addq $-1, %rbx ; AVX512BW-NEXT: movl $0, %eax ; AVX512BW-NEXT: adcq $-1, %rax ; AVX512BW-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX512BW-NEXT: addq $-1, %r10 +; AVX512BW-NEXT: movl $0, %edx +; AVX512BW-NEXT: adcq $-1, %rdx ; AVX512BW-NEXT: addq $-1, %r9 ; AVX512BW-NEXT: movl $0, %ecx ; AVX512BW-NEXT: adcq $-1, %rcx @@ -2725,118 +2729,114 @@ ; AVX512BW-NEXT: addq $-1, %rsi ; AVX512BW-NEXT: movl $0, %ebp ; AVX512BW-NEXT: adcq $-1, %rbp -; AVX512BW-NEXT: addq $-1, %rdx -; AVX512BW-NEXT: movl $0, %edi -; AVX512BW-NEXT: adcq $-1, %rdi -; AVX512BW-NEXT: shldq $63, %rdx, %rdi -; AVX512BW-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; AVX512BW-NEXT: shldq $63, %rsi, %rbp +; AVX512BW-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; AVX512BW-NEXT: shldq $63, %r8, %rax ; AVX512BW-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; AVX512BW-NEXT: shldq $63, %r9, %rcx -; AVX512BW-NEXT: movq %rcx, %r8 -; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; AVX512BW-NEXT: shldq $63, %r11, %r9 -; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload -; AVX512BW-NEXT: shldq $63, %r12, %r11 -; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload +; AVX512BW-NEXT: movq %rcx, %rbp +; AVX512BW-NEXT: shldq $63, %r10, %rdx +; AVX512BW-NEXT: movq %rdx, %r9 +; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload +; AVX512BW-NEXT: shldq $63, %rbx, %r10 +; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload +; AVX512BW-NEXT: shldq $63, %r13, %r8 ; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512BW-NEXT: shldq $63, %rax, %r12 -; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; AVX512BW-NEXT: shldq $63, %rax, %rdi +; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload +; AVX512BW-NEXT: shldq $63, %rax, %r13 ; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; AVX512BW-NEXT: shldq $63, %rax, %rdx +; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload +; AVX512BW-NEXT: shldq $63, %rax, %rbx ; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload ; AVX512BW-NEXT: shldq $63, %rax, %rsi ; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512BW-NEXT: shldq $63, %rax, %r15 +; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; AVX512BW-NEXT: shldq $63, %rax, %rdx ; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512BW-NEXT: shldq $63, %rax, %r13 +; AVX512BW-NEXT: shldq $63, %rax, %r12 +; AVX512BW-NEXT: movq (%rsp), %rcx # 8-byte Reload +; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX512BW-NEXT: shldq $63, %rax, %rcx ; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512BW-NEXT: shldq $63, %rax, %rbx -; AVX512BW-NEXT: movq (%rsp), %rax # 8-byte Reload ; AVX512BW-NEXT: shldq $63, %rax, %r14 -; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512BW-NEXT: shldq $63, %rax, %rcx +; AVX512BW-NEXT: shldq $63, %rax, %r15 ; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; AVX512BW-NEXT: shldq $63, %rax, %r10 -; AVX512BW-NEXT: vmovq %r10, %xmm0 -; AVX512BW-NEXT: vmovq %rcx, %xmm1 +; AVX512BW-NEXT: shldq $63, %rax, %rdi +; AVX512BW-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; AVX512BW-NEXT: shldq $63, %rax, %r11 +; AVX512BW-NEXT: vmovq %r11, %xmm0 +; AVX512BW-NEXT: vmovq %rdi, %xmm1 ; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax -; AVX512BW-NEXT: vmovd %eax, %xmm2 -; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax -; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm2, %xmm1 -; AVX512BW-NEXT: vmovq %r14, %xmm2 -; AVX512BW-NEXT: vmovq %rbx, %xmm3 +; AVX512BW-NEXT: vmovd %xmm1, %eax +; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm0, %xmm1 +; AVX512BW-NEXT: vmovq %r15, %xmm2 +; AVX512BW-NEXT: vmovq %r14, %xmm3 ; AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 ; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm2 -; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax +; AVX512BW-NEXT: vmovd %xmm2, %eax ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 ; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm0 -; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax +; AVX512BW-NEXT: vmovd %xmm0, %eax ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm1, %xmm0 -; AVX512BW-NEXT: vmovq %r13, %xmm1 -; AVX512BW-NEXT: vmovq %r15, %xmm2 +; AVX512BW-NEXT: vmovq %rcx, %xmm1 +; AVX512BW-NEXT: vmovq %r12, %xmm2 ; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 -; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax +; AVX512BW-NEXT: vmovd %xmm1, %eax ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax +; AVX512BW-NEXT: vmovd %xmm2, %eax ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 -; AVX512BW-NEXT: vmovq %rsi, %xmm2 -; AVX512BW-NEXT: vmovq %rdx, %xmm3 +; AVX512BW-NEXT: vmovq %rdx, %xmm2 +; AVX512BW-NEXT: vmovq %rsi, %xmm3 ; AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 ; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 ; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm2 -; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax +; AVX512BW-NEXT: vmovd %xmm2, %eax ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm1 -; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax +; AVX512BW-NEXT: vmovd %xmm1, %eax ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 -; AVX512BW-NEXT: vmovq %rdi, %xmm1 -; AVX512BW-NEXT: vmovq %r12, %xmm2 +; AVX512BW-NEXT: vmovq %rbx, %xmm1 +; AVX512BW-NEXT: vmovq %r13, %xmm2 ; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 -; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax +; AVX512BW-NEXT: vmovd %xmm1, %eax ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax +; AVX512BW-NEXT: vmovd %xmm2, %eax ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 -; AVX512BW-NEXT: vmovq %r11, %xmm2 -; AVX512BW-NEXT: vmovq %r9, %xmm3 +; AVX512BW-NEXT: vmovq %r8, %xmm2 +; AVX512BW-NEXT: vmovq %r10, %xmm3 ; AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 ; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 ; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm2 -; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax +; AVX512BW-NEXT: vmovd %xmm2, %eax ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm1 -; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax +; AVX512BW-NEXT: vmovd %xmm1, %eax ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; AVX512BW-NEXT: vmovq %r8, %xmm1 -; AVX512BW-NEXT: vmovq {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 8-byte Folded Reload -; AVX512BW-NEXT: # xmm2 = mem[0],zero +; AVX512BW-NEXT: vmovq %r9, %xmm1 +; AVX512BW-NEXT: vmovq %rbp, %xmm2 ; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 -; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax +; AVX512BW-NEXT: vmovd %xmm1, %eax ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax +; AVX512BW-NEXT: vmovd %xmm2, %eax ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 -; AVX512BW-NEXT: vmovq %rbp, %xmm2 +; AVX512BW-NEXT: vmovq {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 8-byte Folded Reload +; AVX512BW-NEXT: # xmm2 = mem[0],zero ; AVX512BW-NEXT: vmovq {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 8-byte Folded Reload ; AVX512BW-NEXT: # xmm3 = mem[0],zero ; AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 ; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 ; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm2 -; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax +; AVX512BW-NEXT: vmovd %xmm2, %eax ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm1 -; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax +; AVX512BW-NEXT: vmovd %xmm1, %eax ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; AVX512BW-NEXT: vmovdqu %xmm0, (%rax) ; AVX512BW-NEXT: addq $24, %rsp diff --git a/llvm/test/CodeGen/X86/avx512-vec3-crash.ll b/llvm/test/CodeGen/X86/avx512-vec3-crash.ll --- a/llvm/test/CodeGen/X86/avx512-vec3-crash.ll +++ b/llvm/test/CodeGen/X86/avx512-vec3-crash.ll @@ -12,7 +12,7 @@ ; CHECK-NEXT: vpinsrb $1, %r8d, %xmm1, %xmm1 ; CHECK-NEXT: vpinsrb $2, %r9d, %xmm1, %xmm1 ; CHECK-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 -; CHECK-NEXT: vpextrb $0, %xmm0, %eax +; CHECK-NEXT: vmovd %xmm0, %eax ; CHECK-NEXT: vpextrb $1, %xmm0, %edx ; CHECK-NEXT: vpextrb $2, %xmm0, %ecx ; CHECK-NEXT: # kill: def $al killed $al killed $eax diff --git a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll --- a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll +++ b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll @@ -158,7 +158,7 @@ ; AVX512-NEXT: vpmovb2m %xmm0, %k0 ; AVX512-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ; AVX512-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %ecx +; AVX512-NEXT: vmovd %xmm0, %ecx ; AVX512-NEXT: vpextrb $1, %xmm0, %eax ; AVX512-NEXT: addb %cl, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax @@ -311,7 +311,7 @@ ; AVX512-NEXT: vpmovw2m %ymm0, %k0 ; AVX512-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ; AVX512-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %ecx +; AVX512-NEXT: vmovd %xmm0, %ecx ; AVX512-NEXT: vpextrb $1, %xmm0, %eax ; AVX512-NEXT: addb %cl, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax @@ -505,7 +505,7 @@ ; AVX512-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 ; AVX512-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ; AVX512-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %ecx +; AVX512-NEXT: vmovd %xmm0, %ecx ; AVX512-NEXT: vpextrb $1, %xmm0, %eax ; AVX512-NEXT: addb %cl, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax diff --git a/llvm/test/CodeGen/X86/buildvec-insertvec.ll b/llvm/test/CodeGen/X86/buildvec-insertvec.ll --- a/llvm/test/CodeGen/X86/buildvec-insertvec.ll +++ b/llvm/test/CodeGen/X86/buildvec-insertvec.ll @@ -23,8 +23,6 @@ ; SSE41-NEXT: cvttps2dq %xmm0, %xmm0 ; SSE41-NEXT: pextrb $8, %xmm0, %eax ; SSE41-NEXT: pextrb $4, %xmm0, %ecx -; SSE41-NEXT: pextrb $0, %xmm0, %edx -; SSE41-NEXT: movd %edx, %xmm0 ; SSE41-NEXT: pinsrb $1, %ecx, %xmm0 ; SSE41-NEXT: pinsrb $2, %eax, %xmm0 ; SSE41-NEXT: movl $255, %eax diff --git a/llvm/test/CodeGen/X86/extract-concat.ll b/llvm/test/CodeGen/X86/extract-concat.ll --- a/llvm/test/CodeGen/X86/extract-concat.ll +++ b/llvm/test/CodeGen/X86/extract-concat.ll @@ -10,8 +10,6 @@ ; SSE42-NEXT: cvttps2dq %xmm0, %xmm0 ; SSE42-NEXT: pextrb $8, %xmm0, %eax ; SSE42-NEXT: pextrb $4, %xmm0, %ecx -; SSE42-NEXT: pextrb $0, %xmm0, %edx -; SSE42-NEXT: movd %edx, %xmm0 ; SSE42-NEXT: pinsrb $1, %ecx, %xmm0 ; SSE42-NEXT: pinsrb $2, %eax, %xmm0 ; SSE42-NEXT: movl $255, %eax @@ -24,8 +22,6 @@ ; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 ; AVX-NEXT: vpextrb $8, %xmm0, %eax ; AVX-NEXT: vpextrb $4, %xmm0, %ecx -; AVX-NEXT: vpextrb $0, %xmm0, %edx -; AVX-NEXT: vmovd %edx, %xmm0 ; AVX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 ; AVX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ; AVX-NEXT: movl $255, %eax diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll b/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll --- a/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll +++ b/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll @@ -310,7 +310,7 @@ ; X86-SSE42-NEXT: psrlw $8, %xmm1 ; X86-SSE42-NEXT: pminub %xmm0, %xmm1 ; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 -; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: movd %xmm0, %eax ; X86-SSE42-NEXT: xorb $127, %al ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X86-SSE42-NEXT: retl @@ -321,7 +321,7 @@ ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX-NEXT: vmovd %xmm0, %eax ; X86-AVX-NEXT: xorb $127, %al ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX-NEXT: retl @@ -365,7 +365,7 @@ ; X64-SSE42-NEXT: psrlw $8, %xmm1 ; X64-SSE42-NEXT: pminub %xmm0, %xmm1 ; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 -; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: movd %xmm0, %eax ; X64-SSE42-NEXT: xorb $127, %al ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X64-SSE42-NEXT: retq @@ -376,7 +376,7 @@ ; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX-NEXT: vmovd %xmm0, %eax ; X64-AVX-NEXT: xorb $127, %al ; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX-NEXT: retq @@ -876,7 +876,7 @@ ; X86-SSE42-NEXT: psrlw $8, %xmm1 ; X86-SSE42-NEXT: pminub %xmm0, %xmm1 ; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 -; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: movd %xmm0, %eax ; X86-SSE42-NEXT: xorb $127, %al ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X86-SSE42-NEXT: retl @@ -889,7 +889,7 @@ ; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX1-NEXT: vmovd %xmm0, %eax ; X86-AVX1-NEXT: xorb $127, %al ; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX1-NEXT: vzeroupper @@ -903,7 +903,7 @@ ; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX2-NEXT: vmovd %xmm0, %eax ; X86-AVX2-NEXT: xorb $127, %al ; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX2-NEXT: vzeroupper @@ -954,7 +954,7 @@ ; X64-SSE42-NEXT: psrlw $8, %xmm1 ; X64-SSE42-NEXT: pminub %xmm0, %xmm1 ; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 -; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: movd %xmm0, %eax ; X64-SSE42-NEXT: xorb $127, %al ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X64-SSE42-NEXT: retq @@ -967,7 +967,7 @@ ; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX1-NEXT: vmovd %xmm0, %eax ; X64-AVX1-NEXT: xorb $127, %al ; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX1-NEXT: vzeroupper @@ -981,7 +981,7 @@ ; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX2-NEXT: vmovd %xmm0, %eax ; X64-AVX2-NEXT: xorb $127, %al ; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX2-NEXT: vzeroupper @@ -995,7 +995,7 @@ ; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX512-NEXT: vmovd %xmm0, %eax ; X64-AVX512-NEXT: xorb $127, %al ; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX512-NEXT: vzeroupper @@ -1662,7 +1662,7 @@ ; X86-SSE42-NEXT: psrlw $8, %xmm0 ; X86-SSE42-NEXT: pminub %xmm1, %xmm0 ; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 -; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: movd %xmm0, %eax ; X86-SSE42-NEXT: xorb $127, %al ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X86-SSE42-NEXT: retl @@ -1678,7 +1678,7 @@ ; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX1-NEXT: vmovd %xmm0, %eax ; X86-AVX1-NEXT: xorb $127, %al ; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX1-NEXT: vzeroupper @@ -1693,7 +1693,7 @@ ; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX2-NEXT: vmovd %xmm0, %eax ; X86-AVX2-NEXT: xorb $127, %al ; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX2-NEXT: vzeroupper @@ -1756,7 +1756,7 @@ ; X64-SSE42-NEXT: psrlw $8, %xmm0 ; X64-SSE42-NEXT: pminub %xmm1, %xmm0 ; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 -; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: movd %xmm0, %eax ; X64-SSE42-NEXT: xorb $127, %al ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X64-SSE42-NEXT: retq @@ -1772,7 +1772,7 @@ ; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX1-NEXT: vmovd %xmm0, %eax ; X64-AVX1-NEXT: xorb $127, %al ; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX1-NEXT: vzeroupper @@ -1787,7 +1787,7 @@ ; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX2-NEXT: vmovd %xmm0, %eax ; X64-AVX2-NEXT: xorb $127, %al ; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX2-NEXT: vzeroupper @@ -1803,7 +1803,7 @@ ; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX512-NEXT: vmovd %xmm0, %eax ; X64-AVX512-NEXT: xorb $127, %al ; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX512-NEXT: vzeroupper @@ -2028,7 +2028,7 @@ ; X86-SSE42-NEXT: psrlw $8, %xmm1 ; X86-SSE42-NEXT: pminub %xmm0, %xmm1 ; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 -; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: movd %xmm0, %eax ; X86-SSE42-NEXT: xorb $127, %al ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X86-SSE42-NEXT: retl @@ -2039,7 +2039,7 @@ ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX-NEXT: vmovd %xmm0, %eax ; X86-AVX-NEXT: xorb $127, %al ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX-NEXT: vzeroupper @@ -2084,7 +2084,7 @@ ; X64-SSE42-NEXT: psrlw $8, %xmm1 ; X64-SSE42-NEXT: pminub %xmm0, %xmm1 ; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 -; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: movd %xmm0, %eax ; X64-SSE42-NEXT: xorb $127, %al ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X64-SSE42-NEXT: retq @@ -2095,7 +2095,7 @@ ; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX-NEXT: vmovd %xmm0, %eax ; X64-AVX-NEXT: xorb $127, %al ; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX-NEXT: vzeroupper @@ -2156,7 +2156,7 @@ ; X86-SSE42-NEXT: psrlw $8, %xmm1 ; X86-SSE42-NEXT: pminub %xmm0, %xmm1 ; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 -; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: movd %xmm0, %eax ; X86-SSE42-NEXT: xorb $127, %al ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X86-SSE42-NEXT: retl @@ -2167,7 +2167,7 @@ ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX-NEXT: vmovd %xmm0, %eax ; X86-AVX-NEXT: xorb $127, %al ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX-NEXT: vzeroupper @@ -2212,7 +2212,7 @@ ; X64-SSE42-NEXT: psrlw $8, %xmm1 ; X64-SSE42-NEXT: pminub %xmm0, %xmm1 ; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 -; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: movd %xmm0, %eax ; X64-SSE42-NEXT: xorb $127, %al ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X64-SSE42-NEXT: retq @@ -2223,7 +2223,7 @@ ; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX-NEXT: vmovd %xmm0, %eax ; X64-AVX-NEXT: xorb $127, %al ; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll b/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll --- a/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll +++ b/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll @@ -312,7 +312,7 @@ ; X86-SSE42-NEXT: psrlw $8, %xmm1 ; X86-SSE42-NEXT: pminub %xmm0, %xmm1 ; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 -; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: movd %xmm0, %eax ; X86-SSE42-NEXT: xorb $-128, %al ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X86-SSE42-NEXT: retl @@ -323,7 +323,7 @@ ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX-NEXT: vmovd %xmm0, %eax ; X86-AVX-NEXT: xorb $-128, %al ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX-NEXT: retl @@ -367,7 +367,7 @@ ; X64-SSE42-NEXT: psrlw $8, %xmm1 ; X64-SSE42-NEXT: pminub %xmm0, %xmm1 ; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 -; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: movd %xmm0, %eax ; X64-SSE42-NEXT: xorb $-128, %al ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X64-SSE42-NEXT: retq @@ -378,7 +378,7 @@ ; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX-NEXT: vmovd %xmm0, %eax ; X64-AVX-NEXT: xorb $-128, %al ; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX-NEXT: retq @@ -880,7 +880,7 @@ ; X86-SSE42-NEXT: psrlw $8, %xmm1 ; X86-SSE42-NEXT: pminub %xmm0, %xmm1 ; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 -; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: movd %xmm0, %eax ; X86-SSE42-NEXT: xorb $-128, %al ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X86-SSE42-NEXT: retl @@ -893,7 +893,7 @@ ; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX1-NEXT: vmovd %xmm0, %eax ; X86-AVX1-NEXT: xorb $-128, %al ; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX1-NEXT: vzeroupper @@ -907,7 +907,7 @@ ; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX2-NEXT: vmovd %xmm0, %eax ; X86-AVX2-NEXT: xorb $-128, %al ; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX2-NEXT: vzeroupper @@ -958,7 +958,7 @@ ; X64-SSE42-NEXT: psrlw $8, %xmm1 ; X64-SSE42-NEXT: pminub %xmm0, %xmm1 ; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 -; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: movd %xmm0, %eax ; X64-SSE42-NEXT: xorb $-128, %al ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X64-SSE42-NEXT: retq @@ -971,7 +971,7 @@ ; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX1-NEXT: vmovd %xmm0, %eax ; X64-AVX1-NEXT: xorb $-128, %al ; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX1-NEXT: vzeroupper @@ -985,7 +985,7 @@ ; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX2-NEXT: vmovd %xmm0, %eax ; X64-AVX2-NEXT: xorb $-128, %al ; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX2-NEXT: vzeroupper @@ -999,7 +999,7 @@ ; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX512-NEXT: vmovd %xmm0, %eax ; X64-AVX512-NEXT: xorb $-128, %al ; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX512-NEXT: vzeroupper @@ -1666,7 +1666,7 @@ ; X86-SSE42-NEXT: psrlw $8, %xmm0 ; X86-SSE42-NEXT: pminub %xmm1, %xmm0 ; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 -; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: movd %xmm0, %eax ; X86-SSE42-NEXT: xorb $-128, %al ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X86-SSE42-NEXT: retl @@ -1682,7 +1682,7 @@ ; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX1-NEXT: vmovd %xmm0, %eax ; X86-AVX1-NEXT: xorb $-128, %al ; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX1-NEXT: vzeroupper @@ -1697,7 +1697,7 @@ ; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX2-NEXT: vmovd %xmm0, %eax ; X86-AVX2-NEXT: xorb $-128, %al ; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX2-NEXT: vzeroupper @@ -1760,7 +1760,7 @@ ; X64-SSE42-NEXT: psrlw $8, %xmm0 ; X64-SSE42-NEXT: pminub %xmm1, %xmm0 ; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 -; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: movd %xmm0, %eax ; X64-SSE42-NEXT: xorb $-128, %al ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X64-SSE42-NEXT: retq @@ -1776,7 +1776,7 @@ ; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX1-NEXT: vmovd %xmm0, %eax ; X64-AVX1-NEXT: xorb $-128, %al ; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX1-NEXT: vzeroupper @@ -1791,7 +1791,7 @@ ; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX2-NEXT: vmovd %xmm0, %eax ; X64-AVX2-NEXT: xorb $-128, %al ; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX2-NEXT: vzeroupper @@ -1807,7 +1807,7 @@ ; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX512-NEXT: vmovd %xmm0, %eax ; X64-AVX512-NEXT: xorb $-128, %al ; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX512-NEXT: vzeroupper @@ -2032,7 +2032,7 @@ ; X86-SSE42-NEXT: psrlw $8, %xmm1 ; X86-SSE42-NEXT: pminub %xmm0, %xmm1 ; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 -; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: movd %xmm0, %eax ; X86-SSE42-NEXT: xorb $-128, %al ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X86-SSE42-NEXT: retl @@ -2043,7 +2043,7 @@ ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX-NEXT: vmovd %xmm0, %eax ; X86-AVX-NEXT: xorb $-128, %al ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX-NEXT: vzeroupper @@ -2088,7 +2088,7 @@ ; X64-SSE42-NEXT: psrlw $8, %xmm1 ; X64-SSE42-NEXT: pminub %xmm0, %xmm1 ; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 -; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: movd %xmm0, %eax ; X64-SSE42-NEXT: xorb $-128, %al ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X64-SSE42-NEXT: retq @@ -2099,7 +2099,7 @@ ; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX-NEXT: vmovd %xmm0, %eax ; X64-AVX-NEXT: xorb $-128, %al ; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX-NEXT: vzeroupper @@ -2160,7 +2160,7 @@ ; X86-SSE42-NEXT: psrlw $8, %xmm1 ; X86-SSE42-NEXT: pminub %xmm0, %xmm1 ; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 -; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: movd %xmm0, %eax ; X86-SSE42-NEXT: xorb $-128, %al ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X86-SSE42-NEXT: retl @@ -2171,7 +2171,7 @@ ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX-NEXT: vmovd %xmm0, %eax ; X86-AVX-NEXT: xorb $-128, %al ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX-NEXT: vzeroupper @@ -2216,7 +2216,7 @@ ; X64-SSE42-NEXT: psrlw $8, %xmm1 ; X64-SSE42-NEXT: pminub %xmm0, %xmm1 ; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 -; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: movd %xmm0, %eax ; X64-SSE42-NEXT: xorb $-128, %al ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X64-SSE42-NEXT: retq @@ -2227,7 +2227,7 @@ ; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX-NEXT: vmovd %xmm0, %eax ; X64-AVX-NEXT: xorb $-128, %al ; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll --- a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll +++ b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll @@ -366,7 +366,7 @@ ; X86-SSE42-NEXT: psrlw $8, %xmm0 ; X86-SSE42-NEXT: pminub %xmm1, %xmm0 ; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 -; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: movd %xmm0, %eax ; X86-SSE42-NEXT: notb %al ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X86-SSE42-NEXT: retl @@ -378,7 +378,7 @@ ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX-NEXT: vmovd %xmm0, %eax ; X86-AVX-NEXT: notb %al ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX-NEXT: retl @@ -407,7 +407,7 @@ ; X64-SSE42-NEXT: psrlw $8, %xmm0 ; X64-SSE42-NEXT: pminub %xmm1, %xmm0 ; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 -; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: movd %xmm0, %eax ; X64-SSE42-NEXT: notb %al ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X64-SSE42-NEXT: retq @@ -419,7 +419,7 @@ ; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX1-NEXT: vmovd %xmm0, %eax ; X64-AVX1-NEXT: notb %al ; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX1-NEXT: retq @@ -431,7 +431,7 @@ ; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX2-NEXT: vmovd %xmm0, %eax ; X64-AVX2-NEXT: notb %al ; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX2-NEXT: retq @@ -442,7 +442,7 @@ ; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX512-NEXT: vmovd %xmm0, %eax ; X64-AVX512-NEXT: notb %al ; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX512-NEXT: retq @@ -988,7 +988,7 @@ ; X86-SSE42-NEXT: psrlw $8, %xmm0 ; X86-SSE42-NEXT: pminub %xmm1, %xmm0 ; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 -; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: movd %xmm0, %eax ; X86-SSE42-NEXT: notb %al ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X86-SSE42-NEXT: retl @@ -1002,7 +1002,7 @@ ; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX1-NEXT: vmovd %xmm0, %eax ; X86-AVX1-NEXT: notb %al ; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX1-NEXT: vzeroupper @@ -1017,7 +1017,7 @@ ; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX2-NEXT: vmovd %xmm0, %eax ; X86-AVX2-NEXT: notb %al ; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX2-NEXT: vzeroupper @@ -1049,7 +1049,7 @@ ; X64-SSE42-NEXT: psrlw $8, %xmm0 ; X64-SSE42-NEXT: pminub %xmm1, %xmm0 ; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 -; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: movd %xmm0, %eax ; X64-SSE42-NEXT: notb %al ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X64-SSE42-NEXT: retq @@ -1063,7 +1063,7 @@ ; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX1-NEXT: vmovd %xmm0, %eax ; X64-AVX1-NEXT: notb %al ; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX1-NEXT: vzeroupper @@ -1078,7 +1078,7 @@ ; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX2-NEXT: vmovd %xmm0, %eax ; X64-AVX2-NEXT: notb %al ; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX2-NEXT: vzeroupper @@ -1092,7 +1092,7 @@ ; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX512-NEXT: vmovd %xmm0, %eax ; X64-AVX512-NEXT: notb %al ; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX512-NEXT: vzeroupper @@ -1835,7 +1835,7 @@ ; X86-SSE42-NEXT: psrlw $8, %xmm1 ; X86-SSE42-NEXT: pminub %xmm0, %xmm1 ; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 -; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: movd %xmm0, %eax ; X86-SSE42-NEXT: notb %al ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X86-SSE42-NEXT: retl @@ -1852,7 +1852,7 @@ ; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX1-NEXT: vmovd %xmm0, %eax ; X86-AVX1-NEXT: notb %al ; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX1-NEXT: vzeroupper @@ -1868,7 +1868,7 @@ ; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX2-NEXT: vmovd %xmm0, %eax ; X86-AVX2-NEXT: notb %al ; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX2-NEXT: vzeroupper @@ -1904,7 +1904,7 @@ ; X64-SSE42-NEXT: psrlw $8, %xmm1 ; X64-SSE42-NEXT: pminub %xmm0, %xmm1 ; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 -; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: movd %xmm0, %eax ; X64-SSE42-NEXT: notb %al ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X64-SSE42-NEXT: retq @@ -1921,7 +1921,7 @@ ; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX1-NEXT: vmovd %xmm0, %eax ; X64-AVX1-NEXT: notb %al ; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX1-NEXT: vzeroupper @@ -1937,7 +1937,7 @@ ; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX2-NEXT: vmovd %xmm0, %eax ; X64-AVX2-NEXT: notb %al ; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX2-NEXT: vzeroupper @@ -1953,7 +1953,7 @@ ; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX512-NEXT: vmovd %xmm0, %eax ; X64-AVX512-NEXT: notb %al ; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX512-NEXT: vzeroupper @@ -2229,7 +2229,7 @@ ; X86-SSE42-NEXT: psrlw $8, %xmm0 ; X86-SSE42-NEXT: pminub %xmm1, %xmm0 ; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 -; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: movd %xmm0, %eax ; X86-SSE42-NEXT: notb %al ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X86-SSE42-NEXT: retl @@ -2241,7 +2241,7 @@ ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX-NEXT: vmovd %xmm0, %eax ; X86-AVX-NEXT: notb %al ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX-NEXT: vzeroupper @@ -2271,7 +2271,7 @@ ; X64-SSE42-NEXT: psrlw $8, %xmm0 ; X64-SSE42-NEXT: pminub %xmm1, %xmm0 ; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 -; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: movd %xmm0, %eax ; X64-SSE42-NEXT: notb %al ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X64-SSE42-NEXT: retq @@ -2283,7 +2283,7 @@ ; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX1-NEXT: vmovd %xmm0, %eax ; X64-AVX1-NEXT: notb %al ; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX1-NEXT: vzeroupper @@ -2296,7 +2296,7 @@ ; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX2-NEXT: vmovd %xmm0, %eax ; X64-AVX2-NEXT: notb %al ; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX2-NEXT: vzeroupper @@ -2308,7 +2308,7 @@ ; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX512-NEXT: vmovd %xmm0, %eax ; X64-AVX512-NEXT: notb %al ; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX512-NEXT: vzeroupper @@ -2354,7 +2354,7 @@ ; X86-SSE42-NEXT: psrlw $8, %xmm0 ; X86-SSE42-NEXT: pminub %xmm1, %xmm0 ; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 -; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: movd %xmm0, %eax ; X86-SSE42-NEXT: notb %al ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X86-SSE42-NEXT: retl @@ -2366,7 +2366,7 @@ ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX-NEXT: vmovd %xmm0, %eax ; X86-AVX-NEXT: notb %al ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX-NEXT: vzeroupper @@ -2396,7 +2396,7 @@ ; X64-SSE42-NEXT: psrlw $8, %xmm0 ; X64-SSE42-NEXT: pminub %xmm1, %xmm0 ; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 -; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: movd %xmm0, %eax ; X64-SSE42-NEXT: notb %al ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X64-SSE42-NEXT: retq @@ -2408,7 +2408,7 @@ ; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX1-NEXT: vmovd %xmm0, %eax ; X64-AVX1-NEXT: notb %al ; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX1-NEXT: vzeroupper @@ -2421,7 +2421,7 @@ ; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX2-NEXT: vmovd %xmm0, %eax ; X64-AVX2-NEXT: notb %al ; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX2-NEXT: vzeroupper @@ -2433,7 +2433,7 @@ ; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX512-NEXT: vmovd %xmm0, %eax ; X64-AVX512-NEXT: notb %al ; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX512-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll --- a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll +++ b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll @@ -335,7 +335,7 @@ ; X86-SSE42-NEXT: psrlw $8, %xmm1 ; X86-SSE42-NEXT: pminub %xmm0, %xmm1 ; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 -; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: movd %xmm0, %eax ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X86-SSE42-NEXT: retl ; @@ -344,7 +344,7 @@ ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX-NEXT: vmovd %xmm0, %eax ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX-NEXT: retl ; @@ -370,7 +370,7 @@ ; X64-SSE42-NEXT: psrlw $8, %xmm1 ; X64-SSE42-NEXT: pminub %xmm0, %xmm1 ; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 -; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: movd %xmm0, %eax ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X64-SSE42-NEXT: retq ; @@ -379,7 +379,7 @@ ; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX-NEXT: vmovd %xmm0, %eax ; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX-NEXT: retq %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> @@ -906,7 +906,7 @@ ; X86-SSE42-NEXT: psrlw $8, %xmm1 ; X86-SSE42-NEXT: pminub %xmm0, %xmm1 ; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 -; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: movd %xmm0, %eax ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X86-SSE42-NEXT: retl ; @@ -917,7 +917,7 @@ ; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX1-NEXT: vmovd %xmm0, %eax ; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX1-NEXT: vzeroupper ; X86-AVX1-NEXT: retl @@ -929,7 +929,7 @@ ; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX2-NEXT: vmovd %xmm0, %eax ; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX2-NEXT: vzeroupper ; X86-AVX2-NEXT: retl @@ -958,7 +958,7 @@ ; X64-SSE42-NEXT: psrlw $8, %xmm1 ; X64-SSE42-NEXT: pminub %xmm0, %xmm1 ; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 -; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: movd %xmm0, %eax ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X64-SSE42-NEXT: retq ; @@ -969,7 +969,7 @@ ; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX1-NEXT: vmovd %xmm0, %eax ; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX1-NEXT: vzeroupper ; X64-AVX1-NEXT: retq @@ -981,7 +981,7 @@ ; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX2-NEXT: vmovd %xmm0, %eax ; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX2-NEXT: vzeroupper ; X64-AVX2-NEXT: retq @@ -993,7 +993,7 @@ ; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX512-NEXT: vmovd %xmm0, %eax ; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX512-NEXT: vzeroupper ; X64-AVX512-NEXT: retq @@ -1717,7 +1717,7 @@ ; X86-SSE42-NEXT: psrlw $8, %xmm0 ; X86-SSE42-NEXT: pminub %xmm1, %xmm0 ; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 -; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: movd %xmm0, %eax ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X86-SSE42-NEXT: retl ; @@ -1731,7 +1731,7 @@ ; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX1-NEXT: vmovd %xmm0, %eax ; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX1-NEXT: vzeroupper ; X86-AVX1-NEXT: retl @@ -1744,7 +1744,7 @@ ; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX2-NEXT: vmovd %xmm0, %eax ; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX2-NEXT: vzeroupper ; X86-AVX2-NEXT: retl @@ -1777,7 +1777,7 @@ ; X64-SSE42-NEXT: psrlw $8, %xmm0 ; X64-SSE42-NEXT: pminub %xmm1, %xmm0 ; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 -; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: movd %xmm0, %eax ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X64-SSE42-NEXT: retq ; @@ -1791,7 +1791,7 @@ ; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX1-NEXT: vmovd %xmm0, %eax ; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX1-NEXT: vzeroupper ; X64-AVX1-NEXT: retq @@ -1804,7 +1804,7 @@ ; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX2-NEXT: vmovd %xmm0, %eax ; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX2-NEXT: vzeroupper ; X64-AVX2-NEXT: retq @@ -1818,7 +1818,7 @@ ; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX512-NEXT: vmovd %xmm0, %eax ; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX512-NEXT: vzeroupper ; X64-AVX512-NEXT: retq @@ -2025,7 +2025,7 @@ ; X86-SSE42-NEXT: psrlw $8, %xmm1 ; X86-SSE42-NEXT: pminub %xmm0, %xmm1 ; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 -; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: movd %xmm0, %eax ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X86-SSE42-NEXT: retl ; @@ -2034,7 +2034,7 @@ ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX-NEXT: vmovd %xmm0, %eax ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX-NEXT: vzeroupper ; X86-AVX-NEXT: retl @@ -2061,7 +2061,7 @@ ; X64-SSE42-NEXT: psrlw $8, %xmm1 ; X64-SSE42-NEXT: pminub %xmm0, %xmm1 ; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 -; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: movd %xmm0, %eax ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X64-SSE42-NEXT: retq ; @@ -2070,7 +2070,7 @@ ; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX-NEXT: vmovd %xmm0, %eax ; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX-NEXT: vzeroupper ; X64-AVX-NEXT: retq @@ -2113,7 +2113,7 @@ ; X86-SSE42-NEXT: psrlw $8, %xmm1 ; X86-SSE42-NEXT: pminub %xmm0, %xmm1 ; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 -; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: movd %xmm0, %eax ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X86-SSE42-NEXT: retl ; @@ -2122,7 +2122,7 @@ ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX-NEXT: vmovd %xmm0, %eax ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX-NEXT: vzeroupper ; X86-AVX-NEXT: retl @@ -2149,7 +2149,7 @@ ; X64-SSE42-NEXT: psrlw $8, %xmm1 ; X64-SSE42-NEXT: pminub %xmm0, %xmm1 ; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 -; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: movd %xmm0, %eax ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X64-SSE42-NEXT: retq ; @@ -2158,7 +2158,7 @@ ; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX-NEXT: vmovd %xmm0, %eax ; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX-NEXT: vzeroupper ; X64-AVX-NEXT: retq diff --git a/llvm/test/CodeGen/X86/scalar_widen_div.ll b/llvm/test/CodeGen/X86/scalar_widen_div.ll --- a/llvm/test/CodeGen/X86/scalar_widen_div.ll +++ b/llvm/test/CodeGen/X86/scalar_widen_div.ll @@ -261,8 +261,8 @@ ; CHECK-NEXT: cbtw ; CHECK-NEXT: idivb %cl ; CHECK-NEXT: movsbl %ah, %ecx -; CHECK-NEXT: pextrb $0, %xmm1, %edx -; CHECK-NEXT: pextrb $0, %xmm0, %eax +; CHECK-NEXT: movd %xmm1, %edx +; CHECK-NEXT: movd %xmm0, %eax ; CHECK-NEXT: cbtw ; CHECK-NEXT: idivb %dl ; CHECK-NEXT: movsbl %ah, %eax diff --git a/llvm/test/CodeGen/X86/var-permute-128.ll b/llvm/test/CodeGen/X86/var-permute-128.ll --- a/llvm/test/CodeGen/X86/var-permute-128.ll +++ b/llvm/test/CodeGen/X86/var-permute-128.ll @@ -922,7 +922,7 @@ ; SSE41-NEXT: movq %rsp, %rbp ; SSE41-NEXT: andq $-32, %rsp ; SSE41-NEXT: subq $544, %rsp # imm = 0x220 -; SSE41-NEXT: pextrb $0, %xmm2, %eax +; SSE41-NEXT: movd %xmm2, %eax ; SSE41-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; SSE41-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; SSE41-NEXT: andl $31, %eax diff --git a/llvm/test/CodeGen/X86/var-permute-512.ll b/llvm/test/CodeGen/X86/var-permute-512.ll --- a/llvm/test/CodeGen/X86/var-permute-512.ll +++ b/llvm/test/CodeGen/X86/var-permute-512.ll @@ -390,7 +390,7 @@ ; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2 ; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 ; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm4 -; AVX512F-NEXT: vpextrb $0, %xmm4, %eax +; AVX512F-NEXT: vmovd %xmm4, %eax ; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp) ; AVX512F-NEXT: vmovaps %ymm0, (%rsp) ; AVX512F-NEXT: vextractf64x4 $1, %zmm0, {{[0-9]+}}(%rsp) @@ -567,7 +567,7 @@ ; AVX512F-NEXT: vpextrb $15, %xmm4, %eax ; AVX512F-NEXT: andl $63, %eax ; AVX512F-NEXT: vpinsrb $15, 4032(%rsp,%rax), %xmm0, %xmm0 -; AVX512F-NEXT: vpextrb $0, %xmm2, %eax +; AVX512F-NEXT: vmovd %xmm2, %eax ; AVX512F-NEXT: andl $63, %eax ; AVX512F-NEXT: movzbl 2048(%rsp,%rax), %eax ; AVX512F-NEXT: vmovd %eax, %xmm4 @@ -616,7 +616,7 @@ ; AVX512F-NEXT: vpextrb $15, %xmm2, %eax ; AVX512F-NEXT: andl $63, %eax ; AVX512F-NEXT: vpinsrb $15, 3008(%rsp,%rax), %xmm4, %xmm2 -; AVX512F-NEXT: vpextrb $0, %xmm3, %eax +; AVX512F-NEXT: vmovd %xmm3, %eax ; AVX512F-NEXT: andl $63, %eax ; AVX512F-NEXT: movzbl 1024(%rsp,%rax), %eax ; AVX512F-NEXT: vmovd %eax, %xmm4 @@ -665,7 +665,7 @@ ; AVX512F-NEXT: vpextrb $15, %xmm3, %eax ; AVX512F-NEXT: andl $63, %eax ; AVX512F-NEXT: vpinsrb $15, 1984(%rsp,%rax), %xmm4, %xmm3 -; AVX512F-NEXT: vpextrb $0, %xmm1, %eax +; AVX512F-NEXT: vmovd %xmm1, %eax ; AVX512F-NEXT: andl $63, %eax ; AVX512F-NEXT: movzbl (%rsp,%rax), %eax ; AVX512F-NEXT: vmovd %eax, %xmm4 @@ -730,7 +730,7 @@ ; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm3 ; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm4 -; AVX512BW-NEXT: vpextrb $0, %xmm4, %eax +; AVX512BW-NEXT: vmovd %xmm4, %eax ; AVX512BW-NEXT: vmovaps %zmm0, (%rsp) ; AVX512BW-NEXT: andl $63, %eax ; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax @@ -780,7 +780,7 @@ ; AVX512BW-NEXT: vpextrb $15, %xmm4, %eax ; AVX512BW-NEXT: andl $63, %eax ; AVX512BW-NEXT: vpinsrb $15, (%rsp,%rax), %xmm0, %xmm0 -; AVX512BW-NEXT: vpextrb $0, %xmm3, %eax +; AVX512BW-NEXT: vmovd %xmm3, %eax ; AVX512BW-NEXT: andl $63, %eax ; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax ; AVX512BW-NEXT: vmovd %eax, %xmm4 @@ -832,7 +832,7 @@ ; AVX512BW-NEXT: andl $63, %eax ; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm3 -; AVX512BW-NEXT: vpextrb $0, %xmm2, %eax +; AVX512BW-NEXT: vmovd %xmm2, %eax ; AVX512BW-NEXT: andl $63, %eax ; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax ; AVX512BW-NEXT: vmovd %eax, %xmm4 @@ -885,7 +885,7 @@ ; AVX512BW-NEXT: andl $63, %eax ; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm4, %xmm2 -; AVX512BW-NEXT: vpextrb $0, %xmm1, %eax +; AVX512BW-NEXT: vmovd %xmm1, %eax ; AVX512BW-NEXT: andl $63, %eax ; AVX512BW-NEXT: movzbl (%rsp,%rax), %eax ; AVX512BW-NEXT: vmovd %eax, %xmm4 diff --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll --- a/llvm/test/CodeGen/X86/vector-bitreverse.ll +++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll @@ -54,7 +54,7 @@ ; XOP: # %bb.0: ; XOP-NEXT: vmovd %edi, %xmm0 ; XOP-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0 -; XOP-NEXT: vpextrb $0, %xmm0, %eax +; XOP-NEXT: vmovd %xmm0, %eax ; XOP-NEXT: # kill: def $al killed $al killed $eax ; XOP-NEXT: retq %b = call i8 @llvm.bitreverse.i8(i8 %a) diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll --- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll +++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll @@ -1059,8 +1059,8 @@ ; SSE41-NEXT: cbtw ; SSE41-NEXT: idivb %cl ; SSE41-NEXT: movsbl %ah, %ecx -; SSE41-NEXT: pextrb $0, %xmm1, %edx -; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: movd %xmm1, %edx +; SSE41-NEXT: movd %xmm0, %eax ; SSE41-NEXT: cbtw ; SSE41-NEXT: idivb %dl ; SSE41-NEXT: movsbl %ah, %eax @@ -1160,8 +1160,8 @@ ; AVX-NEXT: cbtw ; AVX-NEXT: idivb %cl ; AVX-NEXT: movsbl %ah, %ecx -; AVX-NEXT: vpextrb $0, %xmm1, %edx -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm1, %edx +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: cbtw ; AVX-NEXT: idivb %dl ; AVX-NEXT: movsbl %ah, %eax diff --git a/llvm/test/CodeGen/X86/vector-reduce-add.ll b/llvm/test/CodeGen/X86/vector-reduce-add.ll --- a/llvm/test/CodeGen/X86/vector-reduce-add.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-add.ll @@ -942,29 +942,20 @@ ; define i8 @test_v2i8(<2 x i8> %a0) { -; SSE2-LABEL: test_v2i8: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $8, %xmm1 -; SSE2-NEXT: paddb %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v2i8: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: paddb %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v2i8: +; SSE: # %bb.0: +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrlw $8, %xmm1 +; SSE-NEXT: paddb %xmm0, %xmm1 +; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX-LABEL: test_v2i8: ; AVX: # %bb.0: ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; @@ -972,7 +963,7 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> %a0) @@ -980,27 +971,16 @@ } define i8 @test_v2i8_load(<2 x i8>* %p) { -; SSE2-LABEL: test_v2i8_load: -; SSE2: # %bb.0: -; SSE2-NEXT: movzwl (%rdi), %eax -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $8, %xmm1 -; SSE2-NEXT: paddb %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v2i8_load: -; SSE41: # %bb.0: -; SSE41-NEXT: movzwl (%rdi), %eax -; SSE41-NEXT: movd %eax, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: paddb %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v2i8_load: +; SSE: # %bb.0: +; SSE-NEXT: movzwl (%rdi), %eax +; SSE-NEXT: movd %eax, %xmm0 +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrlw $8, %xmm1 +; SSE-NEXT: paddb %xmm0, %xmm1 +; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX-LABEL: test_v2i8_load: ; AVX: # %bb.0: @@ -1008,7 +988,7 @@ ; AVX-NEXT: vmovd %eax, %xmm0 ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; @@ -1018,7 +998,7 @@ ; AVX512-NEXT: vmovd %eax, %xmm0 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq %a0 = load <2 x i8>, <2 x i8>* %p @@ -1041,7 +1021,7 @@ ; SSE41-NEXT: pxor %xmm1, %xmm1 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7] ; SSE41-NEXT: psadbw %xmm1, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: movd %xmm0, %eax ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -1050,7 +1030,7 @@ ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7] ; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; @@ -1060,7 +1040,7 @@ ; AVX512-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7] ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> %a0) @@ -1068,30 +1048,21 @@ } define i8 @test_v4i8_load(<4 x i8>* %p) { -; SSE2-LABEL: test_v4i8_load: -; SSE2: # %bb.0: -; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: psadbw %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v4i8_load: -; SSE41: # %bb.0: -; SSE41-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE41-NEXT: pxor %xmm1, %xmm1 -; SSE41-NEXT: psadbw %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v4i8_load: +; SSE: # %bb.0: +; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE-NEXT: pxor %xmm1, %xmm1 +; SSE-NEXT: psadbw %xmm0, %xmm1 +; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX-LABEL: test_v4i8_load: ; AVX: # %bb.0: ; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; @@ -1100,7 +1071,7 @@ ; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq %a0 = load <4 x i8>, <4 x i8>* %p @@ -1109,27 +1080,19 @@ } define i8 @test_v8i8(<8 x i8> %a0) { -; SSE2-LABEL: test_v8i8: -; SSE2: # %bb.0: -; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: psadbw %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v8i8: -; SSE41: # %bb.0: -; SSE41-NEXT: pxor %xmm1, %xmm1 -; SSE41-NEXT: psadbw %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v8i8: +; SSE: # %bb.0: +; SSE-NEXT: pxor %xmm1, %xmm1 +; SSE-NEXT: psadbw %xmm0, %xmm1 +; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX-LABEL: test_v8i8: ; AVX: # %bb.0: ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; @@ -1137,7 +1100,7 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> %a0) @@ -1145,30 +1108,21 @@ } define i8 @test_v8i8_load(<8 x i8>* %p) { -; SSE2-LABEL: test_v8i8_load: -; SSE2: # %bb.0: -; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: psadbw %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v8i8_load: -; SSE41: # %bb.0: -; SSE41-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; SSE41-NEXT: pxor %xmm1, %xmm1 -; SSE41-NEXT: psadbw %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v8i8_load: +; SSE: # %bb.0: +; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; SSE-NEXT: pxor %xmm1, %xmm1 +; SSE-NEXT: psadbw %xmm0, %xmm1 +; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX-LABEL: test_v8i8_load: ; AVX: # %bb.0: ; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; @@ -1177,7 +1131,7 @@ ; AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq %a0 = load <8 x i8>, <8 x i8>* %p @@ -1186,25 +1140,15 @@ } define i8 @test_v16i8(<16 x i8> %a0) { -; SSE2-LABEL: test_v16i8: -; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; SSE2-NEXT: paddb %xmm0, %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: psadbw %xmm1, %xmm0 -; SSE2-NEXT: movd %xmm0, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v16i8: -; SSE41: # %bb.0: -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; SSE41-NEXT: paddb %xmm0, %xmm1 -; SSE41-NEXT: pxor %xmm0, %xmm0 -; SSE41-NEXT: psadbw %xmm1, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v16i8: +; SSE: # %bb.0: +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; SSE-NEXT: paddb %xmm0, %xmm1 +; SSE-NEXT: pxor %xmm0, %xmm0 +; SSE-NEXT: psadbw %xmm1, %xmm0 +; SSE-NEXT: movd %xmm0, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX-LABEL: test_v16i8: ; AVX: # %bb.0: @@ -1212,7 +1156,7 @@ ; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; @@ -1222,7 +1166,7 @@ ; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> %a0) @@ -1230,27 +1174,16 @@ } define i8 @test_v32i8(<32 x i8> %a0) { -; SSE2-LABEL: test_v32i8: -; SSE2: # %bb.0: -; SSE2-NEXT: paddb %xmm1, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; SSE2-NEXT: paddb %xmm0, %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: psadbw %xmm1, %xmm0 -; SSE2-NEXT: movd %xmm0, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v32i8: -; SSE41: # %bb.0: -; SSE41-NEXT: paddb %xmm1, %xmm0 -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; SSE41-NEXT: paddb %xmm0, %xmm1 -; SSE41-NEXT: pxor %xmm0, %xmm0 -; SSE41-NEXT: psadbw %xmm1, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v32i8: +; SSE: # %bb.0: +; SSE-NEXT: paddb %xmm1, %xmm0 +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; SSE-NEXT: paddb %xmm0, %xmm1 +; SSE-NEXT: pxor %xmm0, %xmm0 +; SSE-NEXT: psadbw %xmm1, %xmm0 +; SSE-NEXT: movd %xmm0, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX1-LABEL: test_v32i8: ; AVX1: # %bb.0: @@ -1260,7 +1193,7 @@ ; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1273,7 +1206,7 @@ ; AVX2-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1286,7 +1219,7 @@ ; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -1295,31 +1228,18 @@ } define i8 @test_v64i8(<64 x i8> %a0) { -; SSE2-LABEL: test_v64i8: -; SSE2: # %bb.0: -; SSE2-NEXT: paddb %xmm3, %xmm1 -; SSE2-NEXT: paddb %xmm2, %xmm1 -; SSE2-NEXT: paddb %xmm0, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] -; SSE2-NEXT: paddb %xmm1, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: psadbw %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v64i8: -; SSE41: # %bb.0: -; SSE41-NEXT: paddb %xmm3, %xmm1 -; SSE41-NEXT: paddb %xmm2, %xmm1 -; SSE41-NEXT: paddb %xmm0, %xmm1 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] -; SSE41-NEXT: paddb %xmm1, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm1 -; SSE41-NEXT: psadbw %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v64i8: +; SSE: # %bb.0: +; SSE-NEXT: paddb %xmm3, %xmm1 +; SSE-NEXT: paddb %xmm2, %xmm1 +; SSE-NEXT: paddb %xmm0, %xmm1 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] +; SSE-NEXT: paddb %xmm1, %xmm0 +; SSE-NEXT: pxor %xmm1, %xmm1 +; SSE-NEXT: psadbw %xmm0, %xmm1 +; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX1-LABEL: test_v64i8: ; AVX1: # %bb.0: @@ -1332,7 +1252,7 @@ ; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1346,7 +1266,7 @@ ; AVX2-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1361,7 +1281,7 @@ ; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -1370,39 +1290,22 @@ } define i8 @test_v128i8(<128 x i8> %a0) { -; SSE2-LABEL: test_v128i8: -; SSE2: # %bb.0: -; SSE2-NEXT: paddb %xmm7, %xmm3 -; SSE2-NEXT: paddb %xmm5, %xmm3 -; SSE2-NEXT: paddb %xmm1, %xmm3 -; SSE2-NEXT: paddb %xmm6, %xmm2 -; SSE2-NEXT: paddb %xmm4, %xmm2 -; SSE2-NEXT: paddb %xmm3, %xmm2 -; SSE2-NEXT: paddb %xmm0, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] -; SSE2-NEXT: paddb %xmm2, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: psadbw %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v128i8: -; SSE41: # %bb.0: -; SSE41-NEXT: paddb %xmm7, %xmm3 -; SSE41-NEXT: paddb %xmm5, %xmm3 -; SSE41-NEXT: paddb %xmm1, %xmm3 -; SSE41-NEXT: paddb %xmm6, %xmm2 -; SSE41-NEXT: paddb %xmm4, %xmm2 -; SSE41-NEXT: paddb %xmm3, %xmm2 -; SSE41-NEXT: paddb %xmm0, %xmm2 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] -; SSE41-NEXT: paddb %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm1 -; SSE41-NEXT: psadbw %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v128i8: +; SSE: # %bb.0: +; SSE-NEXT: paddb %xmm7, %xmm3 +; SSE-NEXT: paddb %xmm5, %xmm3 +; SSE-NEXT: paddb %xmm1, %xmm3 +; SSE-NEXT: paddb %xmm6, %xmm2 +; SSE-NEXT: paddb %xmm4, %xmm2 +; SSE-NEXT: paddb %xmm3, %xmm2 +; SSE-NEXT: paddb %xmm0, %xmm2 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] +; SSE-NEXT: paddb %xmm2, %xmm0 +; SSE-NEXT: pxor %xmm1, %xmm1 +; SSE-NEXT: psadbw %xmm0, %xmm1 +; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX1-LABEL: test_v128i8: ; AVX1: # %bb.0: @@ -1421,7 +1324,7 @@ ; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1437,7 +1340,7 @@ ; AVX2-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1453,7 +1356,7 @@ ; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-reduce-and.ll b/llvm/test/CodeGen/X86/vector-reduce-and.ll --- a/llvm/test/CodeGen/X86/vector-reduce-and.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-and.ll @@ -684,29 +684,20 @@ ; define i8 @test_v2i8(<2 x i8> %a0) { -; SSE2-LABEL: test_v2i8: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $8, %xmm1 -; SSE2-NEXT: pand %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v2i8: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: pand %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v2i8: +; SSE: # %bb.0: +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrlw $8, %xmm1 +; SSE-NEXT: pand %xmm0, %xmm1 +; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX-LABEL: test_v2i8: ; AVX: # %bb.0: ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.and.v2i8(<2 x i8> %a0) @@ -714,29 +705,17 @@ } define i8 @test_v4i8(<4 x i8> %a0) { -; SSE2-LABEL: test_v4i8: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrld $16, %xmm1 -; SSE2-NEXT: pand %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: psrlw $8, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: movd %xmm0, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v4i8: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psrld $16, %xmm1 -; SSE41-NEXT: pand %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: psrlw $8, %xmm0 -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v4i8: +; SSE: # %bb.0: +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrld $16, %xmm1 +; SSE-NEXT: pand %xmm0, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psrlw $8, %xmm0 +; SSE-NEXT: pand %xmm1, %xmm0 +; SSE-NEXT: movd %xmm0, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX-LABEL: test_v4i8: ; AVX: # %bb.0: @@ -744,7 +723,7 @@ ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.and.v4i8(<4 x i8> %a0) @@ -752,33 +731,19 @@ } define i8 @test_v8i8(<8 x i8> %a0) { -; SSE2-LABEL: test_v8i8: -; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; SSE2-NEXT: pand %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: psrld $16, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $8, %xmm1 -; SSE2-NEXT: pand %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v8i8: -; SSE41: # %bb.0: -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; SSE41-NEXT: pand %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: psrld $16, %xmm0 -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: pand %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v8i8: +; SSE: # %bb.0: +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; SSE-NEXT: pand %xmm0, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psrld $16, %xmm0 +; SSE-NEXT: pand %xmm1, %xmm0 +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrlw $8, %xmm1 +; SSE-NEXT: pand %xmm0, %xmm1 +; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX-LABEL: test_v8i8: ; AVX: # %bb.0: @@ -788,7 +753,7 @@ ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.and.v8i8(<8 x i8> %a0) @@ -796,37 +761,21 @@ } define i8 @test_v16i8(<16 x i8> %a0) { -; SSE2-LABEL: test_v16i8: -; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; SSE2-NEXT: pand %xmm0, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrld $16, %xmm1 -; SSE2-NEXT: pand %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: psrlw $8, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: movd %xmm0, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v16i8: -; SSE41: # %bb.0: -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; SSE41-NEXT: pand %xmm0, %xmm1 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psrld $16, %xmm1 -; SSE41-NEXT: pand %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: psrlw $8, %xmm0 -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v16i8: +; SSE: # %bb.0: +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; SSE-NEXT: pand %xmm0, %xmm1 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; SSE-NEXT: pand %xmm1, %xmm0 +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrld $16, %xmm1 +; SSE-NEXT: pand %xmm0, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psrlw $8, %xmm0 +; SSE-NEXT: pand %xmm1, %xmm0 +; SSE-NEXT: movd %xmm0, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX-LABEL: test_v16i8: ; AVX: # %bb.0: @@ -838,7 +787,7 @@ ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.and.v16i8(<16 x i8> %a0) @@ -846,39 +795,22 @@ } define i8 @test_v32i8(<32 x i8> %a0) { -; SSE2-LABEL: test_v32i8: -; SSE2: # %bb.0: -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; SSE2-NEXT: pand %xmm0, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrld $16, %xmm1 -; SSE2-NEXT: pand %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: psrlw $8, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: movd %xmm0, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v32i8: -; SSE41: # %bb.0: -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; SSE41-NEXT: pand %xmm0, %xmm1 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psrld $16, %xmm1 -; SSE41-NEXT: pand %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: psrlw $8, %xmm0 -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v32i8: +; SSE: # %bb.0: +; SSE-NEXT: pand %xmm1, %xmm0 +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; SSE-NEXT: pand %xmm0, %xmm1 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; SSE-NEXT: pand %xmm1, %xmm0 +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrld $16, %xmm1 +; SSE-NEXT: pand %xmm0, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psrlw $8, %xmm0 +; SSE-NEXT: pand %xmm1, %xmm0 +; SSE-NEXT: movd %xmm0, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX1-LABEL: test_v32i8: ; AVX1: # %bb.0: @@ -892,7 +824,7 @@ ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -909,7 +841,7 @@ ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -926,7 +858,7 @@ ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -935,43 +867,24 @@ } define i8 @test_v64i8(<64 x i8> %a0) { -; SSE2-LABEL: test_v64i8: -; SSE2: # %bb.0: -; SSE2-NEXT: pand %xmm3, %xmm1 -; SSE2-NEXT: pand %xmm2, %xmm1 -; SSE2-NEXT: pand %xmm0, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; SSE2-NEXT: pand %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: psrld $16, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $8, %xmm1 -; SSE2-NEXT: pand %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v64i8: -; SSE41: # %bb.0: -; SSE41-NEXT: pand %xmm3, %xmm1 -; SSE41-NEXT: pand %xmm2, %xmm1 -; SSE41-NEXT: pand %xmm0, %xmm1 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; SSE41-NEXT: pand %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: psrld $16, %xmm0 -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: pand %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v64i8: +; SSE: # %bb.0: +; SSE-NEXT: pand %xmm3, %xmm1 +; SSE-NEXT: pand %xmm2, %xmm1 +; SSE-NEXT: pand %xmm0, %xmm1 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] +; SSE-NEXT: pand %xmm1, %xmm0 +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; SSE-NEXT: pand %xmm0, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psrld $16, %xmm0 +; SSE-NEXT: pand %xmm1, %xmm0 +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrlw $8, %xmm1 +; SSE-NEXT: pand %xmm0, %xmm1 +; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX1-LABEL: test_v64i8: ; AVX1: # %bb.0: @@ -986,7 +899,7 @@ ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1004,7 +917,7 @@ ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1023,7 +936,7 @@ ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -1032,51 +945,28 @@ } define i8 @test_v128i8(<128 x i8> %a0) { -; SSE2-LABEL: test_v128i8: -; SSE2: # %bb.0: -; SSE2-NEXT: pand %xmm6, %xmm2 -; SSE2-NEXT: pand %xmm7, %xmm3 -; SSE2-NEXT: pand %xmm5, %xmm3 -; SSE2-NEXT: pand %xmm1, %xmm3 -; SSE2-NEXT: pand %xmm4, %xmm2 -; SSE2-NEXT: pand %xmm3, %xmm2 -; SSE2-NEXT: pand %xmm0, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] -; SSE2-NEXT: pand %xmm2, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; SSE2-NEXT: pand %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: psrld $16, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $8, %xmm1 -; SSE2-NEXT: pand %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v128i8: -; SSE41: # %bb.0: -; SSE41-NEXT: pand %xmm6, %xmm2 -; SSE41-NEXT: pand %xmm7, %xmm3 -; SSE41-NEXT: pand %xmm5, %xmm3 -; SSE41-NEXT: pand %xmm1, %xmm3 -; SSE41-NEXT: pand %xmm4, %xmm2 -; SSE41-NEXT: pand %xmm3, %xmm2 -; SSE41-NEXT: pand %xmm0, %xmm2 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] -; SSE41-NEXT: pand %xmm2, %xmm0 -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; SSE41-NEXT: pand %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: psrld $16, %xmm0 -; SSE41-NEXT: pand %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: pand %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v128i8: +; SSE: # %bb.0: +; SSE-NEXT: pand %xmm6, %xmm2 +; SSE-NEXT: pand %xmm7, %xmm3 +; SSE-NEXT: pand %xmm5, %xmm3 +; SSE-NEXT: pand %xmm1, %xmm3 +; SSE-NEXT: pand %xmm4, %xmm2 +; SSE-NEXT: pand %xmm3, %xmm2 +; SSE-NEXT: pand %xmm0, %xmm2 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] +; SSE-NEXT: pand %xmm2, %xmm0 +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; SSE-NEXT: pand %xmm0, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psrld $16, %xmm0 +; SSE-NEXT: pand %xmm1, %xmm0 +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrlw $8, %xmm1 +; SSE-NEXT: pand %xmm0, %xmm1 +; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX1-LABEL: test_v128i8: ; AVX1: # %bb.0: @@ -1093,7 +983,7 @@ ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1113,7 +1003,7 @@ ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1133,7 +1023,7 @@ ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-reduce-mul.ll b/llvm/test/CodeGen/X86/vector-reduce-mul.ll --- a/llvm/test/CodeGen/X86/vector-reduce-mul.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-mul.ll @@ -1531,29 +1531,20 @@ ; define i8 @test_v2i8(<2 x i8> %a0) { -; SSE2-LABEL: test_v2i8: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $8, %xmm1 -; SSE2-NEXT: pmullw %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v2i8: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: pmullw %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v2i8: +; SSE: # %bb.0: +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrlw $8, %xmm1 +; SSE-NEXT: pmullw %xmm0, %xmm1 +; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX-LABEL: test_v2i8: ; AVX: # %bb.0: ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; @@ -1561,7 +1552,7 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.mul.v2i8(<2 x i8> %a0) @@ -1591,7 +1582,7 @@ ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: psrld $16, %xmm0 ; SSE41-NEXT: pmullw %xmm1, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: movd %xmm0, %eax ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -1602,7 +1593,7 @@ ; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; @@ -1613,7 +1604,7 @@ ; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax +; AVX512BW-NEXT: vmovd %xmm0, %eax ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax ; AVX512BW-NEXT: retq ; @@ -1625,7 +1616,7 @@ ; AVX512VL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX512VL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax +; AVX512VL-NEXT: vmovd %xmm0, %eax ; AVX512VL-NEXT: # kill: def $al killed $al killed $eax ; AVX512VL-NEXT: retq ; @@ -1636,7 +1627,7 @@ ; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0 ; AVX512DQ-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpextrb $0, %xmm0, %eax +; AVX512DQ-NEXT: vmovd %xmm0, %eax ; AVX512DQ-NEXT: # kill: def $al killed $al killed $eax ; AVX512DQ-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.mul.v4i8(<4 x i8> %a0) @@ -1671,7 +1662,7 @@ ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: psrld $16, %xmm0 ; SSE41-NEXT: pmullw %xmm1, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: movd %xmm0, %eax ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -1684,7 +1675,7 @@ ; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; @@ -1697,7 +1688,7 @@ ; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax +; AVX512BW-NEXT: vmovd %xmm0, %eax ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax ; AVX512BW-NEXT: retq ; @@ -1711,7 +1702,7 @@ ; AVX512VL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX512VL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax +; AVX512VL-NEXT: vmovd %xmm0, %eax ; AVX512VL-NEXT: # kill: def $al killed $al killed $eax ; AVX512VL-NEXT: retq ; @@ -1724,7 +1715,7 @@ ; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0 ; AVX512DQ-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpextrb $0, %xmm0, %eax +; AVX512DQ-NEXT: vmovd %xmm0, %eax ; AVX512DQ-NEXT: # kill: def $al killed $al killed $eax ; AVX512DQ-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.mul.v8i8(<8 x i8> %a0) @@ -1783,7 +1774,7 @@ ; SSE41-NEXT: movdqa %xmm0, %xmm1 ; SSE41-NEXT: psrlw $8, %xmm1 ; SSE41-NEXT: pmullw %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax +; SSE41-NEXT: movd %xmm1, %eax ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -1801,7 +1792,7 @@ ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: retq ; @@ -1817,7 +1808,7 @@ ; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2],zero,xmm0[2],zero,xmm0[4],zero,xmm0[6],zero,xmm0[8],zero,xmm0[10],zero,xmm0[12],zero,xmm0[14],zero ; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: retq ; @@ -1838,7 +1829,7 @@ ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm1 ; AVX512BW-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax +; AVX512BW-NEXT: vmovd %xmm0, %eax ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -1860,7 +1851,7 @@ ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm1 ; AVX512BWVL-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX512BWVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpextrb $0, %xmm0, %eax +; AVX512BWVL-NEXT: vmovd %xmm0, %eax ; AVX512BWVL-NEXT: # kill: def $al killed $al killed $eax ; AVX512BWVL-NEXT: vzeroupper ; AVX512BWVL-NEXT: retq @@ -1885,7 +1876,7 @@ ; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512DQ-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpextrb $0, %xmm0, %eax +; AVX512DQ-NEXT: vmovd %xmm0, %eax ; AVX512DQ-NEXT: # kill: def $al killed $al killed $eax ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq @@ -1910,7 +1901,7 @@ ; AVX512DQVL-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512DQVL-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX512DQVL-NEXT: vpextrb $0, %xmm0, %eax +; AVX512DQVL-NEXT: vmovd %xmm0, %eax ; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax ; AVX512DQVL-NEXT: vzeroupper ; AVX512DQVL-NEXT: retq @@ -1984,7 +1975,7 @@ ; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: psrlw $8, %xmm0 ; SSE41-NEXT: pmullw %xmm2, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: movd %xmm0, %eax ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -2007,7 +1998,7 @@ ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -2039,7 +2030,7 @@ ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -2067,7 +2058,7 @@ ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero ; AVX512BW-NEXT: vpmullw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 -; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax +; AVX512BW-NEXT: vmovd %xmm0, %eax ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -2095,7 +2086,7 @@ ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero ; AVX512BWVL-NEXT: vpmullw %zmm1, %zmm0, %zmm0 ; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0 -; AVX512BWVL-NEXT: vpextrb $0, %xmm0, %eax +; AVX512BWVL-NEXT: vmovd %xmm0, %eax ; AVX512BWVL-NEXT: # kill: def $al killed $al killed $eax ; AVX512BWVL-NEXT: vzeroupper ; AVX512BWVL-NEXT: retq @@ -2127,7 +2118,7 @@ ; AVX512DQ-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 ; AVX512DQ-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpextrb $0, %xmm0, %eax +; AVX512DQ-NEXT: vmovd %xmm0, %eax ; AVX512DQ-NEXT: # kill: def $al killed $al killed $eax ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq @@ -2159,7 +2150,7 @@ ; AVX512DQVL-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 ; AVX512DQVL-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX512DQVL-NEXT: vpextrb $0, %xmm0, %eax +; AVX512DQVL-NEXT: vmovd %xmm0, %eax ; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax ; AVX512DQVL-NEXT: vzeroupper ; AVX512DQVL-NEXT: retq @@ -2256,7 +2247,7 @@ ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: psrlw $8, %xmm0 ; SSE41-NEXT: pmullw %xmm1, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: movd %xmm0, %eax ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -2288,7 +2279,7 @@ ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -2323,7 +2314,7 @@ ; AVX2-NEXT: vpackuswb %xmm3, %xmm0, %xmm0 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -2364,7 +2355,7 @@ ; AVX512BW-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 ; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax +; AVX512BW-NEXT: vmovd %xmm0, %eax ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -2405,7 +2396,7 @@ ; AVX512BWVL-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 ; AVX512BWVL-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512BWVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpextrb $0, %xmm0, %eax +; AVX512BWVL-NEXT: vmovd %xmm0, %eax ; AVX512BWVL-NEXT: # kill: def $al killed $al killed $eax ; AVX512BWVL-NEXT: vzeroupper ; AVX512BWVL-NEXT: retq @@ -2441,7 +2432,7 @@ ; AVX512DQ-NEXT: vpackuswb %xmm3, %xmm0, %xmm0 ; AVX512DQ-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpextrb $0, %xmm0, %eax +; AVX512DQ-NEXT: vmovd %xmm0, %eax ; AVX512DQ-NEXT: # kill: def $al killed $al killed $eax ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq @@ -2482,7 +2473,7 @@ ; AVX512DQVL-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 ; AVX512DQVL-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX512DQVL-NEXT: vpextrb $0, %xmm0, %eax +; AVX512DQVL-NEXT: vmovd %xmm0, %eax ; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax ; AVX512DQVL-NEXT: vzeroupper ; AVX512DQVL-NEXT: retq @@ -2632,7 +2623,7 @@ ; SSE41-NEXT: movdqa %xmm0, %xmm1 ; SSE41-NEXT: psrlw $8, %xmm1 ; SSE41-NEXT: pmullw %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax +; SSE41-NEXT: movd %xmm1, %eax ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -2682,7 +2673,7 @@ ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -2725,7 +2716,7 @@ ; AVX2-NEXT: vpackuswb %xmm3, %xmm0, %xmm0 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -2774,7 +2765,7 @@ ; AVX512BW-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 ; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax +; AVX512BW-NEXT: vmovd %xmm0, %eax ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -2823,7 +2814,7 @@ ; AVX512BWVL-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 ; AVX512BWVL-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512BWVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpextrb $0, %xmm0, %eax +; AVX512BWVL-NEXT: vmovd %xmm0, %eax ; AVX512BWVL-NEXT: # kill: def $al killed $al killed $eax ; AVX512BWVL-NEXT: vzeroupper ; AVX512BWVL-NEXT: retq @@ -2866,7 +2857,7 @@ ; AVX512DQ-NEXT: vpackuswb %xmm3, %xmm0, %xmm0 ; AVX512DQ-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512DQ-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX512DQ-NEXT: vpextrb $0, %xmm0, %eax +; AVX512DQ-NEXT: vmovd %xmm0, %eax ; AVX512DQ-NEXT: # kill: def $al killed $al killed $eax ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq @@ -2914,7 +2905,7 @@ ; AVX512DQVL-NEXT: vpackuswb %xmm3, %xmm0, %xmm0 ; AVX512DQVL-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512DQVL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX512DQVL-NEXT: vpextrb $0, %xmm0, %eax +; AVX512DQVL-NEXT: vmovd %xmm0, %eax ; AVX512DQVL-NEXT: # kill: def $al killed $al killed $eax ; AVX512DQVL-NEXT: vzeroupper ; AVX512DQVL-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-reduce-or.ll b/llvm/test/CodeGen/X86/vector-reduce-or.ll --- a/llvm/test/CodeGen/X86/vector-reduce-or.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-or.ll @@ -684,29 +684,20 @@ ; define i8 @test_v2i8(<2 x i8> %a0) { -; SSE2-LABEL: test_v2i8: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $8, %xmm1 -; SSE2-NEXT: por %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v2i8: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: por %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v2i8: +; SSE: # %bb.0: +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrlw $8, %xmm1 +; SSE-NEXT: por %xmm0, %xmm1 +; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX-LABEL: test_v2i8: ; AVX: # %bb.0: ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> %a0) @@ -714,29 +705,17 @@ } define i8 @test_v4i8(<4 x i8> %a0) { -; SSE2-LABEL: test_v4i8: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrld $16, %xmm1 -; SSE2-NEXT: por %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: psrlw $8, %xmm0 -; SSE2-NEXT: por %xmm1, %xmm0 -; SSE2-NEXT: movd %xmm0, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v4i8: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psrld $16, %xmm1 -; SSE41-NEXT: por %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: psrlw $8, %xmm0 -; SSE41-NEXT: por %xmm1, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v4i8: +; SSE: # %bb.0: +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrld $16, %xmm1 +; SSE-NEXT: por %xmm0, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psrlw $8, %xmm0 +; SSE-NEXT: por %xmm1, %xmm0 +; SSE-NEXT: movd %xmm0, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX-LABEL: test_v4i8: ; AVX: # %bb.0: @@ -744,7 +723,7 @@ ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> %a0) @@ -752,33 +731,19 @@ } define i8 @test_v8i8(<8 x i8> %a0) { -; SSE2-LABEL: test_v8i8: -; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; SSE2-NEXT: por %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: psrld $16, %xmm0 -; SSE2-NEXT: por %xmm1, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $8, %xmm1 -; SSE2-NEXT: por %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v8i8: -; SSE41: # %bb.0: -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; SSE41-NEXT: por %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: psrld $16, %xmm0 -; SSE41-NEXT: por %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: por %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v8i8: +; SSE: # %bb.0: +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; SSE-NEXT: por %xmm0, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psrld $16, %xmm0 +; SSE-NEXT: por %xmm1, %xmm0 +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrlw $8, %xmm1 +; SSE-NEXT: por %xmm0, %xmm1 +; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX-LABEL: test_v8i8: ; AVX: # %bb.0: @@ -788,7 +753,7 @@ ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> %a0) @@ -796,37 +761,21 @@ } define i8 @test_v16i8(<16 x i8> %a0) { -; SSE2-LABEL: test_v16i8: -; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; SSE2-NEXT: por %xmm0, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; SSE2-NEXT: por %xmm1, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrld $16, %xmm1 -; SSE2-NEXT: por %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: psrlw $8, %xmm0 -; SSE2-NEXT: por %xmm1, %xmm0 -; SSE2-NEXT: movd %xmm0, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v16i8: -; SSE41: # %bb.0: -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; SSE41-NEXT: por %xmm0, %xmm1 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; SSE41-NEXT: por %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psrld $16, %xmm1 -; SSE41-NEXT: por %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: psrlw $8, %xmm0 -; SSE41-NEXT: por %xmm1, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v16i8: +; SSE: # %bb.0: +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; SSE-NEXT: por %xmm0, %xmm1 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; SSE-NEXT: por %xmm1, %xmm0 +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrld $16, %xmm1 +; SSE-NEXT: por %xmm0, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psrlw $8, %xmm0 +; SSE-NEXT: por %xmm1, %xmm0 +; SSE-NEXT: movd %xmm0, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX-LABEL: test_v16i8: ; AVX: # %bb.0: @@ -838,7 +787,7 @@ ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> %a0) @@ -846,39 +795,22 @@ } define i8 @test_v32i8(<32 x i8> %a0) { -; SSE2-LABEL: test_v32i8: -; SSE2: # %bb.0: -; SSE2-NEXT: por %xmm1, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; SSE2-NEXT: por %xmm0, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; SSE2-NEXT: por %xmm1, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrld $16, %xmm1 -; SSE2-NEXT: por %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: psrlw $8, %xmm0 -; SSE2-NEXT: por %xmm1, %xmm0 -; SSE2-NEXT: movd %xmm0, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v32i8: -; SSE41: # %bb.0: -; SSE41-NEXT: por %xmm1, %xmm0 -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; SSE41-NEXT: por %xmm0, %xmm1 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; SSE41-NEXT: por %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psrld $16, %xmm1 -; SSE41-NEXT: por %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: psrlw $8, %xmm0 -; SSE41-NEXT: por %xmm1, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v32i8: +; SSE: # %bb.0: +; SSE-NEXT: por %xmm1, %xmm0 +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; SSE-NEXT: por %xmm0, %xmm1 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; SSE-NEXT: por %xmm1, %xmm0 +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrld $16, %xmm1 +; SSE-NEXT: por %xmm0, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psrlw $8, %xmm0 +; SSE-NEXT: por %xmm1, %xmm0 +; SSE-NEXT: movd %xmm0, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX1-LABEL: test_v32i8: ; AVX1: # %bb.0: @@ -892,7 +824,7 @@ ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -909,7 +841,7 @@ ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -926,7 +858,7 @@ ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -935,43 +867,24 @@ } define i8 @test_v64i8(<64 x i8> %a0) { -; SSE2-LABEL: test_v64i8: -; SSE2: # %bb.0: -; SSE2-NEXT: por %xmm3, %xmm1 -; SSE2-NEXT: por %xmm2, %xmm1 -; SSE2-NEXT: por %xmm0, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] -; SSE2-NEXT: por %xmm1, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; SSE2-NEXT: por %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: psrld $16, %xmm0 -; SSE2-NEXT: por %xmm1, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $8, %xmm1 -; SSE2-NEXT: por %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v64i8: -; SSE41: # %bb.0: -; SSE41-NEXT: por %xmm3, %xmm1 -; SSE41-NEXT: por %xmm2, %xmm1 -; SSE41-NEXT: por %xmm0, %xmm1 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] -; SSE41-NEXT: por %xmm1, %xmm0 -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; SSE41-NEXT: por %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: psrld $16, %xmm0 -; SSE41-NEXT: por %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: por %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v64i8: +; SSE: # %bb.0: +; SSE-NEXT: por %xmm3, %xmm1 +; SSE-NEXT: por %xmm2, %xmm1 +; SSE-NEXT: por %xmm0, %xmm1 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] +; SSE-NEXT: por %xmm1, %xmm0 +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; SSE-NEXT: por %xmm0, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psrld $16, %xmm0 +; SSE-NEXT: por %xmm1, %xmm0 +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrlw $8, %xmm1 +; SSE-NEXT: por %xmm0, %xmm1 +; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX1-LABEL: test_v64i8: ; AVX1: # %bb.0: @@ -986,7 +899,7 @@ ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1004,7 +917,7 @@ ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1023,7 +936,7 @@ ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -1032,51 +945,28 @@ } define i8 @test_v128i8(<128 x i8> %a0) { -; SSE2-LABEL: test_v128i8: -; SSE2: # %bb.0: -; SSE2-NEXT: por %xmm6, %xmm2 -; SSE2-NEXT: por %xmm7, %xmm3 -; SSE2-NEXT: por %xmm5, %xmm3 -; SSE2-NEXT: por %xmm1, %xmm3 -; SSE2-NEXT: por %xmm4, %xmm2 -; SSE2-NEXT: por %xmm3, %xmm2 -; SSE2-NEXT: por %xmm0, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] -; SSE2-NEXT: por %xmm2, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; SSE2-NEXT: por %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: psrld $16, %xmm0 -; SSE2-NEXT: por %xmm1, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $8, %xmm1 -; SSE2-NEXT: por %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v128i8: -; SSE41: # %bb.0: -; SSE41-NEXT: por %xmm6, %xmm2 -; SSE41-NEXT: por %xmm7, %xmm3 -; SSE41-NEXT: por %xmm5, %xmm3 -; SSE41-NEXT: por %xmm1, %xmm3 -; SSE41-NEXT: por %xmm4, %xmm2 -; SSE41-NEXT: por %xmm3, %xmm2 -; SSE41-NEXT: por %xmm0, %xmm2 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] -; SSE41-NEXT: por %xmm2, %xmm0 -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; SSE41-NEXT: por %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: psrld $16, %xmm0 -; SSE41-NEXT: por %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: por %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v128i8: +; SSE: # %bb.0: +; SSE-NEXT: por %xmm6, %xmm2 +; SSE-NEXT: por %xmm7, %xmm3 +; SSE-NEXT: por %xmm5, %xmm3 +; SSE-NEXT: por %xmm1, %xmm3 +; SSE-NEXT: por %xmm4, %xmm2 +; SSE-NEXT: por %xmm3, %xmm2 +; SSE-NEXT: por %xmm0, %xmm2 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] +; SSE-NEXT: por %xmm2, %xmm0 +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; SSE-NEXT: por %xmm0, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psrld $16, %xmm0 +; SSE-NEXT: por %xmm1, %xmm0 +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrlw $8, %xmm1 +; SSE-NEXT: por %xmm0, %xmm1 +; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX1-LABEL: test_v128i8: ; AVX1: # %bb.0: @@ -1093,7 +983,7 @@ ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1113,7 +1003,7 @@ ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1133,7 +1023,7 @@ ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-reduce-smax.ll b/llvm/test/CodeGen/X86/vector-reduce-smax.ll --- a/llvm/test/CodeGen/X86/vector-reduce-smax.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-smax.ll @@ -1400,7 +1400,7 @@ ; SSE41-NEXT: movdqa %xmm0, %xmm1 ; SSE41-NEXT: psrlw $8, %xmm1 ; SSE41-NEXT: pmaxsb %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax +; SSE41-NEXT: movd %xmm1, %eax ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -1408,7 +1408,7 @@ ; AVX: # %bb.0: ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; @@ -1416,7 +1416,7 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.smax.v2i8(<2 x i8> %a0) @@ -1452,7 +1452,7 @@ ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: psrlw $8, %xmm0 ; SSE41-NEXT: pmaxsb %xmm1, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: movd %xmm0, %eax ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -1462,7 +1462,7 @@ ; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; @@ -1472,7 +1472,7 @@ ; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.smax.v4i8(<4 x i8> %a0) @@ -1516,7 +1516,7 @@ ; SSE41-NEXT: movdqa %xmm0, %xmm1 ; SSE41-NEXT: psrlw $8, %xmm1 ; SSE41-NEXT: pmaxsb %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax +; SSE41-NEXT: movd %xmm1, %eax ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -1528,7 +1528,7 @@ ; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; @@ -1540,7 +1540,7 @@ ; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.smax.v8i8(<8 x i8> %a0) @@ -1587,7 +1587,7 @@ ; SSE41-NEXT: psrlw $8, %xmm1 ; SSE41-NEXT: pminub %xmm0, %xmm1 ; SSE41-NEXT: phminposuw %xmm1, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: movd %xmm0, %eax ; SSE41-NEXT: xorb $127, %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq @@ -1598,7 +1598,7 @@ ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vphminposuw %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: xorb $127, %al ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq @@ -1609,7 +1609,7 @@ ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: xorb $127, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq @@ -1663,7 +1663,7 @@ ; SSE41-NEXT: psrlw $8, %xmm1 ; SSE41-NEXT: pminub %xmm0, %xmm1 ; SSE41-NEXT: phminposuw %xmm1, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: movd %xmm0, %eax ; SSE41-NEXT: xorb $127, %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq @@ -1676,7 +1676,7 @@ ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: xorb $127, %al ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper @@ -1690,7 +1690,7 @@ ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: xorb $127, %al ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper @@ -1704,7 +1704,7 @@ ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: xorb $127, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper @@ -1771,7 +1771,7 @@ ; SSE41-NEXT: psrlw $8, %xmm0 ; SSE41-NEXT: pminub %xmm1, %xmm0 ; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: movd %xmm0, %eax ; SSE41-NEXT: xorb $127, %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq @@ -1787,7 +1787,7 @@ ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: xorb $127, %al ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper @@ -1802,7 +1802,7 @@ ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: xorb $127, %al ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper @@ -1818,7 +1818,7 @@ ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: xorb $127, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper @@ -1909,7 +1909,7 @@ ; SSE41-NEXT: psrlw $8, %xmm0 ; SSE41-NEXT: pminub %xmm2, %xmm0 ; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: movd %xmm0, %eax ; SSE41-NEXT: xorb $127, %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq @@ -1931,7 +1931,7 @@ ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: xorb $127, %al ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper @@ -1948,7 +1948,7 @@ ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: xorb $127, %al ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper @@ -1965,7 +1965,7 @@ ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: xorb $127, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/vector-reduce-smin.ll b/llvm/test/CodeGen/X86/vector-reduce-smin.ll --- a/llvm/test/CodeGen/X86/vector-reduce-smin.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-smin.ll @@ -1398,7 +1398,7 @@ ; SSE41-NEXT: movdqa %xmm0, %xmm1 ; SSE41-NEXT: psrlw $8, %xmm1 ; SSE41-NEXT: pminsb %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax +; SSE41-NEXT: movd %xmm1, %eax ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -1406,7 +1406,7 @@ ; AVX: # %bb.0: ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; @@ -1414,7 +1414,7 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.smin.v2i8(<2 x i8> %a0) @@ -1450,7 +1450,7 @@ ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: psrlw $8, %xmm0 ; SSE41-NEXT: pminsb %xmm1, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: movd %xmm0, %eax ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -1460,7 +1460,7 @@ ; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; @@ -1470,7 +1470,7 @@ ; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.smin.v4i8(<4 x i8> %a0) @@ -1514,7 +1514,7 @@ ; SSE41-NEXT: movdqa %xmm0, %xmm1 ; SSE41-NEXT: psrlw $8, %xmm1 ; SSE41-NEXT: pminsb %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax +; SSE41-NEXT: movd %xmm1, %eax ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -1526,7 +1526,7 @@ ; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; @@ -1538,7 +1538,7 @@ ; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.smin.v8i8(<8 x i8> %a0) @@ -1585,7 +1585,7 @@ ; SSE41-NEXT: psrlw $8, %xmm1 ; SSE41-NEXT: pminub %xmm0, %xmm1 ; SSE41-NEXT: phminposuw %xmm1, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: movd %xmm0, %eax ; SSE41-NEXT: xorb $-128, %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq @@ -1596,7 +1596,7 @@ ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vphminposuw %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: xorb $-128, %al ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq @@ -1607,7 +1607,7 @@ ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: xorb $-128, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq @@ -1661,7 +1661,7 @@ ; SSE41-NEXT: psrlw $8, %xmm1 ; SSE41-NEXT: pminub %xmm0, %xmm1 ; SSE41-NEXT: phminposuw %xmm1, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: movd %xmm0, %eax ; SSE41-NEXT: xorb $-128, %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq @@ -1674,7 +1674,7 @@ ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: xorb $-128, %al ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper @@ -1688,7 +1688,7 @@ ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: xorb $-128, %al ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper @@ -1702,7 +1702,7 @@ ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: xorb $-128, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper @@ -1769,7 +1769,7 @@ ; SSE41-NEXT: psrlw $8, %xmm0 ; SSE41-NEXT: pminub %xmm1, %xmm0 ; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: movd %xmm0, %eax ; SSE41-NEXT: xorb $-128, %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq @@ -1785,7 +1785,7 @@ ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: xorb $-128, %al ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper @@ -1800,7 +1800,7 @@ ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: xorb $-128, %al ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper @@ -1816,7 +1816,7 @@ ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: xorb $-128, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper @@ -1907,7 +1907,7 @@ ; SSE41-NEXT: psrlw $8, %xmm0 ; SSE41-NEXT: pminub %xmm2, %xmm0 ; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: movd %xmm0, %eax ; SSE41-NEXT: xorb $-128, %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq @@ -1929,7 +1929,7 @@ ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: xorb $-128, %al ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper @@ -1946,7 +1946,7 @@ ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: xorb $-128, %al ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper @@ -1963,7 +1963,7 @@ ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: xorb $-128, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/vector-reduce-umax.ll b/llvm/test/CodeGen/X86/vector-reduce-umax.ll --- a/llvm/test/CodeGen/X86/vector-reduce-umax.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-umax.ll @@ -1614,29 +1614,20 @@ ; define i8 @test_v2i8(<2 x i8> %a0) { -; SSE2-LABEL: test_v2i8: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $8, %xmm1 -; SSE2-NEXT: pmaxub %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v2i8: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: pmaxub %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v2i8: +; SSE: # %bb.0: +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrlw $8, %xmm1 +; SSE-NEXT: pmaxub %xmm0, %xmm1 +; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX-LABEL: test_v2i8: ; AVX: # %bb.0: ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; @@ -1644,7 +1635,7 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.umax.v2i8(<2 x i8> %a0) @@ -1652,29 +1643,17 @@ } define i8 @test_v4i8(<4 x i8> %a0) { -; SSE2-LABEL: test_v4i8: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrld $16, %xmm1 -; SSE2-NEXT: pmaxub %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: psrlw $8, %xmm0 -; SSE2-NEXT: pmaxub %xmm1, %xmm0 -; SSE2-NEXT: movd %xmm0, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v4i8: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psrld $16, %xmm1 -; SSE41-NEXT: pmaxub %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: psrlw $8, %xmm0 -; SSE41-NEXT: pmaxub %xmm1, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v4i8: +; SSE: # %bb.0: +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrld $16, %xmm1 +; SSE-NEXT: pmaxub %xmm0, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psrlw $8, %xmm0 +; SSE-NEXT: pmaxub %xmm1, %xmm0 +; SSE-NEXT: movd %xmm0, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX-LABEL: test_v4i8: ; AVX: # %bb.0: @@ -1682,7 +1661,7 @@ ; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; @@ -1692,7 +1671,7 @@ ; AVX512-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.umax.v4i8(<4 x i8> %a0) @@ -1700,33 +1679,19 @@ } define i8 @test_v8i8(<8 x i8> %a0) { -; SSE2-LABEL: test_v8i8: -; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; SSE2-NEXT: pmaxub %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: psrld $16, %xmm0 -; SSE2-NEXT: pmaxub %xmm1, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $8, %xmm1 -; SSE2-NEXT: pmaxub %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v8i8: -; SSE41: # %bb.0: -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; SSE41-NEXT: pmaxub %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: psrld $16, %xmm0 -; SSE41-NEXT: pmaxub %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: pmaxub %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v8i8: +; SSE: # %bb.0: +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; SSE-NEXT: pmaxub %xmm0, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psrld $16, %xmm0 +; SSE-NEXT: pmaxub %xmm1, %xmm0 +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrlw $8, %xmm1 +; SSE-NEXT: pmaxub %xmm0, %xmm1 +; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX-LABEL: test_v8i8: ; AVX: # %bb.0: @@ -1736,7 +1701,7 @@ ; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; @@ -1748,7 +1713,7 @@ ; AVX512-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.umax.v8i8(<8 x i8> %a0) @@ -1780,7 +1745,7 @@ ; SSE41-NEXT: psrlw $8, %xmm0 ; SSE41-NEXT: pminub %xmm1, %xmm0 ; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: movd %xmm0, %eax ; SSE41-NEXT: notb %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq @@ -1792,7 +1757,7 @@ ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vphminposuw %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: notb %al ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq @@ -1804,7 +1769,7 @@ ; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax +; AVX512BW-NEXT: vmovd %xmm0, %eax ; AVX512BW-NEXT: notb %al ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax ; AVX512BW-NEXT: vzeroupper @@ -1816,7 +1781,7 @@ ; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax +; AVX512VL-NEXT: vmovd %xmm0, %eax ; AVX512VL-NEXT: notb %al ; AVX512VL-NEXT: # kill: def $al killed $al killed $eax ; AVX512VL-NEXT: retq @@ -1851,7 +1816,7 @@ ; SSE41-NEXT: psrlw $8, %xmm0 ; SSE41-NEXT: pminub %xmm1, %xmm0 ; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: movd %xmm0, %eax ; SSE41-NEXT: notb %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq @@ -1865,7 +1830,7 @@ ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: notb %al ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper @@ -1880,7 +1845,7 @@ ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: notb %al ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper @@ -1894,7 +1859,7 @@ ; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax +; AVX512BW-NEXT: vmovd %xmm0, %eax ; AVX512BW-NEXT: notb %al ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax ; AVX512BW-NEXT: vzeroupper @@ -1908,7 +1873,7 @@ ; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax +; AVX512VL-NEXT: vmovd %xmm0, %eax ; AVX512VL-NEXT: notb %al ; AVX512VL-NEXT: # kill: def $al killed $al killed $eax ; AVX512VL-NEXT: vzeroupper @@ -1948,7 +1913,7 @@ ; SSE41-NEXT: psrlw $8, %xmm1 ; SSE41-NEXT: pminub %xmm0, %xmm1 ; SSE41-NEXT: phminposuw %xmm1, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: movd %xmm0, %eax ; SSE41-NEXT: notb %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq @@ -1965,7 +1930,7 @@ ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: notb %al ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper @@ -1981,7 +1946,7 @@ ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: notb %al ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper @@ -1997,7 +1962,7 @@ ; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax +; AVX512BW-NEXT: vmovd %xmm0, %eax ; AVX512BW-NEXT: notb %al ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax ; AVX512BW-NEXT: vzeroupper @@ -2013,7 +1978,7 @@ ; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax +; AVX512VL-NEXT: vmovd %xmm0, %eax ; AVX512VL-NEXT: notb %al ; AVX512VL-NEXT: # kill: def $al killed $al killed $eax ; AVX512VL-NEXT: vzeroupper @@ -2061,7 +2026,7 @@ ; SSE41-NEXT: psrlw $8, %xmm1 ; SSE41-NEXT: pminub %xmm0, %xmm1 ; SSE41-NEXT: phminposuw %xmm1, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: movd %xmm0, %eax ; SSE41-NEXT: notb %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq @@ -2084,7 +2049,7 @@ ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: notb %al ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper @@ -2102,7 +2067,7 @@ ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: notb %al ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper @@ -2119,7 +2084,7 @@ ; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax +; AVX512BW-NEXT: vmovd %xmm0, %eax ; AVX512BW-NEXT: notb %al ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax ; AVX512BW-NEXT: vzeroupper @@ -2136,7 +2101,7 @@ ; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax +; AVX512VL-NEXT: vmovd %xmm0, %eax ; AVX512VL-NEXT: notb %al ; AVX512VL-NEXT: # kill: def $al killed $al killed $eax ; AVX512VL-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/vector-reduce-umin.ll b/llvm/test/CodeGen/X86/vector-reduce-umin.ll --- a/llvm/test/CodeGen/X86/vector-reduce-umin.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-umin.ll @@ -1519,29 +1519,20 @@ ; define i8 @test_v2i8(<2 x i8> %a0) { -; SSE2-LABEL: test_v2i8: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $8, %xmm1 -; SSE2-NEXT: pminub %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v2i8: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: pminub %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v2i8: +; SSE: # %bb.0: +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrlw $8, %xmm1 +; SSE-NEXT: pminub %xmm0, %xmm1 +; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX-LABEL: test_v2i8: ; AVX: # %bb.0: ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; @@ -1549,7 +1540,7 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.umin.v2i8(<2 x i8> %a0) @@ -1557,29 +1548,17 @@ } define i8 @test_v4i8(<4 x i8> %a0) { -; SSE2-LABEL: test_v4i8: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrld $16, %xmm1 -; SSE2-NEXT: pminub %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: psrlw $8, %xmm0 -; SSE2-NEXT: pminub %xmm1, %xmm0 -; SSE2-NEXT: movd %xmm0, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v4i8: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psrld $16, %xmm1 -; SSE41-NEXT: pminub %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: psrlw $8, %xmm0 -; SSE41-NEXT: pminub %xmm1, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v4i8: +; SSE: # %bb.0: +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrld $16, %xmm1 +; SSE-NEXT: pminub %xmm0, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psrlw $8, %xmm0 +; SSE-NEXT: pminub %xmm1, %xmm0 +; SSE-NEXT: movd %xmm0, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX-LABEL: test_v4i8: ; AVX: # %bb.0: @@ -1587,7 +1566,7 @@ ; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; @@ -1597,7 +1576,7 @@ ; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.umin.v4i8(<4 x i8> %a0) @@ -1605,33 +1584,19 @@ } define i8 @test_v8i8(<8 x i8> %a0) { -; SSE2-LABEL: test_v8i8: -; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; SSE2-NEXT: pminub %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: psrld $16, %xmm0 -; SSE2-NEXT: pminub %xmm1, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $8, %xmm1 -; SSE2-NEXT: pminub %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v8i8: -; SSE41: # %bb.0: -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; SSE41-NEXT: pminub %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: psrld $16, %xmm0 -; SSE41-NEXT: pminub %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: pminub %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v8i8: +; SSE: # %bb.0: +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; SSE-NEXT: pminub %xmm0, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psrld $16, %xmm0 +; SSE-NEXT: pminub %xmm1, %xmm0 +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrlw $8, %xmm1 +; SSE-NEXT: pminub %xmm0, %xmm1 +; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX-LABEL: test_v8i8: ; AVX: # %bb.0: @@ -1641,7 +1606,7 @@ ; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; @@ -1653,7 +1618,7 @@ ; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.umin.v8i8(<8 x i8> %a0) @@ -1683,7 +1648,7 @@ ; SSE41-NEXT: psrlw $8, %xmm1 ; SSE41-NEXT: pminub %xmm0, %xmm1 ; SSE41-NEXT: phminposuw %xmm1, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: movd %xmm0, %eax ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -1692,7 +1657,7 @@ ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vphminposuw %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; @@ -1701,7 +1666,7 @@ ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.umin.v16i8(<16 x i8> %a0) @@ -1733,7 +1698,7 @@ ; SSE41-NEXT: psrlw $8, %xmm1 ; SSE41-NEXT: pminub %xmm0, %xmm1 ; SSE41-NEXT: phminposuw %xmm1, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: movd %xmm0, %eax ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -1744,7 +1709,7 @@ ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1756,7 +1721,7 @@ ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1768,7 +1733,7 @@ ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -1805,7 +1770,7 @@ ; SSE41-NEXT: psrlw $8, %xmm0 ; SSE41-NEXT: pminub %xmm1, %xmm0 ; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: movd %xmm0, %eax ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -1819,7 +1784,7 @@ ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1832,7 +1797,7 @@ ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1846,7 +1811,7 @@ ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -1891,7 +1856,7 @@ ; SSE41-NEXT: psrlw $8, %xmm0 ; SSE41-NEXT: pminub %xmm2, %xmm0 ; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: movd %xmm0, %eax ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -1911,7 +1876,7 @@ ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1926,7 +1891,7 @@ ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1941,7 +1906,7 @@ ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-reduce-xor.ll b/llvm/test/CodeGen/X86/vector-reduce-xor.ll --- a/llvm/test/CodeGen/X86/vector-reduce-xor.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-xor.ll @@ -684,29 +684,20 @@ ; define i8 @test_v2i8(<2 x i8> %a0) { -; SSE2-LABEL: test_v2i8: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $8, %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v2i8: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: pxor %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v2i8: +; SSE: # %bb.0: +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrlw $8, %xmm1 +; SSE-NEXT: pxor %xmm0, %xmm1 +; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX-LABEL: test_v2i8: ; AVX: # %bb.0: ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.xor.v2i8(<2 x i8> %a0) @@ -714,29 +705,17 @@ } define i8 @test_v4i8(<4 x i8> %a0) { -; SSE2-LABEL: test_v4i8: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrld $16, %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: psrlw $8, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm0 -; SSE2-NEXT: movd %xmm0, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v4i8: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psrld $16, %xmm1 -; SSE41-NEXT: pxor %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: psrlw $8, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v4i8: +; SSE: # %bb.0: +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrld $16, %xmm1 +; SSE-NEXT: pxor %xmm0, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psrlw $8, %xmm0 +; SSE-NEXT: pxor %xmm1, %xmm0 +; SSE-NEXT: movd %xmm0, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX-LABEL: test_v4i8: ; AVX: # %bb.0: @@ -744,7 +723,7 @@ ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.xor.v4i8(<4 x i8> %a0) @@ -752,33 +731,19 @@ } define i8 @test_v8i8(<8 x i8> %a0) { -; SSE2-LABEL: test_v8i8: -; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; SSE2-NEXT: pxor %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: psrld $16, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $8, %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v8i8: -; SSE41: # %bb.0: -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; SSE41-NEXT: pxor %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: psrld $16, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: pxor %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v8i8: +; SSE: # %bb.0: +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; SSE-NEXT: pxor %xmm0, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psrld $16, %xmm0 +; SSE-NEXT: pxor %xmm1, %xmm0 +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrlw $8, %xmm1 +; SSE-NEXT: pxor %xmm0, %xmm1 +; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX-LABEL: test_v8i8: ; AVX: # %bb.0: @@ -788,7 +753,7 @@ ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.xor.v8i8(<8 x i8> %a0) @@ -796,37 +761,21 @@ } define i8 @test_v16i8(<16 x i8> %a0) { -; SSE2-LABEL: test_v16i8: -; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; SSE2-NEXT: pxor %xmm0, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; SSE2-NEXT: pxor %xmm1, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrld $16, %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: psrlw $8, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm0 -; SSE2-NEXT: movd %xmm0, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v16i8: -; SSE41: # %bb.0: -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; SSE41-NEXT: pxor %xmm0, %xmm1 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psrld $16, %xmm1 -; SSE41-NEXT: pxor %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: psrlw $8, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v16i8: +; SSE: # %bb.0: +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; SSE-NEXT: pxor %xmm0, %xmm1 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; SSE-NEXT: pxor %xmm1, %xmm0 +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrld $16, %xmm1 +; SSE-NEXT: pxor %xmm0, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psrlw $8, %xmm0 +; SSE-NEXT: pxor %xmm1, %xmm0 +; SSE-NEXT: movd %xmm0, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX-LABEL: test_v16i8: ; AVX: # %bb.0: @@ -838,7 +787,7 @@ ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: vmovd %xmm0, %eax ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.xor.v16i8(<16 x i8> %a0) @@ -846,39 +795,22 @@ } define i8 @test_v32i8(<32 x i8> %a0) { -; SSE2-LABEL: test_v32i8: -; SSE2: # %bb.0: -; SSE2-NEXT: pxor %xmm1, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; SSE2-NEXT: pxor %xmm0, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; SSE2-NEXT: pxor %xmm1, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrld $16, %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: psrlw $8, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm0 -; SSE2-NEXT: movd %xmm0, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v32i8: -; SSE41: # %bb.0: -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; SSE41-NEXT: pxor %xmm0, %xmm1 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psrld $16, %xmm1 -; SSE41-NEXT: pxor %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: psrlw $8, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: pextrb $0, %xmm0, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v32i8: +; SSE: # %bb.0: +; SSE-NEXT: pxor %xmm1, %xmm0 +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; SSE-NEXT: pxor %xmm0, %xmm1 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; SSE-NEXT: pxor %xmm1, %xmm0 +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrld $16, %xmm1 +; SSE-NEXT: pxor %xmm0, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psrlw $8, %xmm0 +; SSE-NEXT: pxor %xmm1, %xmm0 +; SSE-NEXT: movd %xmm0, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX1-LABEL: test_v32i8: ; AVX1: # %bb.0: @@ -892,7 +824,7 @@ ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -909,7 +841,7 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -926,7 +858,7 @@ ; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -935,43 +867,24 @@ } define i8 @test_v64i8(<64 x i8> %a0) { -; SSE2-LABEL: test_v64i8: -; SSE2: # %bb.0: -; SSE2-NEXT: pxor %xmm3, %xmm1 -; SSE2-NEXT: pxor %xmm2, %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] -; SSE2-NEXT: pxor %xmm1, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; SSE2-NEXT: pxor %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: psrld $16, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $8, %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v64i8: -; SSE41: # %bb.0: -; SSE41-NEXT: pxor %xmm3, %xmm1 -; SSE41-NEXT: pxor %xmm2, %xmm1 -; SSE41-NEXT: pxor %xmm0, %xmm1 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; SSE41-NEXT: pxor %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: psrld $16, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: pxor %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v64i8: +; SSE: # %bb.0: +; SSE-NEXT: pxor %xmm3, %xmm1 +; SSE-NEXT: pxor %xmm2, %xmm1 +; SSE-NEXT: pxor %xmm0, %xmm1 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] +; SSE-NEXT: pxor %xmm1, %xmm0 +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; SSE-NEXT: pxor %xmm0, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psrld $16, %xmm0 +; SSE-NEXT: pxor %xmm1, %xmm0 +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrlw $8, %xmm1 +; SSE-NEXT: pxor %xmm0, %xmm1 +; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX1-LABEL: test_v64i8: ; AVX1: # %bb.0: @@ -986,7 +899,7 @@ ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1004,7 +917,7 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1023,7 +936,7 @@ ; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -1032,51 +945,28 @@ } define i8 @test_v128i8(<128 x i8> %a0) { -; SSE2-LABEL: test_v128i8: -; SSE2: # %bb.0: -; SSE2-NEXT: pxor %xmm6, %xmm2 -; SSE2-NEXT: pxor %xmm7, %xmm3 -; SSE2-NEXT: pxor %xmm5, %xmm3 -; SSE2-NEXT: pxor %xmm1, %xmm3 -; SSE2-NEXT: pxor %xmm4, %xmm2 -; SSE2-NEXT: pxor %xmm3, %xmm2 -; SSE2-NEXT: pxor %xmm0, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] -; SSE2-NEXT: pxor %xmm2, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; SSE2-NEXT: pxor %xmm0, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: psrld $16, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $8, %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: # kill: def $al killed $al killed $eax -; SSE2-NEXT: retq -; -; SSE41-LABEL: test_v128i8: -; SSE41: # %bb.0: -; SSE41-NEXT: pxor %xmm6, %xmm2 -; SSE41-NEXT: pxor %xmm7, %xmm3 -; SSE41-NEXT: pxor %xmm5, %xmm3 -; SSE41-NEXT: pxor %xmm1, %xmm3 -; SSE41-NEXT: pxor %xmm4, %xmm2 -; SSE41-NEXT: pxor %xmm3, %xmm2 -; SSE41-NEXT: pxor %xmm0, %xmm2 -; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] -; SSE41-NEXT: pxor %xmm2, %xmm0 -; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; SSE41-NEXT: pxor %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: psrld $16, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: pxor %xmm0, %xmm1 -; SSE41-NEXT: pextrb $0, %xmm1, %eax -; SSE41-NEXT: # kill: def $al killed $al killed $eax -; SSE41-NEXT: retq +; SSE-LABEL: test_v128i8: +; SSE: # %bb.0: +; SSE-NEXT: pxor %xmm6, %xmm2 +; SSE-NEXT: pxor %xmm7, %xmm3 +; SSE-NEXT: pxor %xmm5, %xmm3 +; SSE-NEXT: pxor %xmm1, %xmm3 +; SSE-NEXT: pxor %xmm4, %xmm2 +; SSE-NEXT: pxor %xmm3, %xmm2 +; SSE-NEXT: pxor %xmm0, %xmm2 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] +; SSE-NEXT: pxor %xmm2, %xmm0 +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; SSE-NEXT: pxor %xmm0, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: psrld $16, %xmm0 +; SSE-NEXT: pxor %xmm1, %xmm0 +; SSE-NEXT: movdqa %xmm0, %xmm1 +; SSE-NEXT: psrlw $8, %xmm1 +; SSE-NEXT: pxor %xmm0, %xmm1 +; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: # kill: def $al killed $al killed $eax +; SSE-NEXT: retq ; ; AVX1-LABEL: test_v128i8: ; AVX1: # %bb.0: @@ -1093,7 +983,7 @@ ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1113,7 +1003,7 @@ ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: vmovd %xmm0, %eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1133,7 +1023,7 @@ ; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: vmovd %xmm0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq diff --git a/llvm/test/CodeGen/X86/widen_bitops-0.ll b/llvm/test/CodeGen/X86/widen_bitops-0.ll --- a/llvm/test/CodeGen/X86/widen_bitops-0.ll +++ b/llvm/test/CodeGen/X86/widen_bitops-0.ll @@ -138,7 +138,7 @@ ; X32-SSE-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm1 ; X32-SSE-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm1 ; X32-SSE-NEXT: pand %xmm0, %xmm1 -; X32-SSE-NEXT: pextrb $0, %xmm1, %eax +; X32-SSE-NEXT: movd %xmm1, %eax ; X32-SSE-NEXT: pextrb $1, %xmm1, %edx ; X32-SSE-NEXT: pextrb $2, %xmm1, %ecx ; X32-SSE-NEXT: # kill: def $al killed $al killed $eax @@ -155,7 +155,7 @@ ; X64-SSE-NEXT: pinsrb $1, %esi, %xmm1 ; X64-SSE-NEXT: pinsrb $2, %edx, %xmm1 ; X64-SSE-NEXT: pand %xmm0, %xmm1 -; X64-SSE-NEXT: pextrb $0, %xmm1, %eax +; X64-SSE-NEXT: movd %xmm1, %eax ; X64-SSE-NEXT: pextrb $1, %xmm1, %edx ; X64-SSE-NEXT: pextrb $2, %xmm1, %ecx ; X64-SSE-NEXT: # kill: def $al killed $al killed $eax @@ -179,7 +179,7 @@ ; X32-SSE-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm1 ; X32-SSE-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm1 ; X32-SSE-NEXT: pxor %xmm0, %xmm1 -; X32-SSE-NEXT: pextrb $0, %xmm1, %eax +; X32-SSE-NEXT: movd %xmm1, %eax ; X32-SSE-NEXT: pextrb $1, %xmm1, %edx ; X32-SSE-NEXT: pextrb $2, %xmm1, %ecx ; X32-SSE-NEXT: # kill: def $al killed $al killed $eax @@ -196,7 +196,7 @@ ; X64-SSE-NEXT: pinsrb $1, %esi, %xmm1 ; X64-SSE-NEXT: pinsrb $2, %edx, %xmm1 ; X64-SSE-NEXT: pxor %xmm0, %xmm1 -; X64-SSE-NEXT: pextrb $0, %xmm1, %eax +; X64-SSE-NEXT: movd %xmm1, %eax ; X64-SSE-NEXT: pextrb $1, %xmm1, %edx ; X64-SSE-NEXT: pextrb $2, %xmm1, %ecx ; X64-SSE-NEXT: # kill: def $al killed $al killed $eax @@ -220,7 +220,7 @@ ; X32-SSE-NEXT: pinsrb $1, {{[0-9]+}}(%esp), %xmm1 ; X32-SSE-NEXT: pinsrb $2, {{[0-9]+}}(%esp), %xmm1 ; X32-SSE-NEXT: por %xmm0, %xmm1 -; X32-SSE-NEXT: pextrb $0, %xmm1, %eax +; X32-SSE-NEXT: movd %xmm1, %eax ; X32-SSE-NEXT: pextrb $1, %xmm1, %edx ; X32-SSE-NEXT: pextrb $2, %xmm1, %ecx ; X32-SSE-NEXT: # kill: def $al killed $al killed $eax @@ -237,7 +237,7 @@ ; X64-SSE-NEXT: pinsrb $1, %esi, %xmm1 ; X64-SSE-NEXT: pinsrb $2, %edx, %xmm1 ; X64-SSE-NEXT: por %xmm0, %xmm1 -; X64-SSE-NEXT: pextrb $0, %xmm1, %eax +; X64-SSE-NEXT: movd %xmm1, %eax ; X64-SSE-NEXT: pextrb $1, %xmm1, %edx ; X64-SSE-NEXT: pextrb $2, %xmm1, %ecx ; X64-SSE-NEXT: # kill: def $al killed $al killed $eax