diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -111,6 +111,7 @@ def : WriteRes; def : WriteRes { let Latency = 5; } def : WriteRes; +def : InstRW<[WriteMove], (instrs COPY)>; def : WriteRes; def : WriteRes { let Latency = 5; let NumMicroOps = 0; } diff --git a/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll --- a/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll +++ b/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll @@ -35,8 +35,8 @@ ; CHECK-NEXT: movq X(%rip), %rdi ; CHECK-NEXT: addq %rbx, %r12 ; CHECK-NEXT: addq %r8, %rdx -; CHECK-NEXT: bswapq %rdi ; CHECK-NEXT: addq %rbx, %rdx +; CHECK-NEXT: bswapq %rdi ; CHECK-NEXT: leaq (%r15,%r14), %rsi ; CHECK-NEXT: addq %r12, %rsi ; CHECK-NEXT: addq %r11, %rdi @@ -61,8 +61,8 @@ ; CHECK-NEXT: addq %r9, %rbx ; CHECK-NEXT: addq %rbx, %r10 ; CHECK-NEXT: addq %rsi, %rdi -; CHECK-NEXT: bswapq %rcx ; CHECK-NEXT: addq %rbx, %rdi +; CHECK-NEXT: bswapq %rcx ; CHECK-NEXT: leaq (%r9,%r12), %rax ; CHECK-NEXT: addq %r10, %rax ; CHECK-NEXT: addq %r15, %rcx @@ -87,8 +87,8 @@ ; CHECK-NEXT: addq %r8, %rbx ; CHECK-NEXT: addq %rbx, %rdx ; CHECK-NEXT: addq %r11, %r14 -; CHECK-NEXT: bswapq %rax ; CHECK-NEXT: addq %rbx, %r14 +; CHECK-NEXT: bswapq %rax ; CHECK-NEXT: leaq (%r8,%r10), %rbx ; CHECK-NEXT: addq %rdx, %rbx ; CHECK-NEXT: addq %r9, %rax @@ -113,8 +113,8 @@ ; CHECK-NEXT: addq %rsi, %rbx ; CHECK-NEXT: addq %rbx, %rdi ; CHECK-NEXT: addq %r9, %r10 -; CHECK-NEXT: bswapq %rax ; CHECK-NEXT: addq %rbx, %r10 +; CHECK-NEXT: bswapq %rax ; CHECK-NEXT: leaq (%rsi,%rdx), %rbx ; CHECK-NEXT: addq %rdi, %rbx ; CHECK-NEXT: addq %r8, %rax @@ -139,8 +139,8 @@ ; CHECK-NEXT: addq %r11, %rbx ; CHECK-NEXT: addq %rbx, %r14 ; CHECK-NEXT: addq %r8, %r15 -; CHECK-NEXT: bswapq %rax ; CHECK-NEXT: addq %rbx, %r15 +; CHECK-NEXT: bswapq %rax ; CHECK-NEXT: leaq (%r11,%rdi), %rbx ; CHECK-NEXT: addq %r14, %rbx ; CHECK-NEXT: addq %rsi, %rax @@ -165,8 +165,8 @@ ; CHECK-NEXT: addq %r9, %rbx ; CHECK-NEXT: addq %rbx, %r10 ; CHECK-NEXT: addq %rsi, %r12 -; CHECK-NEXT: bswapq %rcx ; CHECK-NEXT: addq %rbx, %r12 +; CHECK-NEXT: bswapq %rcx ; CHECK-NEXT: leaq (%r9,%r14), %rax ; CHECK-NEXT: addq %r10, %rax ; CHECK-NEXT: addq %r11, %rcx @@ -191,8 +191,8 @@ ; CHECK-NEXT: addq %r8, %rbx ; CHECK-NEXT: addq %rbx, %r15 ; CHECK-NEXT: addq %rax, %rcx -; CHECK-NEXT: bswapq %rdx ; CHECK-NEXT: addq %rbx, %rcx +; CHECK-NEXT: bswapq %rdx ; CHECK-NEXT: leaq (%r8,%r10), %rbx ; CHECK-NEXT: addq %r15, %rbx ; CHECK-NEXT: addq %r9, %rdx @@ -217,9 +217,9 @@ ; CHECK-NEXT: addq %rsi, %rdx ; CHECK-NEXT: addq %rdx, %r12 ; CHECK-NEXT: addq %rdx, %rcx -; CHECK-NEXT: addq %r15, %rsi ; CHECK-NEXT: movq X(%rip), %rax ; CHECK-NEXT: bswapq %rax +; CHECK-NEXT: addq %r15, %rsi ; CHECK-NEXT: movq %rax, X(%rip) ; CHECK-NEXT: addq %r8, %rax ; CHECK-NEXT: addq %r12, %rsi diff --git a/llvm/test/CodeGen/X86/fp-load-trunc.ll b/llvm/test/CodeGen/X86/fp-load-trunc.ll --- a/llvm/test/CodeGen/X86/fp-load-trunc.ll +++ b/llvm/test/CodeGen/X86/fp-load-trunc.ll @@ -69,12 +69,12 @@ ; CHECK-LABEL: test4: ; CHECK: # %bb.0: ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: cvtpd2ps 16(%eax), %xmm1 +; CHECK-NEXT: cvtpd2ps 16(%eax), %xmm2 ; CHECK-NEXT: cvtpd2ps (%eax), %xmm0 -; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; CHECK-NEXT: cvtpd2ps 48(%eax), %xmm2 +; CHECK-NEXT: cvtpd2ps 48(%eax), %xmm3 ; CHECK-NEXT: cvtpd2ps 32(%eax), %xmm1 -; 
CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0] ; CHECK-NEXT: retl ; ; AVX-LABEL: test4: diff --git a/llvm/test/CodeGen/X86/fp-trunc.ll b/llvm/test/CodeGen/X86/fp-trunc.ll --- a/llvm/test/CodeGen/X86/fp-trunc.ll +++ b/llvm/test/CodeGen/X86/fp-trunc.ll @@ -61,11 +61,11 @@ ; CHECK-LABEL: test4: ; CHECK: # %bb.0: ; CHECK-NEXT: subl $12, %esp -; CHECK-NEXT: cvtpd2ps %xmm1, %xmm1 +; CHECK-NEXT: cvtpd2ps %xmm1, %xmm3 ; CHECK-NEXT: cvtpd2ps %xmm0, %xmm0 -; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: cvtpd2ps %xmm2, %xmm1 ; CHECK-NEXT: cvtpd2ps {{[0-9]+}}(%esp), %xmm2 +; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0] ; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; CHECK-NEXT: addl $12, %esp ; CHECK-NEXT: retl diff --git a/llvm/test/CodeGen/X86/gather-addresses.ll b/llvm/test/CodeGen/X86/gather-addresses.ll --- a/llvm/test/CodeGen/X86/gather-addresses.ll +++ b/llvm/test/CodeGen/X86/gather-addresses.ll @@ -42,9 +42,9 @@ ; LIN-SSE4-NEXT: cltq ; LIN-SSE4-NEXT: movslq %ecx, %rcx ; LIN-SSE4-NEXT: movslq %edx, %rdx +; LIN-SSE4-NEXT: movslq %esi, %rsi ; LIN-SSE4-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; LIN-SSE4-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] -; LIN-SSE4-NEXT: movslq %esi, %rax ; LIN-SSE4-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; LIN-SSE4-NEXT: movhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] ; LIN-SSE4-NEXT: retq @@ -81,9 +81,9 @@ ; WIN-SSE4-NEXT: cltq ; WIN-SSE4-NEXT: movslq %edx, %rdx ; WIN-SSE4-NEXT: movslq %r8d, %r8 +; WIN-SSE4-NEXT: movslq %r9d, %r9 ; WIN-SSE4-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; WIN-SSE4-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] -; WIN-SSE4-NEXT: movslq %r9d, %rax ; WIN-SSE4-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; WIN-SSE4-NEXT: movhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] ; WIN-SSE4-NEXT: retq @@ -94,13 +94,13 @@ ; LIN32-NEXT: pushl %esi ; LIN32-NEXT: movl {{[0-9]+}}(%esp), %eax ; LIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; LIN32-NEXT: movl {{[0-9]+}}(%esp), %edx -; LIN32-NEXT: movdqa (%edx), %xmm0 -; LIN32-NEXT: pand (%ecx), %xmm0 -; LIN32-NEXT: pextrd $1, %xmm0, %ecx -; LIN32-NEXT: pextrd $2, %xmm0, %edx -; LIN32-NEXT: pextrd $3, %xmm0, %esi -; LIN32-NEXT: movd %xmm0, %edi +; LIN32-NEXT: movdqa (%ecx), %xmm0 +; LIN32-NEXT: pand (%eax), %xmm0 +; LIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; LIN32-NEXT: movd %xmm0, %ecx +; LIN32-NEXT: pextrd $1, %xmm0, %edx +; LIN32-NEXT: pextrd $2, %xmm0, %esi +; LIN32-NEXT: pextrd $3, %xmm0, %edi ; LIN32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; LIN32-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; LIN32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero @@ -226,22 +226,22 @@ ; LIN32-NEXT: pushl %esi ; LIN32-NEXT: movl {{[0-9]+}}(%esp), %eax ; LIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; LIN32-NEXT: movl {{[0-9]+}}(%esp), %edx -; LIN32-NEXT: movdqa (%edx), %xmm0 -; LIN32-NEXT: pand (%ecx), %xmm0 +; LIN32-NEXT: movdqa (%ecx), %xmm0 +; LIN32-NEXT: pand (%eax), %xmm0 +; LIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; LIN32-NEXT: movd %xmm0, %edx ; LIN32-NEXT: pextrd $1, %xmm0, %esi -; LIN32-NEXT: pextrd $2, %xmm0, %ecx +; LIN32-NEXT: pextrd $2, %xmm0, %eax ; LIN32-NEXT: pextrd $3, %xmm0, %edi -; LIN32-NEXT: andl %eax, %edx -; LIN32-NEXT: andl %eax, %esi -; LIN32-NEXT: andl %eax, %ecx -; LIN32-NEXT: andl %eax, %edi +; LIN32-NEXT: andl %ecx, %edx +; LIN32-NEXT: andl %ecx, %esi +; LIN32-NEXT: andl %ecx, %eax +; LIN32-NEXT: andl %ecx, %edi ; LIN32-NEXT: movd %esi, %xmm1 ; LIN32-NEXT: movd %edx, %xmm0 
; LIN32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; LIN32-NEXT: movd %edi, %xmm2 -; LIN32-NEXT: movd %ecx, %xmm1 +; LIN32-NEXT: movd %eax, %xmm1 ; LIN32-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; LIN32-NEXT: popl %esi ; LIN32-NEXT: popl %edi diff --git a/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll b/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll --- a/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll +++ b/llvm/test/CodeGen/X86/machine-combiner-int-vec.ll @@ -9,15 +9,15 @@ ; SSE-LABEL: reassociate_and_v4i32: ; SSE: # %bb.0: ; SSE-NEXT: paddd %xmm1, %xmm0 -; SSE-NEXT: pand %xmm3, %xmm2 ; SSE-NEXT: pand %xmm2, %xmm0 +; SSE-NEXT: pand %xmm3, %xmm0 ; SSE-NEXT: retq ; ; AVX2-LABEL: reassociate_and_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm1 -; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpand %xmm0, %xmm2, %xmm0 +; AVX2-NEXT: vpand %xmm0, %xmm3, %xmm0 ; AVX2-NEXT: retq ; ; AVX512-LABEL: reassociate_and_v4i32: @@ -36,15 +36,15 @@ ; SSE-LABEL: reassociate_or_v4i32: ; SSE: # %bb.0: ; SSE-NEXT: paddd %xmm1, %xmm0 -; SSE-NEXT: por %xmm3, %xmm2 ; SSE-NEXT: por %xmm2, %xmm0 +; SSE-NEXT: por %xmm3, %xmm0 ; SSE-NEXT: retq ; ; AVX2-LABEL: reassociate_or_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpor %xmm3, %xmm2, %xmm1 -; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0 +; AVX2-NEXT: vpor %xmm0, %xmm3, %xmm0 ; AVX2-NEXT: retq ; ; AVX512-LABEL: reassociate_or_v4i32: @@ -63,15 +63,15 @@ ; SSE-LABEL: reassociate_xor_v4i32: ; SSE: # %bb.0: ; SSE-NEXT: paddd %xmm1, %xmm0 -; SSE-NEXT: pxor %xmm3, %xmm2 ; SSE-NEXT: pxor %xmm2, %xmm0 +; SSE-NEXT: pxor %xmm3, %xmm0 ; SSE-NEXT: retq ; ; AVX2-LABEL: reassociate_xor_v4i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm3, %xmm2, %xmm1 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm0, %xmm2, %xmm0 +; AVX2-NEXT: vpxor %xmm0, %xmm3, %xmm0 ; AVX2-NEXT: retq ; ; AVX512-LABEL: reassociate_xor_v4i32: @@ -92,18 +92,18 @@ ; SSE-LABEL: reassociate_and_v8i32: ; SSE: # %bb.0: ; SSE-NEXT: paddd %xmm2, %xmm0 -; SSE-NEXT: pand %xmm6, %xmm4 ; SSE-NEXT: pand %xmm4, %xmm0 +; SSE-NEXT: pand %xmm6, %xmm0 ; SSE-NEXT: paddd %xmm3, %xmm1 -; SSE-NEXT: pand %xmm7, %xmm5 ; SSE-NEXT: pand %xmm5, %xmm1 +; SSE-NEXT: pand %xmm7, %xmm1 ; SSE-NEXT: retq ; ; AVX2-LABEL: reassociate_and_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm1 -; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpand %ymm0, %ymm2, %ymm0 +; AVX2-NEXT: vpand %ymm0, %ymm3, %ymm0 ; AVX2-NEXT: retq ; ; AVX512-LABEL: reassociate_and_v8i32: @@ -122,18 +122,18 @@ ; SSE-LABEL: reassociate_or_v8i32: ; SSE: # %bb.0: ; SSE-NEXT: paddd %xmm2, %xmm0 -; SSE-NEXT: por %xmm6, %xmm4 ; SSE-NEXT: por %xmm4, %xmm0 +; SSE-NEXT: por %xmm6, %xmm0 ; SSE-NEXT: paddd %xmm3, %xmm1 -; SSE-NEXT: por %xmm7, %xmm5 ; SSE-NEXT: por %xmm5, %xmm1 +; SSE-NEXT: por %xmm7, %xmm1 ; SSE-NEXT: retq ; ; AVX2-LABEL: reassociate_or_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm1 -; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0 +; AVX2-NEXT: vpor %ymm0, %ymm3, %ymm0 ; AVX2-NEXT: retq ; ; AVX512-LABEL: reassociate_or_v8i32: @@ -152,18 +152,18 @@ ; SSE-LABEL: reassociate_xor_v8i32: ; SSE: # %bb.0: ; SSE-NEXT: paddd %xmm2, %xmm0 -; SSE-NEXT: pxor %xmm6, %xmm4 ; SSE-NEXT: pxor %xmm4, %xmm0 +; SSE-NEXT: pxor %xmm6, %xmm0 ; SSE-NEXT: 
paddd %xmm3, %xmm1 -; SSE-NEXT: pxor %xmm7, %xmm5 ; SSE-NEXT: pxor %xmm5, %xmm1 +; SSE-NEXT: pxor %xmm7, %xmm1 ; SSE-NEXT: retq ; ; AVX2-LABEL: reassociate_xor_v8i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm1 -; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpxor %ymm0, %ymm2, %ymm0 +; AVX2-NEXT: vpxor %ymm0, %ymm3, %ymm0 ; AVX2-NEXT: retq ; ; AVX512-LABEL: reassociate_xor_v8i32: @@ -201,11 +201,11 @@ ; AVX2-LABEL: reassociate_and_v16i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpand %ymm6, %ymm4, %ymm2 -; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpand %ymm0, %ymm4, %ymm0 +; AVX2-NEXT: vpand %ymm0, %ymm6, %ymm0 ; AVX2-NEXT: vpaddd %ymm3, %ymm1, %ymm1 -; AVX2-NEXT: vpand %ymm7, %ymm5, %ymm2 -; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpand %ymm1, %ymm5, %ymm1 +; AVX2-NEXT: vpand %ymm1, %ymm7, %ymm1 ; AVX2-NEXT: retq ; ; AVX512-LABEL: reassociate_and_v16i32: @@ -240,11 +240,11 @@ ; AVX2-LABEL: reassociate_or_v16i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpor %ymm6, %ymm4, %ymm2 -; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpor %ymm0, %ymm4, %ymm0 +; AVX2-NEXT: vpor %ymm0, %ymm6, %ymm0 ; AVX2-NEXT: vpaddd %ymm3, %ymm1, %ymm1 -; AVX2-NEXT: vpor %ymm7, %ymm5, %ymm2 -; AVX2-NEXT: vpor %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpor %ymm1, %ymm5, %ymm1 +; AVX2-NEXT: vpor %ymm1, %ymm7, %ymm1 ; AVX2-NEXT: retq ; ; AVX512-LABEL: reassociate_or_v16i32: @@ -279,11 +279,11 @@ ; AVX2-LABEL: reassociate_xor_v16i32: ; AVX2: # %bb.0: ; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpxor %ymm6, %ymm4, %ymm2 -; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpxor %ymm0, %ymm4, %ymm0 +; AVX2-NEXT: vpxor %ymm0, %ymm6, %ymm0 ; AVX2-NEXT: vpaddd %ymm3, %ymm1, %ymm1 -; AVX2-NEXT: vpxor %ymm7, %ymm5, %ymm2 -; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpxor %ymm1, %ymm5, %ymm1 +; AVX2-NEXT: vpxor %ymm1, %ymm7, %ymm1 ; AVX2-NEXT: retq ; ; AVX512-LABEL: reassociate_xor_v16i32: @@ -304,15 +304,15 @@ ; SSE-LABEL: reassociate_umax_v16i8: ; SSE: # %bb.0: ; SSE-NEXT: paddb %xmm1, %xmm0 -; SSE-NEXT: pmaxub %xmm3, %xmm2 ; SSE-NEXT: pmaxub %xmm2, %xmm0 +; SSE-NEXT: pmaxub %xmm3, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: reassociate_umax_v16i8: ; AVX: # %bb.0: ; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpmaxub %xmm3, %xmm2, %xmm1 -; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpmaxub %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vpmaxub %xmm0, %xmm3, %xmm0 ; AVX-NEXT: retq %t0 = add <16 x i8> %x0, %x1 @@ -336,8 +336,8 @@ ; AVX-LABEL: reassociate_umax_v8i16: ; AVX: # %bb.0: ; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpmaxuw %xmm3, %xmm2, %xmm1 -; AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpmaxuw %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vpmaxuw %xmm0, %xmm3, %xmm0 ; AVX-NEXT: retq %t0 = add <8 x i16> %x0, %x1 @@ -374,8 +374,8 @@ ; AVX-LABEL: reassociate_umax_v4i32: ; AVX: # %bb.0: ; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpmaxud %xmm3, %xmm2, %xmm1 -; AVX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpmaxud %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vpmaxud %xmm0, %xmm3, %xmm0 ; AVX-NEXT: retq %t0 = add <4 x i32> %x0, %x1 @@ -439,8 +439,8 @@ ; AVX512-LABEL: reassociate_umax_v2i64: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpmaxuq %xmm3, %xmm2, %xmm1 -; AVX512-NEXT: vpmaxuq %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpmaxuq %xmm0, %xmm2, %xmm0 +; AVX512-NEXT: vpmaxuq %xmm0, %xmm3, %xmm0 ; AVX512-NEXT: retq %t0 = add <2 x i64> %x0, 
%x1 @@ -470,8 +470,8 @@ ; AVX-LABEL: reassociate_smax_v16i8: ; AVX: # %bb.0: ; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpmaxsb %xmm3, %xmm2, %xmm1 -; AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpmaxsb %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vpmaxsb %xmm0, %xmm3, %xmm0 ; AVX-NEXT: retq %t0 = add <16 x i8> %x0, %x1 @@ -486,15 +486,15 @@ ; SSE-LABEL: reassociate_smax_v8i16: ; SSE: # %bb.0: ; SSE-NEXT: paddw %xmm1, %xmm0 -; SSE-NEXT: pmaxsw %xmm3, %xmm2 ; SSE-NEXT: pmaxsw %xmm2, %xmm0 +; SSE-NEXT: pmaxsw %xmm3, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: reassociate_smax_v8i16: ; AVX: # %bb.0: ; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpmaxsw %xmm3, %xmm2, %xmm1 -; AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpmaxsw %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vpmaxsw %xmm0, %xmm3, %xmm0 ; AVX-NEXT: retq %t0 = add <8 x i16> %x0, %x1 @@ -524,8 +524,8 @@ ; AVX-LABEL: reassociate_smax_v4i32: ; AVX: # %bb.0: ; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpmaxsd %xmm3, %xmm2, %xmm1 -; AVX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpmaxsd %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vpmaxsd %xmm0, %xmm3, %xmm0 ; AVX-NEXT: retq %t0 = add <4 x i32> %x0, %x1 @@ -584,8 +584,8 @@ ; AVX512-LABEL: reassociate_smax_v2i64: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpmaxsq %xmm3, %xmm2, %xmm1 -; AVX512-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpmaxsq %xmm0, %xmm2, %xmm0 +; AVX512-NEXT: vpmaxsq %xmm0, %xmm3, %xmm0 ; AVX512-NEXT: retq %t0 = add <2 x i64> %x0, %x1 @@ -600,15 +600,15 @@ ; SSE-LABEL: reassociate_umin_v16i8: ; SSE: # %bb.0: ; SSE-NEXT: paddb %xmm1, %xmm0 -; SSE-NEXT: pminub %xmm3, %xmm2 ; SSE-NEXT: pminub %xmm2, %xmm0 +; SSE-NEXT: pminub %xmm3, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: reassociate_umin_v16i8: ; AVX: # %bb.0: ; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpminub %xmm3, %xmm2, %xmm1 -; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpminub %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vpminub %xmm0, %xmm3, %xmm0 ; AVX-NEXT: retq %t0 = add <16 x i8> %x0, %x1 @@ -635,8 +635,8 @@ ; AVX-LABEL: reassociate_umin_v8i16: ; AVX: # %bb.0: ; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpminuw %xmm3, %xmm2, %xmm1 -; AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpminuw %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vpminuw %xmm0, %xmm3, %xmm0 ; AVX-NEXT: retq %t0 = add <8 x i16> %x0, %x1 @@ -672,8 +672,8 @@ ; AVX-LABEL: reassociate_umin_v4i32: ; AVX: # %bb.0: ; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpminud %xmm3, %xmm2, %xmm1 -; AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpminud %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vpminud %xmm0, %xmm3, %xmm0 ; AVX-NEXT: retq %t0 = add <4 x i32> %x0, %x1 @@ -737,8 +737,8 @@ ; AVX512-LABEL: reassociate_umin_v2i64: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpminuq %xmm3, %xmm2, %xmm1 -; AVX512-NEXT: vpminuq %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpminuq %xmm0, %xmm2, %xmm0 +; AVX512-NEXT: vpminuq %xmm0, %xmm3, %xmm0 ; AVX512-NEXT: retq %t0 = add <2 x i64> %x0, %x1 @@ -768,8 +768,8 @@ ; AVX-LABEL: reassociate_smin_v16i8: ; AVX: # %bb.0: ; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpminsb %xmm3, %xmm2, %xmm1 -; AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpminsb %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vpminsb %xmm0, %xmm3, %xmm0 ; AVX-NEXT: retq %t0 = add <16 x i8> %x0, %x1 @@ -784,15 +784,15 @@ ; SSE-LABEL: reassociate_smin_v8i16: ; SSE: # %bb.0: ; SSE-NEXT: paddw %xmm1, %xmm0 -; SSE-NEXT: pminsw %xmm3, %xmm2 ; SSE-NEXT: pminsw %xmm2, %xmm0 +; SSE-NEXT: pminsw %xmm3, 
%xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: reassociate_smin_v8i16: ; AVX: # %bb.0: ; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpminsw %xmm3, %xmm2, %xmm1 -; AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpminsw %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vpminsw %xmm0, %xmm3, %xmm0 ; AVX-NEXT: retq %t0 = add <8 x i16> %x0, %x1 @@ -822,8 +822,8 @@ ; AVX-LABEL: reassociate_smin_v4i32: ; AVX: # %bb.0: ; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpminsd %xmm3, %xmm2, %xmm1 -; AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpminsd %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vpminsd %xmm0, %xmm3, %xmm0 ; AVX-NEXT: retq %t0 = add <4 x i32> %x0, %x1 @@ -882,8 +882,8 @@ ; AVX512-LABEL: reassociate_smin_v2i64: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpminsq %xmm3, %xmm2, %xmm1 -; AVX512-NEXT: vpminsq %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpminsq %xmm0, %xmm2, %xmm0 +; AVX512-NEXT: vpminsq %xmm0, %xmm3, %xmm0 ; AVX512-NEXT: retq %t0 = add <2 x i64> %x0, %x1 @@ -901,17 +901,17 @@ ; SSE: # %bb.0: ; SSE-NEXT: paddb %xmm2, %xmm0 ; SSE-NEXT: paddb %xmm3, %xmm1 -; SSE-NEXT: pmaxub %xmm6, %xmm4 -; SSE-NEXT: pmaxub %xmm4, %xmm0 -; SSE-NEXT: pmaxub %xmm7, %xmm5 ; SSE-NEXT: pmaxub %xmm5, %xmm1 +; SSE-NEXT: pmaxub %xmm4, %xmm0 +; SSE-NEXT: pmaxub %xmm6, %xmm0 +; SSE-NEXT: pmaxub %xmm7, %xmm1 ; SSE-NEXT: retq ; ; AVX-LABEL: reassociate_umax_v32i8: ; AVX: # %bb.0: ; AVX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 -; AVX-NEXT: vpmaxub %ymm3, %ymm2, %ymm1 -; AVX-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vpmaxub %ymm0, %ymm2, %ymm0 +; AVX-NEXT: vpmaxub %ymm0, %ymm3, %ymm0 ; AVX-NEXT: retq %t0 = add <32 x i8> %x0, %x1 @@ -940,8 +940,8 @@ ; AVX-LABEL: reassociate_umax_v16i16: ; AVX: # %bb.0: ; AVX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 -; AVX-NEXT: vpmaxuw %ymm3, %ymm2, %ymm1 -; AVX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vpmaxuw %ymm0, %ymm2, %ymm0 +; AVX-NEXT: vpmaxuw %ymm0, %ymm3, %ymm0 ; AVX-NEXT: retq %t0 = add <16 x i16> %x0, %x1 @@ -995,8 +995,8 @@ ; AVX-LABEL: reassociate_umax_v8i32: ; AVX: # %bb.0: ; AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 -; AVX-NEXT: vpmaxud %ymm3, %ymm2, %ymm1 -; AVX-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vpmaxud %ymm0, %ymm2, %ymm0 +; AVX-NEXT: vpmaxud %ymm0, %ymm3, %ymm0 ; AVX-NEXT: retq %t0 = add <8 x i32> %x0, %x1 @@ -1076,23 +1076,23 @@ ; ; AVX2-LABEL: reassociate_umax_v4i64: ; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; AVX2-NEXT: vpxor %ymm1, %ymm2, %ymm4 -; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm5 -; AVX2-NEXT: vpcmpgtq %ymm5, %ymm4, %ymm4 -; AVX2-NEXT: vblendvpd %ymm4, %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vxorpd %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vpxor %ymm1, %ymm3, %ymm1 -; AVX2-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpxor %ymm4, %ymm2, %ymm1 +; AVX2-NEXT: vpxor %ymm4, %ymm0, %ymm5 +; AVX2-NEXT: vpcmpgtq %ymm5, %ymm1, %ymm1 +; AVX2-NEXT: vblendvpd %ymm1, %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vxorpd %ymm4, %ymm0, %ymm1 +; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm2 +; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm1 ; AVX2-NEXT: vblendvpd %ymm1, %ymm3, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; AVX512-LABEL: reassociate_umax_v4i64: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddq %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpmaxuq %ymm3, %ymm2, %ymm1 -; AVX512-NEXT: vpmaxuq %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vpmaxuq %ymm0, 
%ymm2, %ymm0 +; AVX512-NEXT: vpmaxuq %ymm0, %ymm3, %ymm0 ; AVX512-NEXT: retq %t0 = add <4 x i64> %x0, %x1 @@ -1133,8 +1133,8 @@ ; AVX-LABEL: reassociate_smax_v32i8: ; AVX: # %bb.0: ; AVX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 -; AVX-NEXT: vpmaxsb %ymm3, %ymm2, %ymm1 -; AVX-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vpmaxsb %ymm0, %ymm2, %ymm0 +; AVX-NEXT: vpmaxsb %ymm0, %ymm3, %ymm0 ; AVX-NEXT: retq %t0 = add <32 x i8> %x0, %x1 @@ -1150,17 +1150,17 @@ ; SSE: # %bb.0: ; SSE-NEXT: paddw %xmm2, %xmm0 ; SSE-NEXT: paddw %xmm3, %xmm1 -; SSE-NEXT: pmaxsw %xmm6, %xmm4 -; SSE-NEXT: pmaxsw %xmm4, %xmm0 -; SSE-NEXT: pmaxsw %xmm7, %xmm5 ; SSE-NEXT: pmaxsw %xmm5, %xmm1 +; SSE-NEXT: pmaxsw %xmm4, %xmm0 +; SSE-NEXT: pmaxsw %xmm6, %xmm0 +; SSE-NEXT: pmaxsw %xmm7, %xmm1 ; SSE-NEXT: retq ; ; AVX-LABEL: reassociate_smax_v16i16: ; AVX: # %bb.0: ; AVX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 -; AVX-NEXT: vpmaxsw %ymm3, %ymm2, %ymm1 -; AVX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vpmaxsw %ymm0, %ymm2, %ymm0 +; AVX-NEXT: vpmaxsw %ymm0, %ymm3, %ymm0 ; AVX-NEXT: retq %t0 = add <16 x i16> %x0, %x1 @@ -1201,8 +1201,8 @@ ; AVX-LABEL: reassociate_smax_v8i32: ; AVX: # %bb.0: ; AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 -; AVX-NEXT: vpmaxsd %ymm3, %ymm2, %ymm1 -; AVX-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vpmaxsd %ymm0, %ymm2, %ymm0 +; AVX-NEXT: vpmaxsd %ymm0, %ymm3, %ymm0 ; AVX-NEXT: retq %t0 = add <8 x i32> %x0, %x1 @@ -1292,8 +1292,8 @@ ; AVX512-LABEL: reassociate_smax_v4i64: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddq %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpmaxsq %ymm3, %ymm2, %ymm1 -; AVX512-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vpmaxsq %ymm0, %ymm2, %ymm0 +; AVX512-NEXT: vpmaxsq %ymm0, %ymm3, %ymm0 ; AVX512-NEXT: retq %t0 = add <4 x i64> %x0, %x1 @@ -1309,17 +1309,17 @@ ; SSE: # %bb.0: ; SSE-NEXT: paddb %xmm2, %xmm0 ; SSE-NEXT: paddb %xmm3, %xmm1 -; SSE-NEXT: pminub %xmm6, %xmm4 -; SSE-NEXT: pminub %xmm4, %xmm0 -; SSE-NEXT: pminub %xmm7, %xmm5 ; SSE-NEXT: pminub %xmm5, %xmm1 +; SSE-NEXT: pminub %xmm4, %xmm0 +; SSE-NEXT: pminub %xmm6, %xmm0 +; SSE-NEXT: pminub %xmm7, %xmm1 ; SSE-NEXT: retq ; ; AVX-LABEL: reassociate_umin_v32i8: ; AVX: # %bb.0: ; AVX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 -; AVX-NEXT: vpminub %ymm3, %ymm2, %ymm1 -; AVX-NEXT: vpminub %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vpminub %ymm0, %ymm2, %ymm0 +; AVX-NEXT: vpminub %ymm0, %ymm3, %ymm0 ; AVX-NEXT: retq %t0 = add <32 x i8> %x0, %x1 @@ -1354,8 +1354,8 @@ ; AVX-LABEL: reassociate_umin_v16i16: ; AVX: # %bb.0: ; AVX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 -; AVX-NEXT: vpminuw %ymm3, %ymm2, %ymm1 -; AVX-NEXT: vpminuw %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vpminuw %ymm0, %ymm2, %ymm0 +; AVX-NEXT: vpminuw %ymm0, %ymm3, %ymm0 ; AVX-NEXT: retq %t0 = add <16 x i16> %x0, %x1 @@ -1408,8 +1408,8 @@ ; AVX-LABEL: reassociate_umin_v8i32: ; AVX: # %bb.0: ; AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 -; AVX-NEXT: vpminud %ymm3, %ymm2, %ymm1 -; AVX-NEXT: vpminud %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vpminud %ymm0, %ymm2, %ymm0 +; AVX-NEXT: vpminud %ymm0, %ymm3, %ymm0 ; AVX-NEXT: retq %t0 = add <8 x i32> %x0, %x1 @@ -1489,23 +1489,23 @@ ; ; AVX2-LABEL: reassociate_umin_v4i64: ; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; AVX2-NEXT: vpxor %ymm1, %ymm2, %ymm4 -; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm5 -; AVX2-NEXT: vpcmpgtq %ymm4, %ymm5, %ymm4 
-; AVX2-NEXT: vblendvpd %ymm4, %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vxorpd %ymm1, %ymm0, %ymm2 -; AVX2-NEXT: vpxor %ymm1, %ymm3, %ymm1 -; AVX2-NEXT: vpcmpgtq %ymm1, %ymm2, %ymm1 +; AVX2-NEXT: vpxor %ymm4, %ymm2, %ymm1 +; AVX2-NEXT: vpxor %ymm4, %ymm0, %ymm5 +; AVX2-NEXT: vpcmpgtq %ymm1, %ymm5, %ymm1 +; AVX2-NEXT: vblendvpd %ymm1, %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vxorpd %ymm4, %ymm0, %ymm1 +; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm2 +; AVX2-NEXT: vpcmpgtq %ymm2, %ymm1, %ymm1 ; AVX2-NEXT: vblendvpd %ymm1, %ymm3, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; AVX512-LABEL: reassociate_umin_v4i64: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddq %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpminuq %ymm3, %ymm2, %ymm1 -; AVX512-NEXT: vpminuq %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vpminuq %ymm0, %ymm2, %ymm0 +; AVX512-NEXT: vpminuq %ymm0, %ymm3, %ymm0 ; AVX512-NEXT: retq %t0 = add <4 x i64> %x0, %x1 @@ -1546,8 +1546,8 @@ ; AVX-LABEL: reassociate_smin_v32i8: ; AVX: # %bb.0: ; AVX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 -; AVX-NEXT: vpminsb %ymm3, %ymm2, %ymm1 -; AVX-NEXT: vpminsb %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vpminsb %ymm0, %ymm2, %ymm0 +; AVX-NEXT: vpminsb %ymm0, %ymm3, %ymm0 ; AVX-NEXT: retq %t0 = add <32 x i8> %x0, %x1 @@ -1563,17 +1563,17 @@ ; SSE: # %bb.0: ; SSE-NEXT: paddw %xmm2, %xmm0 ; SSE-NEXT: paddw %xmm3, %xmm1 -; SSE-NEXT: pminsw %xmm6, %xmm4 -; SSE-NEXT: pminsw %xmm4, %xmm0 -; SSE-NEXT: pminsw %xmm7, %xmm5 ; SSE-NEXT: pminsw %xmm5, %xmm1 +; SSE-NEXT: pminsw %xmm4, %xmm0 +; SSE-NEXT: pminsw %xmm6, %xmm0 +; SSE-NEXT: pminsw %xmm7, %xmm1 ; SSE-NEXT: retq ; ; AVX-LABEL: reassociate_smin_v16i16: ; AVX: # %bb.0: ; AVX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 -; AVX-NEXT: vpminsw %ymm3, %ymm2, %ymm1 -; AVX-NEXT: vpminsw %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vpminsw %ymm0, %ymm2, %ymm0 +; AVX-NEXT: vpminsw %ymm0, %ymm3, %ymm0 ; AVX-NEXT: retq %t0 = add <16 x i16> %x0, %x1 @@ -1614,8 +1614,8 @@ ; AVX-LABEL: reassociate_smin_v8i32: ; AVX: # %bb.0: ; AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 -; AVX-NEXT: vpminsd %ymm3, %ymm2, %ymm1 -; AVX-NEXT: vpminsd %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vpminsd %ymm0, %ymm2, %ymm0 +; AVX-NEXT: vpminsd %ymm0, %ymm3, %ymm0 ; AVX-NEXT: retq %t0 = add <8 x i32> %x0, %x1 @@ -1705,8 +1705,8 @@ ; AVX512-LABEL: reassociate_smin_v4i64: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddq %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpminsq %ymm3, %ymm2, %ymm1 -; AVX512-NEXT: vpminsq %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vpminsq %ymm0, %ymm2, %ymm0 +; AVX512-NEXT: vpminsq %ymm0, %ymm3, %ymm0 ; AVX512-NEXT: retq %t0 = add <4 x i64> %x0, %x1 @@ -1740,17 +1740,17 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpaddb %ymm3, %ymm1, %ymm1 -; AVX2-NEXT: vpmaxub %ymm6, %ymm4, %ymm2 -; AVX2-NEXT: vpmaxub %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpmaxub %ymm7, %ymm5, %ymm2 -; AVX2-NEXT: vpmaxub %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpmaxub %ymm1, %ymm5, %ymm1 +; AVX2-NEXT: vpmaxub %ymm0, %ymm4, %ymm0 +; AVX2-NEXT: vpmaxub %ymm0, %ymm6, %ymm0 +; AVX2-NEXT: vpmaxub %ymm1, %ymm7, %ymm1 ; AVX2-NEXT: retq ; ; AVX512-LABEL: reassociate_umax_v64i8: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddb %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmaxub %zmm3, %zmm2, %zmm1 -; AVX512-NEXT: vpmaxub %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpmaxub %zmm0, %zmm2, %zmm0 +; AVX512-NEXT: vpmaxub %zmm0, %zmm3, %zmm0 ; AVX512-NEXT: retq %t0 = add <64 x i8> %x0, %x1 @@ -1798,17 +1798,17 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vpaddw %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpaddw %ymm3, %ymm1, %ymm1 -; AVX2-NEXT: vpmaxuw %ymm6, %ymm4, %ymm2 -; AVX2-NEXT: vpmaxuw %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpmaxuw 
%ymm7, %ymm5, %ymm2 -; AVX2-NEXT: vpmaxuw %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpmaxuw %ymm1, %ymm5, %ymm1 +; AVX2-NEXT: vpmaxuw %ymm0, %ymm4, %ymm0 +; AVX2-NEXT: vpmaxuw %ymm0, %ymm6, %ymm0 +; AVX2-NEXT: vpmaxuw %ymm1, %ymm7, %ymm1 ; AVX2-NEXT: retq ; ; AVX512-LABEL: reassociate_umax_v32i16: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddw %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmaxuw %zmm3, %zmm2, %zmm1 -; AVX512-NEXT: vpmaxuw %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpmaxuw %zmm0, %zmm2, %zmm0 +; AVX512-NEXT: vpmaxuw %zmm0, %zmm3, %zmm0 ; AVX512-NEXT: retq %t0 = add <32 x i16> %x0, %x1 @@ -1907,17 +1907,17 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpaddd %ymm3, %ymm1, %ymm1 -; AVX2-NEXT: vpmaxud %ymm6, %ymm4, %ymm2 -; AVX2-NEXT: vpmaxud %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpmaxud %ymm7, %ymm5, %ymm2 -; AVX2-NEXT: vpmaxud %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpmaxud %ymm1, %ymm5, %ymm1 +; AVX2-NEXT: vpmaxud %ymm0, %ymm4, %ymm0 +; AVX2-NEXT: vpmaxud %ymm0, %ymm6, %ymm0 +; AVX2-NEXT: vpmaxud %ymm1, %ymm7, %ymm1 ; AVX2-NEXT: retq ; ; AVX512-LABEL: reassociate_umax_v16i32: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmaxud %zmm3, %zmm2, %zmm1 -; AVX512-NEXT: vpmaxud %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpmaxud %zmm0, %zmm2, %zmm0 +; AVX512-NEXT: vpmaxud %zmm0, %zmm3, %zmm0 ; AVX512-NEXT: retq %t0 = add <16 x i32> %x0, %x1 @@ -2067,32 +2067,32 @@ ; ; AVX2-LABEL: reassociate_umax_v8i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vpaddq %ymm3, %ymm1, %ymm1 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; AVX2-NEXT: vpxor %ymm3, %ymm5, %ymm8 -; AVX2-NEXT: vpxor %ymm3, %ymm1, %ymm9 -; AVX2-NEXT: vpcmpgtq %ymm9, %ymm8, %ymm8 -; AVX2-NEXT: vblendvpd %ymm8, %ymm5, %ymm1, %ymm1 ; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpxor %ymm3, %ymm4, %ymm2 -; AVX2-NEXT: vpxor %ymm3, %ymm0, %ymm5 -; AVX2-NEXT: vpcmpgtq %ymm5, %ymm2, %ymm2 -; AVX2-NEXT: vblendvpd %ymm2, %ymm4, %ymm0, %ymm0 -; AVX2-NEXT: vxorpd %ymm3, %ymm0, %ymm2 -; AVX2-NEXT: vpxor %ymm3, %ymm6, %ymm4 -; AVX2-NEXT: vpcmpgtq %ymm2, %ymm4, %ymm2 -; AVX2-NEXT: vblendvpd %ymm2, %ymm6, %ymm0, %ymm0 -; AVX2-NEXT: vxorpd %ymm3, %ymm1, %ymm2 -; AVX2-NEXT: vpxor %ymm3, %ymm7, %ymm3 -; AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpaddq %ymm3, %ymm1, %ymm1 +; AVX2-NEXT: vpxor %ymm2, %ymm5, %ymm3 +; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm8 +; AVX2-NEXT: vpcmpgtq %ymm8, %ymm3, %ymm3 +; AVX2-NEXT: vblendvpd %ymm3, %ymm5, %ymm1, %ymm1 +; AVX2-NEXT: vpxor %ymm2, %ymm4, %ymm3 +; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm5 +; AVX2-NEXT: vpcmpgtq %ymm5, %ymm3, %ymm3 +; AVX2-NEXT: vblendvpd %ymm3, %ymm4, %ymm0, %ymm0 +; AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3 +; AVX2-NEXT: vpxor %ymm2, %ymm6, %ymm4 +; AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3 +; AVX2-NEXT: vblendvpd %ymm3, %ymm6, %ymm0, %ymm0 +; AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm3 +; AVX2-NEXT: vpxor %ymm2, %ymm7, %ymm2 +; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2 ; AVX2-NEXT: vblendvpd %ymm2, %ymm7, %ymm1, %ymm1 ; AVX2-NEXT: retq ; ; AVX512-LABEL: reassociate_umax_v8i64: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmaxuq %zmm3, %zmm2, %zmm1 -; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpmaxuq %zmm0, %zmm2, %zmm0 +; AVX512-NEXT: vpmaxuq %zmm0, %zmm3, %zmm0 ; AVX512-NEXT: retq %t0 = add <8 x i64> %x0, %x1 @@ -2164,17 +2164,17 @@ ; 
AVX2: # %bb.0: ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpaddb %ymm3, %ymm1, %ymm1 -; AVX2-NEXT: vpmaxsb %ymm6, %ymm4, %ymm2 -; AVX2-NEXT: vpmaxsb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpmaxsb %ymm7, %ymm5, %ymm2 -; AVX2-NEXT: vpmaxsb %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpmaxsb %ymm1, %ymm5, %ymm1 +; AVX2-NEXT: vpmaxsb %ymm0, %ymm4, %ymm0 +; AVX2-NEXT: vpmaxsb %ymm0, %ymm6, %ymm0 +; AVX2-NEXT: vpmaxsb %ymm1, %ymm7, %ymm1 ; AVX2-NEXT: retq ; ; AVX512-LABEL: reassociate_smax_v64i8: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddb %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmaxsb %zmm3, %zmm2, %zmm1 -; AVX512-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpmaxsb %zmm0, %zmm2, %zmm0 +; AVX512-NEXT: vpmaxsb %zmm0, %zmm3, %zmm0 ; AVX512-NEXT: retq %t0 = add <64 x i8> %x0, %x1 @@ -2206,17 +2206,17 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vpaddw %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpaddw %ymm3, %ymm1, %ymm1 -; AVX2-NEXT: vpmaxsw %ymm6, %ymm4, %ymm2 -; AVX2-NEXT: vpmaxsw %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpmaxsw %ymm7, %ymm5, %ymm2 -; AVX2-NEXT: vpmaxsw %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpmaxsw %ymm1, %ymm5, %ymm1 +; AVX2-NEXT: vpmaxsw %ymm0, %ymm4, %ymm0 +; AVX2-NEXT: vpmaxsw %ymm0, %ymm6, %ymm0 +; AVX2-NEXT: vpmaxsw %ymm1, %ymm7, %ymm1 ; AVX2-NEXT: retq ; ; AVX512-LABEL: reassociate_smax_v32i16: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddw %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmaxsw %zmm3, %zmm2, %zmm1 -; AVX512-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpmaxsw %zmm0, %zmm2, %zmm0 +; AVX512-NEXT: vpmaxsw %zmm0, %zmm3, %zmm0 ; AVX512-NEXT: retq %t0 = add <32 x i16> %x0, %x1 @@ -2288,17 +2288,17 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpaddd %ymm3, %ymm1, %ymm1 -; AVX2-NEXT: vpmaxsd %ymm6, %ymm4, %ymm2 -; AVX2-NEXT: vpmaxsd %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpmaxsd %ymm7, %ymm5, %ymm2 -; AVX2-NEXT: vpmaxsd %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpmaxsd %ymm1, %ymm5, %ymm1 +; AVX2-NEXT: vpmaxsd %ymm0, %ymm4, %ymm0 +; AVX2-NEXT: vpmaxsd %ymm0, %ymm6, %ymm0 +; AVX2-NEXT: vpmaxsd %ymm1, %ymm7, %ymm1 ; AVX2-NEXT: retq ; ; AVX512-LABEL: reassociate_smax_v16i32: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmaxsd %zmm3, %zmm2, %zmm1 -; AVX512-NEXT: vpmaxsd %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpmaxsd %zmm0, %zmm2, %zmm0 +; AVX512-NEXT: vpmaxsd %zmm0, %zmm3, %zmm0 ; AVX512-NEXT: retq %t0 = add <16 x i32> %x0, %x1 @@ -2463,8 +2463,8 @@ ; AVX512-LABEL: reassociate_smax_v8i64: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vpmaxsq %zmm3, %zmm2, %zmm1 -; AVX512-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpmaxsq %zmm0, %zmm2, %zmm0 +; AVX512-NEXT: vpmaxsq %zmm0, %zmm3, %zmm0 ; AVX512-NEXT: retq %t0 = add <8 x i64> %x0, %x1 @@ -2496,17 +2496,17 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpaddb %ymm3, %ymm1, %ymm1 -; AVX2-NEXT: vpminub %ymm6, %ymm4, %ymm2 -; AVX2-NEXT: vpminub %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpminub %ymm7, %ymm5, %ymm2 -; AVX2-NEXT: vpminub %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpminub %ymm1, %ymm5, %ymm1 +; AVX2-NEXT: vpminub %ymm0, %ymm4, %ymm0 +; AVX2-NEXT: vpminub %ymm0, %ymm6, %ymm0 +; AVX2-NEXT: vpminub %ymm1, %ymm7, %ymm1 ; AVX2-NEXT: retq ; ; AVX512-LABEL: reassociate_umin_v64i8: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddb %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vpminub %zmm3, %zmm2, %zmm1 -; AVX512-NEXT: vpminub %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpminub %zmm0, %zmm2, %zmm0 +; AVX512-NEXT: vpminub %zmm0, %zmm3, %zmm0 ; AVX512-NEXT: retq %t0 = add <64 x i8> %x0, %x1 @@ -2566,17 
+2566,17 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vpaddw %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpaddw %ymm3, %ymm1, %ymm1 -; AVX2-NEXT: vpminuw %ymm6, %ymm4, %ymm2 -; AVX2-NEXT: vpminuw %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpminuw %ymm7, %ymm5, %ymm2 -; AVX2-NEXT: vpminuw %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpminuw %ymm1, %ymm5, %ymm1 +; AVX2-NEXT: vpminuw %ymm0, %ymm4, %ymm0 +; AVX2-NEXT: vpminuw %ymm0, %ymm6, %ymm0 +; AVX2-NEXT: vpminuw %ymm1, %ymm7, %ymm1 ; AVX2-NEXT: retq ; ; AVX512-LABEL: reassociate_umin_v32i16: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddw %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vpminuw %zmm3, %zmm2, %zmm1 -; AVX512-NEXT: vpminuw %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpminuw %zmm0, %zmm2, %zmm0 +; AVX512-NEXT: vpminuw %zmm0, %zmm3, %zmm0 ; AVX512-NEXT: retq %t0 = add <32 x i16> %x0, %x1 @@ -2672,17 +2672,17 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpaddd %ymm3, %ymm1, %ymm1 -; AVX2-NEXT: vpminud %ymm6, %ymm4, %ymm2 -; AVX2-NEXT: vpminud %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpminud %ymm7, %ymm5, %ymm2 -; AVX2-NEXT: vpminud %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpminud %ymm1, %ymm5, %ymm1 +; AVX2-NEXT: vpminud %ymm0, %ymm4, %ymm0 +; AVX2-NEXT: vpminud %ymm0, %ymm6, %ymm0 +; AVX2-NEXT: vpminud %ymm1, %ymm7, %ymm1 ; AVX2-NEXT: retq ; ; AVX512-LABEL: reassociate_umin_v16i32: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vpminud %zmm3, %zmm2, %zmm1 -; AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpminud %zmm0, %zmm2, %zmm0 +; AVX512-NEXT: vpminud %zmm0, %zmm3, %zmm0 ; AVX512-NEXT: retq %t0 = add <16 x i32> %x0, %x1 @@ -2832,32 +2832,32 @@ ; ; AVX2-LABEL: reassociate_umin_v8i64: ; AVX2: # %bb.0: -; AVX2-NEXT: vpaddq %ymm3, %ymm1, %ymm1 -; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] -; AVX2-NEXT: vpxor %ymm3, %ymm5, %ymm8 -; AVX2-NEXT: vpxor %ymm3, %ymm1, %ymm9 -; AVX2-NEXT: vpcmpgtq %ymm8, %ymm9, %ymm8 -; AVX2-NEXT: vblendvpd %ymm8, %ymm5, %ymm1, %ymm1 ; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpxor %ymm3, %ymm4, %ymm2 -; AVX2-NEXT: vpxor %ymm3, %ymm0, %ymm5 -; AVX2-NEXT: vpcmpgtq %ymm2, %ymm5, %ymm2 -; AVX2-NEXT: vblendvpd %ymm2, %ymm4, %ymm0, %ymm0 -; AVX2-NEXT: vxorpd %ymm3, %ymm0, %ymm2 -; AVX2-NEXT: vpxor %ymm3, %ymm6, %ymm4 -; AVX2-NEXT: vpcmpgtq %ymm4, %ymm2, %ymm2 -; AVX2-NEXT: vblendvpd %ymm2, %ymm6, %ymm0, %ymm0 -; AVX2-NEXT: vxorpd %ymm3, %ymm1, %ymm2 -; AVX2-NEXT: vpxor %ymm3, %ymm7, %ymm3 -; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm2 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] +; AVX2-NEXT: vpaddq %ymm3, %ymm1, %ymm1 +; AVX2-NEXT: vpxor %ymm2, %ymm5, %ymm3 +; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm8 +; AVX2-NEXT: vpcmpgtq %ymm3, %ymm8, %ymm3 +; AVX2-NEXT: vblendvpd %ymm3, %ymm5, %ymm1, %ymm1 +; AVX2-NEXT: vpxor %ymm2, %ymm4, %ymm3 +; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm5 +; AVX2-NEXT: vpcmpgtq %ymm3, %ymm5, %ymm3 +; AVX2-NEXT: vblendvpd %ymm3, %ymm4, %ymm0, %ymm0 +; AVX2-NEXT: vxorpd %ymm2, %ymm0, %ymm3 +; AVX2-NEXT: vpxor %ymm2, %ymm6, %ymm4 +; AVX2-NEXT: vpcmpgtq %ymm4, %ymm3, %ymm3 +; AVX2-NEXT: vblendvpd %ymm3, %ymm6, %ymm0, %ymm0 +; AVX2-NEXT: vxorpd %ymm2, %ymm1, %ymm3 +; AVX2-NEXT: vpxor %ymm2, %ymm7, %ymm2 +; AVX2-NEXT: vpcmpgtq %ymm2, %ymm3, %ymm2 ; AVX2-NEXT: vblendvpd %ymm2, %ymm7, %ymm1, %ymm1 ; AVX2-NEXT: retq ; ; AVX512-LABEL: reassociate_umin_v8i64: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vpminuq %zmm3, %zmm2, 
%zmm1 -; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpminuq %zmm0, %zmm2, %zmm0 +; AVX512-NEXT: vpminuq %zmm0, %zmm3, %zmm0 ; AVX512-NEXT: retq %t0 = add <8 x i64> %x0, %x1 @@ -2929,17 +2929,17 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpaddb %ymm3, %ymm1, %ymm1 -; AVX2-NEXT: vpminsb %ymm6, %ymm4, %ymm2 -; AVX2-NEXT: vpminsb %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpminsb %ymm7, %ymm5, %ymm2 -; AVX2-NEXT: vpminsb %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpminsb %ymm1, %ymm5, %ymm1 +; AVX2-NEXT: vpminsb %ymm0, %ymm4, %ymm0 +; AVX2-NEXT: vpminsb %ymm0, %ymm6, %ymm0 +; AVX2-NEXT: vpminsb %ymm1, %ymm7, %ymm1 ; AVX2-NEXT: retq ; ; AVX512-LABEL: reassociate_smin_v64i8: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddb %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vpminsb %zmm3, %zmm2, %zmm1 -; AVX512-NEXT: vpminsb %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpminsb %zmm0, %zmm2, %zmm0 +; AVX512-NEXT: vpminsb %zmm0, %zmm3, %zmm0 ; AVX512-NEXT: retq %t0 = add <64 x i8> %x0, %x1 @@ -2971,17 +2971,17 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vpaddw %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpaddw %ymm3, %ymm1, %ymm1 -; AVX2-NEXT: vpminsw %ymm6, %ymm4, %ymm2 -; AVX2-NEXT: vpminsw %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpminsw %ymm7, %ymm5, %ymm2 -; AVX2-NEXT: vpminsw %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpminsw %ymm1, %ymm5, %ymm1 +; AVX2-NEXT: vpminsw %ymm0, %ymm4, %ymm0 +; AVX2-NEXT: vpminsw %ymm0, %ymm6, %ymm0 +; AVX2-NEXT: vpminsw %ymm1, %ymm7, %ymm1 ; AVX2-NEXT: retq ; ; AVX512-LABEL: reassociate_smin_v32i16: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddw %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vpminsw %zmm3, %zmm2, %zmm1 -; AVX512-NEXT: vpminsw %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpminsw %zmm0, %zmm2, %zmm0 +; AVX512-NEXT: vpminsw %zmm0, %zmm3, %zmm0 ; AVX512-NEXT: retq %t0 = add <32 x i16> %x0, %x1 @@ -3053,17 +3053,17 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpaddd %ymm3, %ymm1, %ymm1 -; AVX2-NEXT: vpminsd %ymm6, %ymm4, %ymm2 -; AVX2-NEXT: vpminsd %ymm2, %ymm0, %ymm0 -; AVX2-NEXT: vpminsd %ymm7, %ymm5, %ymm2 -; AVX2-NEXT: vpminsd %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpminsd %ymm1, %ymm5, %ymm1 +; AVX2-NEXT: vpminsd %ymm0, %ymm4, %ymm0 +; AVX2-NEXT: vpminsd %ymm0, %ymm6, %ymm0 +; AVX2-NEXT: vpminsd %ymm1, %ymm7, %ymm1 ; AVX2-NEXT: retq ; ; AVX512-LABEL: reassociate_smin_v16i32: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vpminsd %zmm3, %zmm2, %zmm1 -; AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpminsd %zmm0, %zmm2, %zmm0 +; AVX512-NEXT: vpminsd %zmm0, %zmm3, %zmm0 ; AVX512-NEXT: retq %t0 = add <16 x i32> %x0, %x1 @@ -3228,8 +3228,8 @@ ; AVX512-LABEL: reassociate_smin_v8i64: ; AVX512: # %bb.0: ; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vpminsq %zmm3, %zmm2, %zmm1 -; AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpminsq %zmm0, %zmm2, %zmm0 +; AVX512-NEXT: vpminsq %zmm0, %zmm3, %zmm0 ; AVX512-NEXT: retq %t0 = add <8 x i64> %x0, %x1 diff --git a/llvm/test/CodeGen/X86/machine-combiner-int.ll b/llvm/test/CodeGen/X86/machine-combiner-int.ll --- a/llvm/test/CodeGen/X86/machine-combiner-int.ll +++ b/llvm/test/CodeGen/X86/machine-combiner-int.ll @@ -13,8 +13,8 @@ ; CHECK-NEXT: # kill ; CHECK-NEXT: # kill ; CHECK-NEXT: leal (%rdi,%rsi), %eax -; CHECK-NEXT: imull %ecx, %edx ; CHECK-NEXT: imull %edx, %eax +; CHECK-NEXT: imull %ecx, %eax ; CHECK-NEXT: # kill ; CHECK-NEXT: retq %t0 = add i16 %x0, %x1 @@ -29,8 +29,8 @@ ; CHECK-NEXT: # kill ; CHECK-NEXT: # kill ; CHECK-NEXT: leal (%rdi,%rsi), %eax -; CHECK-NEXT: imull %ecx, %edx ; 
CHECK-NEXT: imull %edx, %eax +; CHECK-NEXT: imull %ecx, %eax ; CHECK-NEXT: retq ; DEAD: ADD32rr @@ -47,8 +47,8 @@ ; CHECK-LABEL: reassociate_muls_i64: ; CHECK: # %bb.0: ; CHECK-NEXT: leaq (%rdi,%rsi), %rax -; CHECK-NEXT: imulq %rcx, %rdx ; CHECK-NEXT: imulq %rdx, %rax +; CHECK-NEXT: imulq %rcx, %rax ; CHECK-NEXT: retq %t0 = add i64 %x0, %x1 %t1 = mul i64 %x2, %t0 @@ -62,10 +62,10 @@ define i8 @reassociate_ands_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) { ; CHECK-LABEL: reassociate_ands_i8: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: subb %sil, %dil +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: subb %sil, %al +; CHECK-NEXT: andb %dl, %al ; CHECK-NEXT: andb %cl, %al -; CHECK-NEXT: andb %dil, %al ; CHECK-NEXT: # kill ; CHECK-NEXT: retq %t0 = sub i8 %x0, %x1 @@ -79,10 +79,10 @@ define i32 @reassociate_ands_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) { ; CHECK-LABEL: reassociate_ands_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: subl %esi, %edi +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: subl %esi, %eax +; CHECK-NEXT: andl %edx, %eax ; CHECK-NEXT: andl %ecx, %eax -; CHECK-NEXT: andl %edi, %eax ; CHECK-NEXT: retq %t0 = sub i32 %x0, %x1 %t1 = and i32 %x2, %t0 @@ -93,10 +93,10 @@ define i64 @reassociate_ands_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) { ; CHECK-LABEL: reassociate_ands_i64: ; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdx, %rax -; CHECK-NEXT: subq %rsi, %rdi +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: subq %rsi, %rax +; CHECK-NEXT: andq %rdx, %rax ; CHECK-NEXT: andq %rcx, %rax -; CHECK-NEXT: andq %rdi, %rax ; CHECK-NEXT: retq %t0 = sub i64 %x0, %x1 %t1 = and i64 %x2, %t0 @@ -110,10 +110,10 @@ define i8 @reassociate_ors_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) { ; CHECK-LABEL: reassociate_ors_i8: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: subb %sil, %dil +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: subb %sil, %al +; CHECK-NEXT: orb %dl, %al ; CHECK-NEXT: orb %cl, %al -; CHECK-NEXT: orb %dil, %al ; CHECK-NEXT: # kill ; CHECK-NEXT: retq %t0 = sub i8 %x0, %x1 @@ -127,10 +127,10 @@ define i32 @reassociate_ors_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) { ; CHECK-LABEL: reassociate_ors_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: subl %esi, %edi +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: subl %esi, %eax +; CHECK-NEXT: orl %edx, %eax ; CHECK-NEXT: orl %ecx, %eax -; CHECK-NEXT: orl %edi, %eax ; CHECK-NEXT: retq %t0 = sub i32 %x0, %x1 %t1 = or i32 %x2, %t0 @@ -141,10 +141,10 @@ define i64 @reassociate_ors_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) { ; CHECK-LABEL: reassociate_ors_i64: ; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdx, %rax -; CHECK-NEXT: subq %rsi, %rdi +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: subq %rsi, %rax +; CHECK-NEXT: orq %rdx, %rax ; CHECK-NEXT: orq %rcx, %rax -; CHECK-NEXT: orq %rdi, %rax ; CHECK-NEXT: retq %t0 = sub i64 %x0, %x1 %t1 = or i64 %x2, %t0 @@ -158,10 +158,10 @@ define i8 @reassociate_xors_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) { ; CHECK-LABEL: reassociate_xors_i8: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: subb %sil, %dil +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: subb %sil, %al +; CHECK-NEXT: xorb %dl, %al ; CHECK-NEXT: xorb %cl, %al -; CHECK-NEXT: xorb %dil, %al ; CHECK-NEXT: # kill ; CHECK-NEXT: retq %t0 = sub i8 %x0, %x1 @@ -175,10 +175,10 @@ define i32 @reassociate_xors_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) { ; CHECK-LABEL: reassociate_xors_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: subl %esi, %edi +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: 
subl %esi, %eax +; CHECK-NEXT: xorl %edx, %eax ; CHECK-NEXT: xorl %ecx, %eax -; CHECK-NEXT: xorl %edi, %eax ; CHECK-NEXT: retq %t0 = sub i32 %x0, %x1 %t1 = xor i32 %x2, %t0 @@ -189,10 +189,10 @@ define i64 @reassociate_xors_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) { ; CHECK-LABEL: reassociate_xors_i64: ; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdx, %rax -; CHECK-NEXT: subq %rsi, %rdi +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: subq %rsi, %rax +; CHECK-NEXT: xorq %rdx, %rax ; CHECK-NEXT: xorq %rcx, %rax -; CHECK-NEXT: xorq %rdi, %rax ; CHECK-NEXT: retq %t0 = sub i64 %x0, %x1 %t1 = xor i64 %x2, %t0 diff --git a/llvm/test/CodeGen/X86/recip-fastmath.ll b/llvm/test/CodeGen/X86/recip-fastmath.ll --- a/llvm/test/CodeGen/X86/recip-fastmath.ll +++ b/llvm/test/CodeGen/X86/recip-fastmath.ll @@ -1143,13 +1143,13 @@ ; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0 ; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] ; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 +; SANDY-NEXT: vrcpps %ymm1, %ymm4 ; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0 ; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0 -; SANDY-NEXT: vrcpps %ymm1, %ymm2 -; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1 +; SANDY-NEXT: vmulps %ymm4, %ymm1, %ymm1 ; SANDY-NEXT: vsubps %ymm1, %ymm3, %ymm1 -; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1 -; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1 +; SANDY-NEXT: vmulps %ymm1, %ymm4, %ymm1 +; SANDY-NEXT: vaddps %ymm1, %ymm4, %ymm1 ; SANDY-NEXT: retq ; ; HASWELL-LABEL: v16f32_one_step: @@ -1333,13 +1333,13 @@ ; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2 ; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0 ; SANDY-NEXT: vsubps %ymm0, %ymm4, %ymm0 +; SANDY-NEXT: vrcpps %ymm1, %ymm3 ; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0 ; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0 -; SANDY-NEXT: vrcpps %ymm1, %ymm2 -; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm3 -; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3 -; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3 -; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2 +; SANDY-NEXT: vmulps %ymm3, %ymm1, %ymm2 +; SANDY-NEXT: vsubps %ymm2, %ymm4, %ymm2 +; SANDY-NEXT: vmulps %ymm2, %ymm3, %ymm2 +; SANDY-NEXT: vaddps %ymm2, %ymm3, %ymm2 ; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1 ; SANDY-NEXT: vsubps %ymm1, %ymm4, %ymm1 ; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1 diff --git a/llvm/test/CodeGen/X86/recip-fastmath2.ll b/llvm/test/CodeGen/X86/recip-fastmath2.ll --- a/llvm/test/CodeGen/X86/recip-fastmath2.ll +++ b/llvm/test/CodeGen/X86/recip-fastmath2.ll @@ -1227,14 +1227,14 @@ ; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm4 ; SANDY-NEXT: vmulps %ymm4, %ymm0, %ymm0 ; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 -; SANDY-NEXT: vrcpps %ymm1, %ymm3 ; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0 ; SANDY-NEXT: vaddps %ymm0, %ymm4, %ymm0 -; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [9.0E+0,1.0E+1,1.1E+1,1.2E+1,1.3E+1,1.4E+1,1.5E+1,1.6E+1] -; SANDY-NEXT: vmulps %ymm2, %ymm3, %ymm4 +; SANDY-NEXT: vrcpps %ymm1, %ymm2 +; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [9.0E+0,1.0E+1,1.1E+1,1.2E+1,1.3E+1,1.4E+1,1.5E+1,1.6E+1] +; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm4 ; SANDY-NEXT: vmulps %ymm4, %ymm1, %ymm1 -; SANDY-NEXT: vsubps %ymm1, %ymm2, %ymm1 -; SANDY-NEXT: vmulps %ymm1, %ymm3, %ymm1 +; SANDY-NEXT: vsubps %ymm1, %ymm3, %ymm1 +; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1 ; SANDY-NEXT: vaddps %ymm1, %ymm4, %ymm1 ; SANDY-NEXT: retq ; @@ -1397,13 +1397,13 @@ ; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0 ; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] ; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 -; SANDY-NEXT: vrcpps %ymm1, %ymm4 ; SANDY-NEXT: vmulps %ymm0, 
%ymm2, %ymm0 ; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0 -; SANDY-NEXT: vmulps %ymm4, %ymm1, %ymm1 +; SANDY-NEXT: vrcpps %ymm1, %ymm2 +; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1 ; SANDY-NEXT: vsubps %ymm1, %ymm3, %ymm1 -; SANDY-NEXT: vmulps %ymm1, %ymm4, %ymm1 -; SANDY-NEXT: vaddps %ymm1, %ymm4, %ymm1 +; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1 +; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1 ; SANDY-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 ; SANDY-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3 ; SANDY-NEXT: vmulps %ymm0, %ymm3, %ymm0 @@ -1627,13 +1627,13 @@ ; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm5 ; SANDY-NEXT: vmulps %ymm5, %ymm0, %ymm0 ; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 -; SANDY-NEXT: vrcpps %ymm1, %ymm3 ; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0 ; SANDY-NEXT: vaddps %ymm0, %ymm5, %ymm0 -; SANDY-NEXT: vmulps %ymm3, %ymm1, %ymm2 -; SANDY-NEXT: vsubps %ymm2, %ymm4, %ymm2 -; SANDY-NEXT: vmulps %ymm2, %ymm3, %ymm2 -; SANDY-NEXT: vaddps %ymm2, %ymm3, %ymm2 +; SANDY-NEXT: vrcpps %ymm1, %ymm2 +; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm3 +; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3 +; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3 +; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2 ; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [9.0E+0,1.0E+1,1.1E+1,1.2E+1,1.3E+1,1.4E+1,1.5E+1,1.6E+1] ; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm4 ; SANDY-NEXT: vmulps %ymm4, %ymm1, %ymm1 @@ -1811,8 +1811,8 @@ ; SANDY-LABEL: v16f32_no_step2: ; SANDY: # %bb.0: ; SANDY-NEXT: vrcpps %ymm0, %ymm0 -; SANDY-NEXT: vrcpps %ymm1, %ymm1 ; SANDY-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; SANDY-NEXT: vrcpps %ymm1, %ymm1 ; SANDY-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; SANDY-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/scheduler-asm-moves.mir b/llvm/test/CodeGen/X86/scheduler-asm-moves.mir --- a/llvm/test/CodeGen/X86/scheduler-asm-moves.mir +++ b/llvm/test/CodeGen/X86/scheduler-asm-moves.mir @@ -122,6 +122,8 @@ ; CHECK-LABEL: name: synproxy_send_tcp_ipv6 ; CHECK: liveins: $eax, $edx ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edx + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32_abcd = COPY $eax ; CHECK-NEXT: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm $noreg, 1, $noreg, @csum_ipv6_magic_saddr, $noreg :: (dereferenceable load (s8) from `i8* getelementptr inbounds (%struct.in6_addr, %struct.in6_addr* @csum_ipv6_magic_saddr, i32 0, i32 0, i32 0)`) ; CHECK-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm $noreg, 1, $noreg, @csum_ipv6_magic_daddr, $noreg :: (dereferenceable load (s32) from @csum_ipv6_magic_daddr, !tbaa !4) ; CHECK-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm $noreg, 1, $noreg, @csum_ipv6_magic_proto, $noreg :: (dereferenceable load (s32) from @csum_ipv6_magic_proto, !tbaa !4) @@ -130,11 +132,9 @@ ; CHECK-NEXT: MOV32mr $noreg, 1, $noreg, @csum_ipv6_magic_sum, $noreg, %2 :: (store (s32) into @csum_ipv6_magic_sum, !tbaa !4) ; CHECK-NEXT: [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm $noreg, 1, $noreg, @synproxy_send_tcp_ipv6_nskb, $noreg :: (dereferenceable load (s32) from `i8** bitcast (%struct.sk_buff** @synproxy_send_tcp_ipv6_nskb to i8**)`, !tbaa !9) ; CHECK-NEXT: OR8mi [[MOV32rm2]], 1, $noreg, 0, $noreg, 3, implicit-def dead $eflags :: (store (s8) into %ir.4), (load (s8) from %ir.4) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32_abcd = COPY $eax - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $edx - ; CHECK-NEXT: [[MOV8rm1:%[0-9]+]]:gr8 = MOV8rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s8) from %ir.5, !tbaa !11) + ; CHECK-NEXT: [[MOV8rm1:%[0-9]+]]:gr8 = MOV8rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s8) from %ir.5, !tbaa 
!11)
 ; CHECK-NEXT: MOV8mr $noreg, 1, $noreg, @synproxy_send_tcp_ipv6_fl6, $noreg, [[MOV8rm1]] :: (store (s8) into `i8* getelementptr inbounds (%struct.in6_addr, %struct.in6_addr* @synproxy_send_tcp_ipv6_fl6, i32 0, i32 0, i32 0)`, !tbaa !11)
- ; CHECK-NEXT: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 [[COPY]].sub_8bit
+ ; CHECK-NEXT: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 [[COPY1]].sub_8bit
 ; CHECK-NEXT: $eax = COPY [[MOVZX32rr8_]]
 ; CHECK-NEXT: TCRETURNdi @fl6nthsecurity_skb_classify_flow, 0, csr_32, implicit $esp, implicit $ssp, implicit $eax
 %1:gr32 = COPY $edx
diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll b/llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
--- a/llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
@@ -16,8 +16,8 @@
 ; NHM-NEXT: mulss %xmm2, %xmm3
 ; NHM-NEXT: mulss %xmm1, %xmm2
 ; NHM-NEXT: addss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
-; NHM-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; NHM-NEXT: mulss %xmm3, %xmm2
+; NHM-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; NHM-NEXT: cmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; NHM-NEXT: andnps %xmm2, %xmm0
 ; NHM-NEXT: retq
@@ -40,8 +40,8 @@
 ; NHM-NEXT: mulps %xmm2, %xmm3
 ; NHM-NEXT: mulps %xmm1, %xmm2
 ; NHM-NEXT: addps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
-; NHM-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; NHM-NEXT: mulps %xmm3, %xmm2
+; NHM-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; NHM-NEXT: movaps {{.*#+}} xmm1 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
 ; NHM-NEXT: cmpleps %xmm0, %xmm1
 ; NHM-NEXT: andps %xmm2, %xmm1
@@ -55,8 +55,8 @@
 ; SNB-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
 ; SNB-NEXT: vmulps %xmm1, %xmm2, %xmm1
 ; SNB-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; SNB-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; SNB-NEXT: vmulps %xmm1, %xmm3, %xmm1
+; SNB-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; SNB-NEXT: vmovaps {{.*#+}} xmm2 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
 ; SNB-NEXT: vcmpleps %xmm0, %xmm2, %xmm0
 ; SNB-NEXT: vandps %xmm1, %xmm0, %xmm0
@@ -126,8 +126,8 @@
 ; SNB-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3
 ; SNB-NEXT: vmulps %ymm1, %ymm2, %ymm1
 ; SNB-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; SNB-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; SNB-NEXT: vmulps %ymm1, %ymm3, %ymm1
+; SNB-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
 ; SNB-NEXT: vmovaps {{.*#+}} ymm2 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
 ; SNB-NEXT: vcmpleps %ymm0, %ymm2, %ymm0
 ; SNB-NEXT: vandps %ymm1, %ymm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath.ll b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
--- a/llvm/test/CodeGen/X86/sqrt-fastmath.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
@@ -85,8 +85,8 @@
 ; SSE-NEXT: mulss %xmm2, %xmm3
 ; SSE-NEXT: mulss %xmm1, %xmm2
 ; SSE-NEXT: addss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
-; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE-NEXT: mulss %xmm3, %xmm2
+; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE-NEXT: cmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE-NEXT: andnps %xmm2, %xmm0
 ; SSE-NEXT: retq
@@ -98,8 +98,8 @@
 ; AVX1-NEXT: vmulss %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT: vaddss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
 ; AVX1-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
-; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT: vmulss %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT: vcmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT: vandnps %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: retq
@@ -107,16 +107,15 @@
 ; AVX512-LABEL: finite_f32_estimate_ieee_ninf:
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
-; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm2
-; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm1 = (xmm2 * xmm1) + mem
-; AVX512-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
-; AVX512-NEXT: vmulss %xmm1, %xmm2, %xmm1
 ; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
-; AVX512-NEXT: vandps %xmm2, %xmm0, %xmm0
-; AVX512-NEXT: vcmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
-; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
-; AVX512-NEXT: vmovaps %xmm1, %xmm0
+; AVX512-NEXT: vandps %xmm2, %xmm0, %xmm2
+; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm1 = (xmm0 * xmm1) + mem
+; AVX512-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vcmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %k1
+; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
 ; AVX512-NEXT: retq
 %call = tail call ninf afn float @__sqrtf_finite(float %f) #2
 ret float %call
@@ -229,8 +228,8 @@
 ; SSE-NEXT: mulss %xmm2, %xmm3
 ; SSE-NEXT: mulss %xmm1, %xmm2
 ; SSE-NEXT: addss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
-; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE-NEXT: mulss %xmm3, %xmm2
+; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE-NEXT: cmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE-NEXT: andnps %xmm2, %xmm0
 ; SSE-NEXT: retq
@@ -242,8 +241,8 @@
 ; AVX1-NEXT: vmulss %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT: vaddss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
 ; AVX1-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
-; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT: vmulss %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT: vcmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT: vandnps %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: retq
@@ -251,16 +250,15 @@
 ; AVX512-LABEL: sqrtf_check_denorms_ninf:
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
-; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm2
-; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm1 = (xmm2 * xmm1) + mem
-; AVX512-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
-; AVX512-NEXT: vmulss %xmm1, %xmm2, %xmm1
 ; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
-; AVX512-NEXT: vandps %xmm2, %xmm0, %xmm0
-; AVX512-NEXT: vcmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
-; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
-; AVX512-NEXT: vmovaps %xmm1, %xmm0
+; AVX512-NEXT: vandps %xmm2, %xmm0, %xmm2
+; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm1 = (xmm0 * xmm1) + mem
+; AVX512-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vcmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %k1
+; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
 ; AVX512-NEXT: retq
 %call = tail call ninf afn float @__sqrtf_finite(float %x) #2
 ret float %call
@@ -290,8 +288,8 @@
 ; SSE-NEXT: mulps %xmm2, %xmm3
 ; SSE-NEXT: mulps %xmm1, %xmm2
 ; SSE-NEXT: addps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
-; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE-NEXT: mulps %xmm3, %xmm2
+; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE-NEXT: movaps {{.*#+}} xmm1 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
 ; SSE-NEXT: cmpleps %xmm0, %xmm1
 ; SSE-NEXT: andps %xmm2, %xmm1
@@ -305,8 +303,8 @@
 ; AVX1-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
 ; AVX1-NEXT: vmulps %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT: vmulps %xmm1, %xmm3, %xmm1
+; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT: vmovaps {{.*#+}} xmm2 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
 ; AVX1-NEXT: vcmpleps %xmm0, %xmm2, %xmm0
 ; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
@@ -397,22 +395,21 @@
 ; AVX1-LABEL: f32_estimate2:
 ; AVX1: # %bb.0:
 ; AVX1-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
-; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
-; AVX1-NEXT: vmulss %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vcmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm1
-; AVX1-NEXT: vandnps %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vmulss %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vcmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vandnps %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: retq
 ;
 ; AVX512-LABEL: f32_estimate2:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
-; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm1
-; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
-; AVX512-NEXT: vandps %xmm2, %xmm0, %xmm0
-; AVX512-NEXT: vcmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
-; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
-; AVX512-NEXT: vmovaps %xmm1, %xmm0
+; AVX512-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
+; AVX512-NEXT: vrsqrtss %xmm0, %xmm0, %xmm2
+; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vcmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
+; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
 ; AVX512-NEXT: retq
 %sqrt = tail call fast float @llvm.sqrt.f32(float %x)
 ret float %sqrt
@@ -495,11 +492,11 @@
 ; AVX1-LABEL: v4f32_estimate2:
 ; AVX1: # %bb.0:
 ; AVX1-NEXT: vrsqrtps %xmm0, %xmm1
-; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
-; AVX1-NEXT: vmulps %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vmovaps {{.*#+}} xmm1 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
-; AVX1-NEXT: vcmpleps %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vandps %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vmulps %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vmovaps {{.*#+}} xmm2 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
+; AVX1-NEXT: vcmpleps %xmm0, %xmm2, %xmm0
+; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: retq
 ;
 ; AVX512-LABEL: v4f32_estimate2:
@@ -667,11 +664,11 @@
 ; AVX1-NEXT: vmulps %ymm2, %ymm0, %ymm0
 ; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
 ; AVX1-NEXT: vaddps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vrsqrtps %ymm1, %ymm5
 ; AVX1-NEXT: vmulps %ymm0, %ymm4, %ymm0
-; AVX1-NEXT: vrsqrtps %ymm1, %ymm4
-; AVX1-NEXT: vmulps %ymm3, %ymm4, %ymm3
-; AVX1-NEXT: vmulps %ymm4, %ymm1, %ymm1
-; AVX1-NEXT: vmulps %ymm4, %ymm1, %ymm1
+; AVX1-NEXT: vmulps %ymm3, %ymm5, %ymm3
+; AVX1-NEXT: vmulps %ymm5, %ymm1, %ymm1
+; AVX1-NEXT: vmulps %ymm5, %ymm1, %ymm1
 ; AVX1-NEXT: vaddps %ymm2, %ymm1, %ymm1
 ; AVX1-NEXT: vmulps %ymm1, %ymm3, %ymm1
 ; AVX1-NEXT: retq
@@ -799,8 +796,8 @@
 ; SSE-NEXT: mulps %xmm3, %xmm2
 ; SSE-NEXT: addps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
 ; SSE-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
-; SSE-NEXT: mulps %xmm2, %xmm3
 ; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE-NEXT: mulps %xmm2, %xmm3
 ; SSE-NEXT: divps %xmm1, %xmm3
 ; SSE-NEXT: mulps %xmm3, %xmm0
 ; SSE-NEXT: retq
@@ -812,8 +809,8 @@
 ; AVX1-NEXT: vmulps %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
 ; AVX1-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
-; AVX1-NEXT: vmulps %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vmulps %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT: vdivps %xmm1, %xmm2, %xmm1
 ; AVX1-NEXT: vmulps %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: retq
@@ -827,8 +824,8 @@
 ; AVX512-NEXT: vmulps %xmm3, %xmm2, %xmm2
 ; AVX512-NEXT: vbroadcastss {{.*#+}} xmm3 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
 ; AVX512-NEXT: vaddps %xmm3, %xmm2, %xmm2
-; AVX512-NEXT: vbroadcastss {{.*#+}} xmm3 = [NaN,NaN,NaN,NaN]
 ; AVX512-NEXT: vmulps %xmm2, %xmm4, %xmm2
+; AVX512-NEXT: vbroadcastss {{.*#+}} xmm3 = [NaN,NaN,NaN,NaN]
 ; AVX512-NEXT: vandps %xmm3, %xmm1, %xmm1
 ; AVX512-NEXT: vdivps %xmm1, %xmm2, %xmm1
 ; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0
@@ -845,16 +842,16 @@
 define double @div_sqrt_fabs_f64(double %x, double %y, double %z) {
 ; SSE-LABEL: div_sqrt_fabs_f64:
 ; SSE: # %bb.0:
-; SSE-NEXT: sqrtsd %xmm2, %xmm2
 ; SSE-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE-NEXT: sqrtsd %xmm2, %xmm2
 ; SSE-NEXT: mulsd %xmm2, %xmm1
 ; SSE-NEXT: divsd %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: div_sqrt_fabs_f64:
 ; AVX: # %bb.0:
-; AVX-NEXT: vsqrtsd %xmm2, %xmm2, %xmm2
 ; AVX-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vsqrtsd %xmm2, %xmm2, %xmm2
 ; AVX-NEXT: vmulsd %xmm1, %xmm2, %xmm1
 ; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fadd-fast.ll b/llvm/test/CodeGen/X86/vector-reduce-fadd-fast.ll
--- a/llvm/test/CodeGen/X86/vector-reduce-fadd-fast.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fadd-fast.ll
@@ -914,8 +914,8 @@
 ; SSE-NEXT: addpd %xmm6, %xmm2
 ; SSE-NEXT: addpd %xmm7, %xmm3
 ; SSE-NEXT: addpd %xmm5, %xmm1
-; SSE-NEXT: addpd %xmm3, %xmm1
 ; SSE-NEXT: addpd {{[0-9]+}}(%rsp), %xmm4
+; SSE-NEXT: addpd %xmm3, %xmm1
 ; SSE-NEXT: addpd %xmm2, %xmm4
 ; SSE-NEXT: addpd %xmm1, %xmm4
 ; SSE-NEXT: movapd %xmm4, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmul-fast.ll b/llvm/test/CodeGen/X86/vector-reduce-fmul-fast.ll
--- a/llvm/test/CodeGen/X86/vector-reduce-fmul-fast.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmul-fast.ll
@@ -648,8 +648,8 @@
 ; SSE-NEXT: mulpd %xmm6, %xmm2
 ; SSE-NEXT: mulpd %xmm7, %xmm3
 ; SSE-NEXT: mulpd %xmm5, %xmm1
-; SSE-NEXT: mulpd %xmm3, %xmm1
 ; SSE-NEXT: mulpd {{[0-9]+}}(%rsp), %xmm4
+; SSE-NEXT: mulpd %xmm3, %xmm1
 ; SSE-NEXT: mulpd %xmm2, %xmm4
 ; SSE-NEXT: mulpd %xmm1, %xmm4
 ; SSE-NEXT: movapd %xmm4, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -3253,9 +3253,9 @@
 ; SSSE3-NEXT: movdqa %xmm0, %xmm1
 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1],zero,zero,zero,zero,zero,zero,xmm1[2,3],zero,zero,zero,zero,zero,zero
 ; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,0,0,0,11,0,0,0,0,0,0,0,11,0,0,0]
+; SSSE3-NEXT: por %xmm2, %xmm1
 ; SSSE3-NEXT: movdqa %xmm0, %xmm3
 ; SSSE3-NEXT: pshufb {{.*#+}} xmm3 = xmm3[4,5],zero,zero,zero,zero,zero,zero,xmm3[6,7],zero,zero,zero,zero,zero,zero
-; SSSE3-NEXT: por %xmm2, %xmm1
 ; SSSE3-NEXT: por %xmm2, %xmm3
 ; SSSE3-NEXT: movdqa %xmm0, %xmm4
 ; SSSE3-NEXT: pshufb {{.*#+}} xmm4 = xmm4[8,9],zero,zero,zero,zero,zero,zero,xmm4[10,11],zero,zero,zero,zero,zero,zero