Index: test/CodeGen/AArch64/unfold-masked-merge.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/unfold-masked-merge.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=37104
+
+define i32 @out(i32 %x, i32 %y, i32 %mask) {
+; CHECK-LABEL: out:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and w8, w0, w2
+; CHECK-NEXT:    bic w9, w1, w2
+; CHECK-NEXT:    orr w0, w8, w9
+; CHECK-NEXT:    ret
+  %mx = and i32 %x, %mask ; bits of %x where %mask is 1
+  %notmask = xor i32 %mask, -1 ; ~%mask
+  %my = and i32 %y, %notmask ; bits of %y where %mask is 0
+  %r = or i32 %mx, %my ; masked merge: (%x & %mask) | (%y & ~%mask)
+  ret i32 %r
+}
+
+define <4 x i32> @out_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
+; CHECK-LABEL: out_vec:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl v2.16b, v0.16b, v1.16b
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    ret
+  %mx = and <4 x i32> %x, %mask ; bits of %x where %mask is 1
+  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1> ; ~%mask in every lane
+  %my = and <4 x i32> %y, %notmask ; bits of %y where %mask is 0
+  %r = or <4 x i32> %mx, %my ; lane-wise masked merge
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @out_vec_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
+; CHECK-LABEL: out_vec_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bsl v2.16b, v0.16b, v1.16b
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    ret
+  %mx = and <4 x i32> %x, %mask ; bits of %x where %mask is 1
+  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 undef, i32 -1> ; ~%mask, with one undef lane in the all-ones constant
+  %my = and <4 x i32> %y, %notmask ; bits of %y where %notmask is 1
+  %r = or <4 x i32> %mx, %my ; lane-wise masked merge
+  ret <4 x i32> %r
+}
+
+; Should be the same as @out: ((x ^ y) & mask) ^ y computes the same masked merge.
+define i32 @in(i32 %x, i32 %y, i32 %mask) {
+; CHECK-LABEL: in:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eor w8, w0, w1
+; CHECK-NEXT:    and w8, w8, w2
+; CHECK-NEXT:    eor w0, w8, w1
+; CHECK-NEXT:    ret
+  %n0 = xor i32 %x, %y ; bits where %x and %y differ
+  %n1 = and i32 %n0, %mask ; keep the differences only where %mask is 1
+  %r = xor i32 %n1, %y ; %x where %mask is 1, %y where %mask is 0
+  ret i32 %r
+}
+
+define <4 x i32> @in_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
+; CHECK-LABEL: in_vec:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    ret
+  %n0 = xor <4 x i32> %x, %y ; bits where %x and %y differ
+  %n1 = and <4 x i32> %n0, %mask ; keep the differences only where %mask is 1
+  %r = xor <4 x i32> %n1, %y ; %x where %mask is 1, %y where %mask is 0
+  ret <4 x i32> %r
+}
Index: test/CodeGen/X86/unfold-masked-merge.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/unfold-masked-merge.ll
@@ -0,0 +1,304 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-sse < %s | FileCheck %s --check-prefix=CHECK-ZERO
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-sse < %s | FileCheck %s --check-prefix=CHECK-ONE
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,+sse < %s | FileCheck %s --check-prefix=CHECK-TWO
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+sse < %s | FileCheck %s --check-prefix=CHECK-THREE
+
+; https://bugs.llvm.org/show_bug.cgi?id=37104
+
+define i32 @out(i32 %x, i32 %y, i32 %mask) {
+; CHECK-ZERO-LABEL: out:
+; CHECK-ZERO:       # %bb.0:
+; CHECK-ZERO-NEXT:    andl %edx, %edi
+; CHECK-ZERO-NEXT:    notl %edx
+; CHECK-ZERO-NEXT:    andl %esi, %edx
+; CHECK-ZERO-NEXT:    orl %edi, %edx
+; CHECK-ZERO-NEXT:    movl %edx, %eax
+; CHECK-ZERO-NEXT:    retq
+;
+; CHECK-ONE-LABEL: out:
+; CHECK-ONE:       # %bb.0:
+; CHECK-ONE-NEXT:    andl %edx, %edi
+; CHECK-ONE-NEXT:    andnl %esi, %edx, %eax
+; CHECK-ONE-NEXT:    orl %edi, %eax
+; CHECK-ONE-NEXT:    retq
+;
+; CHECK-TWO-LABEL: out:
+; CHECK-TWO:       # %bb.0:
+; CHECK-TWO-NEXT:    andl %edx, %edi
+; CHECK-TWO-NEXT:    notl %edx
+; CHECK-TWO-NEXT:    andl %esi, %edx
+; CHECK-TWO-NEXT:    orl %edi, %edx
+; CHECK-TWO-NEXT:    movl %edx, %eax
+; CHECK-TWO-NEXT:    retq
+;
+; CHECK-THREE-LABEL: out:
+; CHECK-THREE:       # %bb.0:
+; CHECK-THREE-NEXT:    andl %edx, %edi
+; CHECK-THREE-NEXT:    andnl %esi, %edx, %eax
+; CHECK-THREE-NEXT:    orl %edi, %eax
+; CHECK-THREE-NEXT:    retq
+  %mx = and i32 %x, %mask ; bits of %x where %mask is 1
+  %notmask = xor i32 %mask, -1 ; ~%mask
+  %my = and i32 %y, %notmask ; bits of %y where %mask is 0
+  %r = or i32 %mx, %my ; masked merge: (%x & %mask) | (%y & ~%mask)
+  ret i32 %r
+}
+
+define <4 x i32> @out_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
+; CHECK-ZERO-LABEL: out_vec:
+; CHECK-ZERO:       # %bb.0:
+; CHECK-ZERO-NEXT:    pushq %rbx
+; CHECK-ZERO-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-ZERO-NEXT:    .cfi_offset %rbx, -16
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
+; CHECK-ZERO-NEXT:    andl %ebx, %r8d
+; CHECK-ZERO-NEXT:    andl %eax, %ecx
+; CHECK-ZERO-NEXT:    andl %r11d, %edx
+; CHECK-ZERO-NEXT:    andl %r10d, %esi
+; CHECK-ZERO-NEXT:    notl %r11d
+; CHECK-ZERO-NEXT:    notl %eax
+; CHECK-ZERO-NEXT:    notl %ebx
+; CHECK-ZERO-NEXT:    notl %r10d
+; CHECK-ZERO-NEXT:    andl %r9d, %r10d
+; CHECK-ZERO-NEXT:    orl %esi, %r10d
+; CHECK-ZERO-NEXT:    andl {{[0-9]+}}(%rsp), %ebx
+; CHECK-ZERO-NEXT:    orl %r8d, %ebx
+; CHECK-ZERO-NEXT:    andl {{[0-9]+}}(%rsp), %eax
+; CHECK-ZERO-NEXT:    orl %ecx, %eax
+; CHECK-ZERO-NEXT:    andl {{[0-9]+}}(%rsp), %r11d
+; CHECK-ZERO-NEXT:    orl %edx, %r11d
+; CHECK-ZERO-NEXT:    movl %ebx, 12(%rdi)
+; CHECK-ZERO-NEXT:    movl %eax, 8(%rdi)
+; CHECK-ZERO-NEXT:    movl %r11d, 4(%rdi)
+; CHECK-ZERO-NEXT:    movl %r10d, (%rdi)
+; CHECK-ZERO-NEXT:    movq %rdi, %rax
+; CHECK-ZERO-NEXT:    popq %rbx
+; CHECK-ZERO-NEXT:    retq
+;
+; CHECK-ONE-LABEL: out_vec:
+; CHECK-ONE:       # %bb.0:
+; CHECK-ONE-NEXT:    pushq %rbx
+; CHECK-ONE-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-ONE-NEXT:    .cfi_offset %rbx, -16
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
+; CHECK-ONE-NEXT:    andl %ebx, %r8d
+; CHECK-ONE-NEXT:    andl %r11d, %ecx
+; CHECK-ONE-NEXT:    andl %r10d, %edx
+; CHECK-ONE-NEXT:    andl %eax, %esi
+; CHECK-ONE-NEXT:    andnl %r9d, %eax, %eax
+; CHECK-ONE-NEXT:    orl %esi, %eax
+; CHECK-ONE-NEXT:    andnl {{[0-9]+}}(%rsp), %ebx, %esi
+; CHECK-ONE-NEXT:    orl %r8d, %esi
+; CHECK-ONE-NEXT:    andnl {{[0-9]+}}(%rsp), %r11d, %ebx
+; CHECK-ONE-NEXT:    orl %ecx, %ebx
+; CHECK-ONE-NEXT:    andnl {{[0-9]+}}(%rsp), %r10d, %ecx
+; CHECK-ONE-NEXT:    orl %edx, %ecx
+; CHECK-ONE-NEXT:    movl %esi, 12(%rdi)
+; CHECK-ONE-NEXT:    movl %ebx, 8(%rdi)
+; CHECK-ONE-NEXT:    movl %ecx, 4(%rdi)
+; CHECK-ONE-NEXT:    movl %eax, (%rdi)
+; CHECK-ONE-NEXT:    movq %rdi, %rax
+; CHECK-ONE-NEXT:    popq %rbx
+; CHECK-ONE-NEXT:    retq
+;
+; CHECK-TWO-LABEL: out_vec:
+; CHECK-TWO:       # %bb.0:
+; CHECK-TWO-NEXT:    andps %xmm2, %xmm0
+; CHECK-TWO-NEXT:    andnps %xmm1, %xmm2
+; CHECK-TWO-NEXT:    orps %xmm2, %xmm0
+; CHECK-TWO-NEXT:    retq
+;
+; CHECK-THREE-LABEL: out_vec:
+; CHECK-THREE:       # %bb.0:
+; CHECK-THREE-NEXT:    andps %xmm2, %xmm0
+; CHECK-THREE-NEXT:    andnps %xmm1, %xmm2
+; CHECK-THREE-NEXT:    orps %xmm2, %xmm0
+; CHECK-THREE-NEXT:    retq
+  %mx = and <4 x i32> %x, %mask ; bits of %x where %mask is 1
+  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1> ; ~%mask in every lane
+  %my = and <4 x i32> %y, %notmask ; bits of %y where %mask is 0
+  %r = or <4 x i32> %mx, %my ; lane-wise masked merge
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @out_vec_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
+; CHECK-ZERO-LABEL: out_vec_undef:
+; CHECK-ZERO:       # %bb.0:
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; CHECK-ZERO-NEXT:    andl %eax, %r8d
+; CHECK-ZERO-NEXT:    andl %r11d, %edx
+; CHECK-ZERO-NEXT:    andl %r10d, %esi
+; CHECK-ZERO-NEXT:    andl {{[0-9]+}}(%rsp), %ecx
+; CHECK-ZERO-NEXT:    notl %r11d
+; CHECK-ZERO-NEXT:    notl %eax
+; CHECK-ZERO-NEXT:    notl %r10d
+; CHECK-ZERO-NEXT:    andl %r9d, %r10d
+; CHECK-ZERO-NEXT:    orl %esi, %r10d
+; CHECK-ZERO-NEXT:    andl {{[0-9]+}}(%rsp), %eax
+; CHECK-ZERO-NEXT:    orl %r8d, %eax
+; CHECK-ZERO-NEXT:    andl {{[0-9]+}}(%rsp), %r11d
+; CHECK-ZERO-NEXT:    orl %edx, %r11d
+; CHECK-ZERO-NEXT:    movl %ecx, 8(%rdi)
+; CHECK-ZERO-NEXT:    movl %eax, 12(%rdi)
+; CHECK-ZERO-NEXT:    movl %r11d, 4(%rdi)
+; CHECK-ZERO-NEXT:    movl %r10d, (%rdi)
+; CHECK-ZERO-NEXT:    movq %rdi, %rax
+; CHECK-ZERO-NEXT:    retq
+;
+; CHECK-ONE-LABEL: out_vec_undef:
+; CHECK-ONE:       # %bb.0:
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
+; CHECK-ONE-NEXT:    andl %r11d, %r8d
+; CHECK-ONE-NEXT:    andl %r10d, %edx
+; CHECK-ONE-NEXT:    andl %eax, %esi
+; CHECK-ONE-NEXT:    andl {{[0-9]+}}(%rsp), %ecx
+; CHECK-ONE-NEXT:    andnl %r9d, %eax, %r9d
+; CHECK-ONE-NEXT:    orl %esi, %r9d
+; CHECK-ONE-NEXT:    andnl {{[0-9]+}}(%rsp), %r11d, %esi
+; CHECK-ONE-NEXT:    orl %r8d, %esi
+; CHECK-ONE-NEXT:    andnl {{[0-9]+}}(%rsp), %r10d, %eax
+; CHECK-ONE-NEXT:    orl %edx, %eax
+; CHECK-ONE-NEXT:    movl %esi, 12(%rdi)
+; CHECK-ONE-NEXT:    movl %ecx, 8(%rdi)
+; CHECK-ONE-NEXT:    movl %eax, 4(%rdi)
+; CHECK-ONE-NEXT:    movl %r9d, (%rdi)
+; CHECK-ONE-NEXT:    movq %rdi, %rax
+; CHECK-ONE-NEXT:    retq
+;
+; CHECK-TWO-LABEL: out_vec_undef:
+; CHECK-TWO:       # %bb.0:
+; CHECK-TWO-NEXT:    andps %xmm2, %xmm0
+; CHECK-TWO-NEXT:    andnps %xmm1, %xmm2
+; CHECK-TWO-NEXT:    orps %xmm2, %xmm0
+; CHECK-TWO-NEXT:    retq
+;
+; CHECK-THREE-LABEL: out_vec_undef:
+; CHECK-THREE:       # %bb.0:
+; CHECK-THREE-NEXT:    andps %xmm2, %xmm0
+; CHECK-THREE-NEXT:    andnps %xmm1, %xmm2
+; CHECK-THREE-NEXT:    orps %xmm2, %xmm0
+; CHECK-THREE-NEXT:    retq
+  %mx = and <4 x i32> %x, %mask ; bits of %x where %mask is 1
+  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 undef, i32 -1> ; ~%mask, except lane 2 which is xor'ed with undef
+  %my = and <4 x i32> %y, %notmask ; bits of %y where %notmask is 1
+  %r = or <4 x i32> %mx, %my ; lane-wise masked merge
+  ret <4 x i32> %r
+}
+
+; Should be the same as @out: ((x ^ y) & mask) ^ y computes the same masked merge.
+define i32 @in(i32 %x, i32 %y, i32 %mask) {
+; CHECK-ZERO-LABEL: in:
+; CHECK-ZERO:       # %bb.0:
+; CHECK-ZERO-NEXT:    xorl %esi, %edi
+; CHECK-ZERO-NEXT:    andl %edx, %edi
+; CHECK-ZERO-NEXT:    xorl %esi, %edi
+; CHECK-ZERO-NEXT:    movl %edi, %eax
+; CHECK-ZERO-NEXT:    retq
+;
+; CHECK-ONE-LABEL: in:
+; CHECK-ONE:       # %bb.0:
+; CHECK-ONE-NEXT:    xorl %esi, %edi
+; CHECK-ONE-NEXT:    andl %edx, %edi
+; CHECK-ONE-NEXT:    xorl %esi, %edi
+; CHECK-ONE-NEXT:    movl %edi, %eax
+; CHECK-ONE-NEXT:    retq
+;
+; CHECK-TWO-LABEL: in:
+; CHECK-TWO:       # %bb.0:
+; CHECK-TWO-NEXT:    xorl %esi, %edi
+; CHECK-TWO-NEXT:    andl %edx, %edi
+; CHECK-TWO-NEXT:    xorl %esi, %edi
+; CHECK-TWO-NEXT:    movl %edi, %eax
+; CHECK-TWO-NEXT:    retq
+;
+; CHECK-THREE-LABEL: in:
+; CHECK-THREE:       # %bb.0:
+; CHECK-THREE-NEXT:    xorl %esi, %edi
+; CHECK-THREE-NEXT:    andl %edx, %edi
+; CHECK-THREE-NEXT:    xorl %esi, %edi
+; CHECK-THREE-NEXT:    movl %edi, %eax
+; CHECK-THREE-NEXT:    retq
+  %n0 = xor i32 %x, %y ; bits where %x and %y differ
+  %n1 = and i32 %n0, %mask ; keep the differences only where %mask is 1
+  %r = xor i32 %n1, %y ; %x where %mask is 1, %y where %mask is 0
+  ret i32 %r
+}
+
+define <4 x i32> @in_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
+; CHECK-ZERO-LABEL: in_vec:
+; CHECK-ZERO:       # %bb.0:
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; CHECK-ZERO-NEXT:    xorl %r9d, %esi
+; CHECK-ZERO-NEXT:    xorl %eax, %edx
+; CHECK-ZERO-NEXT:    xorl %r11d, %ecx
+; CHECK-ZERO-NEXT:    xorl %r10d, %r8d
+; CHECK-ZERO-NEXT:    andl {{[0-9]+}}(%rsp), %r8d
+; CHECK-ZERO-NEXT:    andl {{[0-9]+}}(%rsp), %ecx
+; CHECK-ZERO-NEXT:    andl {{[0-9]+}}(%rsp), %edx
+; CHECK-ZERO-NEXT:    andl {{[0-9]+}}(%rsp), %esi
+; CHECK-ZERO-NEXT:    xorl %r9d, %esi
+; CHECK-ZERO-NEXT:    xorl %eax, %edx
+; CHECK-ZERO-NEXT:    xorl %r11d, %ecx
+; CHECK-ZERO-NEXT:    xorl %r10d, %r8d
+; CHECK-ZERO-NEXT:    movl %r8d, 12(%rdi)
+; CHECK-ZERO-NEXT:    movl %ecx, 8(%rdi)
+; CHECK-ZERO-NEXT:    movl %edx, 4(%rdi)
+; CHECK-ZERO-NEXT:    movl %esi, (%rdi)
+; CHECK-ZERO-NEXT:    movq %rdi, %rax
+; CHECK-ZERO-NEXT:    retq
+;
+; CHECK-ONE-LABEL: in_vec:
+; CHECK-ONE:       # %bb.0:
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; CHECK-ONE-NEXT:    xorl %r9d, %esi
+; CHECK-ONE-NEXT:    xorl %eax, %edx
+; CHECK-ONE-NEXT:    xorl %r11d, %ecx
+; CHECK-ONE-NEXT:    xorl %r10d, %r8d
+; CHECK-ONE-NEXT:    andl {{[0-9]+}}(%rsp), %r8d
+; CHECK-ONE-NEXT:    andl {{[0-9]+}}(%rsp), %ecx
+; CHECK-ONE-NEXT:    andl {{[0-9]+}}(%rsp), %edx
+; CHECK-ONE-NEXT:    andl {{[0-9]+}}(%rsp), %esi
+; CHECK-ONE-NEXT:    xorl %r9d, %esi
+; CHECK-ONE-NEXT:    xorl %eax, %edx
+; CHECK-ONE-NEXT:    xorl %r11d, %ecx
+; CHECK-ONE-NEXT:    xorl %r10d, %r8d
+; CHECK-ONE-NEXT:    movl %r8d, 12(%rdi)
+; CHECK-ONE-NEXT:    movl %ecx, 8(%rdi)
+; CHECK-ONE-NEXT:    movl %edx, 4(%rdi)
+; CHECK-ONE-NEXT:    movl %esi, (%rdi)
+; CHECK-ONE-NEXT:    movq %rdi, %rax
+; CHECK-ONE-NEXT:    retq
+;
+; CHECK-TWO-LABEL: in_vec:
+; CHECK-TWO:       # %bb.0:
+; CHECK-TWO-NEXT:    xorps %xmm1, %xmm0
+; CHECK-TWO-NEXT:    andps %xmm2, %xmm0
+; CHECK-TWO-NEXT:    xorps %xmm1, %xmm0
+; CHECK-TWO-NEXT:    retq
+;
+; CHECK-THREE-LABEL: in_vec:
+; CHECK-THREE:       # %bb.0:
+; CHECK-THREE-NEXT:    xorps %xmm1, %xmm0
+; CHECK-THREE-NEXT:    andps %xmm2, %xmm0
+; CHECK-THREE-NEXT:    xorps %xmm1, %xmm0
+; CHECK-THREE-NEXT:    retq
+  %n0 = xor <4 x i32> %x, %y ; bits where %x and %y differ
+  %n1 = and <4 x i32> %n0, %mask ; keep the differences only where %mask is 1
+  %r = xor <4 x i32> %n1, %y ; %x where %mask is 1, %y where %mask is 0
+  ret <4 x i32> %r
+}