Index: test/CodeGen/X86/unfold-masked-merge.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/unfold-masked-merge.ll
@@ -0,0 +1,630 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-sse < %s | FileCheck %s --check-prefix=CHECK-ZERO
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-sse < %s | FileCheck %s --check-prefix=CHECK-ONE
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,+sse < %s | FileCheck %s --check-prefix=CHECK-TWO
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+sse < %s | FileCheck %s --check-prefix=CHECK-THREE
+
+; https://bugs.llvm.org/show_bug.cgi?id=37104
+
+define <4 x i32> @out_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
+; CHECK-ZERO-LABEL: out_vec:
+; CHECK-ZERO:       # %bb.0:
+; CHECK-ZERO-NEXT:    pushq %rbx
+; CHECK-ZERO-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-ZERO-NEXT:    .cfi_offset %rbx, -16
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
+; CHECK-ZERO-NEXT:    andl %ebx, %r8d
+; CHECK-ZERO-NEXT:    andl %eax, %ecx
+; CHECK-ZERO-NEXT:    andl %r11d, %edx
+; CHECK-ZERO-NEXT:    andl %r10d, %esi
+; CHECK-ZERO-NEXT:    notl %r11d
+; CHECK-ZERO-NEXT:    notl %eax
+; CHECK-ZERO-NEXT:    notl %ebx
+; CHECK-ZERO-NEXT:    notl %r10d
+; CHECK-ZERO-NEXT:    andl %r9d, %r10d
+; CHECK-ZERO-NEXT:    orl %esi, %r10d
+; CHECK-ZERO-NEXT:    andl {{[0-9]+}}(%rsp), %ebx
+; CHECK-ZERO-NEXT:    orl %r8d, %ebx
+; CHECK-ZERO-NEXT:    andl {{[0-9]+}}(%rsp), %eax
+; CHECK-ZERO-NEXT:    orl %ecx, %eax
+; CHECK-ZERO-NEXT:    andl {{[0-9]+}}(%rsp), %r11d
+; CHECK-ZERO-NEXT:    orl %edx, %r11d
+; CHECK-ZERO-NEXT:    movl %ebx, 12(%rdi)
+; CHECK-ZERO-NEXT:    movl %eax, 8(%rdi)
+; CHECK-ZERO-NEXT:    movl %r11d, 4(%rdi)
+; CHECK-ZERO-NEXT:    movl %r10d, (%rdi)
+; CHECK-ZERO-NEXT:    movq %rdi, %rax
+; CHECK-ZERO-NEXT:    popq %rbx
+; CHECK-ZERO-NEXT:    retq
+;
+; CHECK-ONE-LABEL: out_vec:
+; CHECK-ONE:       # %bb.0:
+; CHECK-ONE-NEXT:    pushq %rbx
+; CHECK-ONE-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-ONE-NEXT:    .cfi_offset %rbx, -16
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
+; CHECK-ONE-NEXT:    andl %ebx, %r8d
+; CHECK-ONE-NEXT:    andl %r11d, %ecx
+; CHECK-ONE-NEXT:    andl %r10d, %edx
+; CHECK-ONE-NEXT:    andl %eax, %esi
+; CHECK-ONE-NEXT:    andnl %r9d, %eax, %eax
+; CHECK-ONE-NEXT:    orl %esi, %eax
+; CHECK-ONE-NEXT:    andnl {{[0-9]+}}(%rsp), %ebx, %esi
+; CHECK-ONE-NEXT:    orl %r8d, %esi
+; CHECK-ONE-NEXT:    andnl {{[0-9]+}}(%rsp), %r11d, %ebx
+; CHECK-ONE-NEXT:    orl %ecx, %ebx
+; CHECK-ONE-NEXT:    andnl {{[0-9]+}}(%rsp), %r10d, %ecx
+; CHECK-ONE-NEXT:    orl %edx, %ecx
+; CHECK-ONE-NEXT:    movl %esi, 12(%rdi)
+; CHECK-ONE-NEXT:    movl %ebx, 8(%rdi)
+; CHECK-ONE-NEXT:    movl %ecx, 4(%rdi)
+; CHECK-ONE-NEXT:    movl %eax, (%rdi)
+; CHECK-ONE-NEXT:    movq %rdi, %rax
+; CHECK-ONE-NEXT:    popq %rbx
+; CHECK-ONE-NEXT:    retq
+;
+; CHECK-TWO-LABEL: out_vec:
+; CHECK-TWO:       # %bb.0:
+; CHECK-TWO-NEXT:    andps %xmm2, %xmm0
+; CHECK-TWO-NEXT:    andnps %xmm1, %xmm2
+; CHECK-TWO-NEXT:    orps %xmm2, %xmm0
+; CHECK-TWO-NEXT:    retq
+;
+; CHECK-THREE-LABEL: out_vec:
+; CHECK-THREE:       # %bb.0:
+; CHECK-THREE-NEXT:    andps %xmm2, %xmm0
+; CHECK-THREE-NEXT:    andnps %xmm1, %xmm2
+; CHECK-THREE-NEXT:    orps %xmm2, %xmm0
+; CHECK-THREE-NEXT:    retq
+  %mx = and <4 x i32> %x, %mask
+  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %my = and <4 x i32> %y, %notmask
+  %r = or <4 x i32> %mx, %my
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @out_vec_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
+; CHECK-ZERO-LABEL: out_vec_undef:
+; CHECK-ZERO:       # %bb.0:
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; CHECK-ZERO-NEXT:    andl %eax, %r8d
+; CHECK-ZERO-NEXT:    andl %r11d, %edx
+; CHECK-ZERO-NEXT:    andl %r10d, %esi
+; CHECK-ZERO-NEXT:    andl {{[0-9]+}}(%rsp), %ecx
+; CHECK-ZERO-NEXT:    notl %r11d
+; CHECK-ZERO-NEXT:    notl %eax
+; CHECK-ZERO-NEXT:    notl %r10d
+; CHECK-ZERO-NEXT:    andl %r9d, %r10d
+; CHECK-ZERO-NEXT:    orl %esi, %r10d
+; CHECK-ZERO-NEXT:    andl {{[0-9]+}}(%rsp), %eax
+; CHECK-ZERO-NEXT:    orl %r8d, %eax
+; CHECK-ZERO-NEXT:    andl {{[0-9]+}}(%rsp), %r11d
+; CHECK-ZERO-NEXT:    orl %edx, %r11d
+; CHECK-ZERO-NEXT:    movl %ecx, 8(%rdi)
+; CHECK-ZERO-NEXT:    movl %eax, 12(%rdi)
+; CHECK-ZERO-NEXT:    movl %r11d, 4(%rdi)
+; CHECK-ZERO-NEXT:    movl %r10d, (%rdi)
+; CHECK-ZERO-NEXT:    movq %rdi, %rax
+; CHECK-ZERO-NEXT:    retq
+;
+; CHECK-ONE-LABEL: out_vec_undef:
+; CHECK-ONE:       # %bb.0:
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
+; CHECK-ONE-NEXT:    andl %r11d, %r8d
+; CHECK-ONE-NEXT:    andl %r10d, %edx
+; CHECK-ONE-NEXT:    andl %eax, %esi
+; CHECK-ONE-NEXT:    andl {{[0-9]+}}(%rsp), %ecx
+; CHECK-ONE-NEXT:    andnl %r9d, %eax, %r9d
+; CHECK-ONE-NEXT:    orl %esi, %r9d
+; CHECK-ONE-NEXT:    andnl {{[0-9]+}}(%rsp), %r11d, %esi
+; CHECK-ONE-NEXT:    orl %r8d, %esi
+; CHECK-ONE-NEXT:    andnl {{[0-9]+}}(%rsp), %r10d, %eax
+; CHECK-ONE-NEXT:    orl %edx, %eax
+; CHECK-ONE-NEXT:    movl %esi, 12(%rdi)
+; CHECK-ONE-NEXT:    movl %ecx, 8(%rdi)
+; CHECK-ONE-NEXT:    movl %eax, 4(%rdi)
+; CHECK-ONE-NEXT:    movl %r9d, (%rdi)
+; CHECK-ONE-NEXT:    movq %rdi, %rax
+; CHECK-ONE-NEXT:    retq
+;
+; CHECK-TWO-LABEL: out_vec_undef:
+; CHECK-TWO:       # %bb.0:
+; CHECK-TWO-NEXT:    andps %xmm2, %xmm0
+; CHECK-TWO-NEXT:    andnps %xmm1, %xmm2
+; CHECK-TWO-NEXT:    orps %xmm2, %xmm0
+; CHECK-TWO-NEXT:    retq
+;
+; CHECK-THREE-LABEL: out_vec_undef:
+; CHECK-THREE:       # %bb.0:
+; CHECK-THREE-NEXT:    andps %xmm2, %xmm0
+; CHECK-THREE-NEXT:    andnps %xmm1, %xmm2
+; CHECK-THREE-NEXT:    orps %xmm2, %xmm0
+; CHECK-THREE-NEXT:    retq
+  %mx = and <4 x i32> %x, %mask
+  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 undef, i32 -1>
+  %my = and <4 x i32> %y, %notmask
+  %r = or <4 x i32> %mx, %my
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @out_vec_constmask(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
+; CHECK-ZERO-LABEL: out_vec_constmask:
+; CHECK-ZERO:       # %bb.0:
+; CHECK-ZERO-NEXT:    andl $16776960, %r8d # imm = 0xFFFF00
+; CHECK-ZERO-NEXT:    andl $16776960, %ecx # imm = 0xFFFF00
+; CHECK-ZERO-NEXT:    andl $16776960, %edx # imm = 0xFFFF00
+; CHECK-ZERO-NEXT:    andl $16776960, %esi # imm = 0xFFFF00
+; CHECK-ZERO-NEXT:    movl $-16776961, %eax # imm = 0xFF0000FF
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-ZERO-NEXT:    andl %eax, %r10d
+; CHECK-ZERO-NEXT:    orl %r8d, %r10d
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %r8d
+; CHECK-ZERO-NEXT:    andl %eax, %r8d
+; CHECK-ZERO-NEXT:    orl %ecx, %r8d
+; CHECK-ZERO-NEXT:    andl {{[0-9]+}}(%rsp), %eax
+; CHECK-ZERO-NEXT:    orl %edx, %eax
+; CHECK-ZERO-NEXT:    andl $-16776961, %r9d # imm = 0xFF0000FF
+; CHECK-ZERO-NEXT:    orl %esi, %r9d
+; CHECK-ZERO-NEXT:    movl %r10d, 12(%rdi)
+; CHECK-ZERO-NEXT:    movl %r8d, 8(%rdi)
+; CHECK-ZERO-NEXT:    movl %eax, 4(%rdi)
+; CHECK-ZERO-NEXT:    movl %r9d, (%rdi)
+; CHECK-ZERO-NEXT:    movq %rdi, %rax
+; CHECK-ZERO-NEXT:    retq
+;
+; CHECK-ONE-LABEL: out_vec_constmask:
+; CHECK-ONE:       # %bb.0:
+; CHECK-ONE-NEXT:    andl $16776960, %r8d # imm = 0xFFFF00
+; CHECK-ONE-NEXT:    andl $16776960, %ecx # imm = 0xFFFF00
+; CHECK-ONE-NEXT:    andl $16776960, %edx # imm = 0xFFFF00
+; CHECK-ONE-NEXT:    andl $16776960, %esi # imm = 0xFFFF00
+; CHECK-ONE-NEXT:    movl $-16776961, %eax # imm = 0xFF0000FF
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-ONE-NEXT:    andl %eax, %r10d
+; CHECK-ONE-NEXT:    orl %r8d, %r10d
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %r8d
+; CHECK-ONE-NEXT:    andl %eax, %r8d
+; CHECK-ONE-NEXT:    orl %ecx, %r8d
+; CHECK-ONE-NEXT:    andl {{[0-9]+}}(%rsp), %eax
+; CHECK-ONE-NEXT:    orl %edx, %eax
+; CHECK-ONE-NEXT:    andl $-16776961, %r9d # imm = 0xFF0000FF
+; CHECK-ONE-NEXT:    orl %esi, %r9d
+; CHECK-ONE-NEXT:    movl %r10d, 12(%rdi)
+; CHECK-ONE-NEXT:    movl %r8d, 8(%rdi)
+; CHECK-ONE-NEXT:    movl %eax, 4(%rdi)
+; CHECK-ONE-NEXT:    movl %r9d, (%rdi)
+; CHECK-ONE-NEXT:    movq %rdi, %rax
+; CHECK-ONE-NEXT:    retq
+;
+; CHECK-TWO-LABEL: out_vec_constmask:
+; CHECK-TWO:       # %bb.0:
+; CHECK-TWO-NEXT:    movaps {{.*#+}} xmm2 = [0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0]
+; CHECK-TWO-NEXT:    andps %xmm2, %xmm0
+; CHECK-TWO-NEXT:    andnps %xmm1, %xmm2
+; CHECK-TWO-NEXT:    orps %xmm2, %xmm0
+; CHECK-TWO-NEXT:    retq
+;
+; CHECK-THREE-LABEL: out_vec_constmask:
+; CHECK-THREE:       # %bb.0:
+; CHECK-THREE-NEXT:    movaps {{.*#+}} xmm2 = [0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0]
+; CHECK-THREE-NEXT:    andps %xmm2, %xmm0
+; CHECK-THREE-NEXT:    andnps %xmm1, %xmm2
+; CHECK-THREE-NEXT:    orps %xmm2, %xmm0
+; CHECK-THREE-NEXT:    retq
+  %mx = and <4 x i32> %x, <i32 16776960, i32 16776960, i32 16776960, i32 16776960>
+  %my = and <4 x i32> %y, <i32 -16776961, i32 -16776961, i32 -16776961, i32 -16776961>
+  %r = or <4 x i32> %mx, %my
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @out_vec_constmask_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
+; CHECK-ZERO-LABEL: out_vec_constmask_undef:
+; CHECK-ZERO:       # %bb.0:
+; CHECK-ZERO-NEXT:    andl $16776960, %r8d # imm = 0xFFFF00
+; CHECK-ZERO-NEXT:    andl $16776960, %edx # imm = 0xFFFF00
+; CHECK-ZERO-NEXT:    andl $16776960, %esi # imm = 0xFFFF00
+; CHECK-ZERO-NEXT:    movl $-16776961, %eax # imm = 0xFF0000FF
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
+; CHECK-ZERO-NEXT:    andl %eax, %ecx
+; CHECK-ZERO-NEXT:    orl %r8d, %ecx
+; CHECK-ZERO-NEXT:    andl {{[0-9]+}}(%rsp), %eax
+; CHECK-ZERO-NEXT:    orl %edx, %eax
+; CHECK-ZERO-NEXT:    andl $-16776961, %r9d # imm = 0xFF0000FF
+; CHECK-ZERO-NEXT:    orl %esi, %r9d
+; CHECK-ZERO-NEXT:    movl %ecx, 12(%rdi)
+; CHECK-ZERO-NEXT:    movl %eax, 4(%rdi)
+; CHECK-ZERO-NEXT:    movl %r9d, (%rdi)
+; CHECK-ZERO-NEXT:    movl $0, 8(%rdi)
+; CHECK-ZERO-NEXT:    movq %rdi, %rax
+; CHECK-ZERO-NEXT:    retq
+;
+; CHECK-ONE-LABEL: out_vec_constmask_undef:
+; CHECK-ONE:       # %bb.0:
+; CHECK-ONE-NEXT:    andl $16776960, %r8d # imm = 0xFFFF00
+; CHECK-ONE-NEXT:    andl $16776960, %edx # imm = 0xFFFF00
+; CHECK-ONE-NEXT:    andl $16776960, %esi # imm = 0xFFFF00
+; CHECK-ONE-NEXT:    movl $-16776961, %eax # imm = 0xFF0000FF
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
+; CHECK-ONE-NEXT:    andl %eax, %ecx
+; CHECK-ONE-NEXT:    orl %r8d, %ecx
+; CHECK-ONE-NEXT:    andl {{[0-9]+}}(%rsp), %eax
+; CHECK-ONE-NEXT:    orl %edx, %eax
+; CHECK-ONE-NEXT:    andl $-16776961, %r9d # imm = 0xFF0000FF
+; CHECK-ONE-NEXT:    orl %esi, %r9d
+; CHECK-ONE-NEXT:    movl %ecx, 12(%rdi)
+; CHECK-ONE-NEXT:    movl %eax, 4(%rdi)
+; CHECK-ONE-NEXT:    movl %r9d, (%rdi)
+; CHECK-ONE-NEXT:    movl $0, 8(%rdi)
+; CHECK-ONE-NEXT:    movq %rdi, %rax
+; CHECK-ONE-NEXT:    retq
+;
+; CHECK-TWO-LABEL: out_vec_constmask_undef:
+; CHECK-TWO:       # %bb.0:
+; CHECK-TWO-NEXT:    movaps {{.*#+}} xmm2 = [0,255,255,0,0,255,255,0,255,255,255,255,0,255,255,0]
+; CHECK-TWO-NEXT:    andps %xmm2, %xmm0
+; CHECK-TWO-NEXT:    andnps %xmm1, %xmm2
+; CHECK-TWO-NEXT:    orps %xmm2, %xmm0
+; CHECK-TWO-NEXT:    retq
+;
+; CHECK-THREE-LABEL: out_vec_constmask_undef:
+; CHECK-THREE:       # %bb.0:
+; CHECK-THREE-NEXT:    movaps {{.*#+}} xmm2 = [0,255,255,0,0,255,255,0,255,255,255,255,0,255,255,0]
+; CHECK-THREE-NEXT:    andps %xmm2, %xmm0
+; CHECK-THREE-NEXT:    andnps %xmm1, %xmm2
+; CHECK-THREE-NEXT:    orps %xmm2, %xmm0
+; CHECK-THREE-NEXT:    retq
+  %mx = and <4 x i32> %x, <i32 16776960, i32 16776960, i32 undef, i32 16776960>
+  %my = and <4 x i32> %y, <i32 -16776961, i32 -16776961, i32 undef, i32 -16776961>
+  %r = or <4 x i32> %mx, %my
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @out_vec_constmask_nonsplat(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
+; CHECK-ZERO-LABEL: out_vec_constmask_nonsplat:
+; CHECK-ZERO:       # %bb.0:
+; CHECK-ZERO-NEXT:    andl $16711935, %r8d # imm = 0xFF00FF
+; CHECK-ZERO-NEXT:    andl $16776960, %ecx # imm = 0xFFFF00
+; CHECK-ZERO-NEXT:    andl $16711935, %edx # imm = 0xFF00FF
+; CHECK-ZERO-NEXT:    andl $16776960, %esi # imm = 0xFFFF00
+; CHECK-ZERO-NEXT:    movl $-16711936, %r11d # imm = 0xFF00FF00
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-ZERO-NEXT:    andl %r11d, %r10d
+; CHECK-ZERO-NEXT:    orl %r8d, %r10d
+; CHECK-ZERO-NEXT:    movl $-16776961, %eax # imm = 0xFF0000FF
+; CHECK-ZERO-NEXT:    andl {{[0-9]+}}(%rsp), %eax
+; CHECK-ZERO-NEXT:    orl %ecx, %eax
+; CHECK-ZERO-NEXT:    andl {{[0-9]+}}(%rsp), %r11d
+; CHECK-ZERO-NEXT:    orl %edx, %r11d
+; CHECK-ZERO-NEXT:    andl $-16776961, %r9d # imm = 0xFF0000FF
+; CHECK-ZERO-NEXT:    orl %esi, %r9d
+; CHECK-ZERO-NEXT:    movl %r10d, 12(%rdi)
+; CHECK-ZERO-NEXT:    movl %eax, 8(%rdi)
+; CHECK-ZERO-NEXT:    movl %r11d, 4(%rdi)
+; CHECK-ZERO-NEXT:    movl %r9d, (%rdi)
+; CHECK-ZERO-NEXT:    movq %rdi, %rax
+; CHECK-ZERO-NEXT:    retq
+;
+; CHECK-ONE-LABEL: out_vec_constmask_nonsplat:
+; CHECK-ONE:       # %bb.0:
+; CHECK-ONE-NEXT:    andl $16711935, %r8d # imm = 0xFF00FF
+; CHECK-ONE-NEXT:    andl $16776960, %ecx # imm = 0xFFFF00
+; CHECK-ONE-NEXT:    andl $16711935, %edx # imm = 0xFF00FF
+; CHECK-ONE-NEXT:    andl $16776960, %esi # imm = 0xFFFF00
+; CHECK-ONE-NEXT:    movl $-16711936, %r11d # imm = 0xFF00FF00
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-ONE-NEXT:    andl %r11d, %r10d
+; CHECK-ONE-NEXT:    orl %r8d, %r10d
+; CHECK-ONE-NEXT:    movl $-16776961, %eax # imm = 0xFF0000FF
+; CHECK-ONE-NEXT:    andl {{[0-9]+}}(%rsp), %eax
+; CHECK-ONE-NEXT:    orl %ecx, %eax
+; CHECK-ONE-NEXT:    andl {{[0-9]+}}(%rsp), %r11d
+; CHECK-ONE-NEXT:    orl %edx, %r11d
+; CHECK-ONE-NEXT:    andl $-16776961, %r9d # imm = 0xFF0000FF
+; CHECK-ONE-NEXT:    orl %esi, %r9d
+; CHECK-ONE-NEXT:    movl %r10d, 12(%rdi)
+; CHECK-ONE-NEXT:    movl %eax, 8(%rdi)
+; CHECK-ONE-NEXT:    movl %r11d, 4(%rdi)
+; CHECK-ONE-NEXT:    movl %r9d, (%rdi)
+; CHECK-ONE-NEXT:    movq %rdi, %rax
+; CHECK-ONE-NEXT:    retq
+;
+; CHECK-TWO-LABEL: out_vec_constmask_nonsplat:
+; CHECK-TWO:       # %bb.0:
+; CHECK-TWO-NEXT:    movaps {{.*#+}} xmm2 = [0,255,255,0,255,0,255,0,0,255,255,0,255,0,255,0]
+; CHECK-TWO-NEXT:    andps %xmm2, %xmm0
+; CHECK-TWO-NEXT:    andnps %xmm1, %xmm2
+; CHECK-TWO-NEXT:    orps %xmm2, %xmm0
+; CHECK-TWO-NEXT:    retq
+;
+; CHECK-THREE-LABEL: out_vec_constmask_nonsplat:
+; CHECK-THREE:       # %bb.0:
+; CHECK-THREE-NEXT:    movaps {{.*#+}} xmm2 = [0,255,255,0,255,0,255,0,0,255,255,0,255,0,255,0]
+; CHECK-THREE-NEXT:    andps %xmm2, %xmm0
+; CHECK-THREE-NEXT:    andnps %xmm1, %xmm2
+; CHECK-THREE-NEXT:    orps %xmm2, %xmm0
+; CHECK-THREE-NEXT:    retq
+  %mx = and <4 x i32> %x, <i32 16776960, i32 16711935, i32 16776960, i32 16711935>
+  %my = and <4 x i32> %y, <i32 -16776961, i32 -16711936, i32 -16776961, i32 -16711936>
+  %r = or <4 x i32> %mx, %my
+  ret <4 x i32> %r
+}
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; The 'in' form ((x ^ y) & m) ^ y should give the same code as the 'out' form above.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+define <4 x i32> @in_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
+; CHECK-ZERO-LABEL: in_vec:
+; CHECK-ZERO:       # %bb.0:
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; CHECK-ZERO-NEXT:    xorl %r9d, %esi
+; CHECK-ZERO-NEXT:    xorl %eax, %edx
+; CHECK-ZERO-NEXT:    xorl %r11d, %ecx
+; CHECK-ZERO-NEXT:    xorl %r10d, %r8d
+; CHECK-ZERO-NEXT:    andl {{[0-9]+}}(%rsp), %r8d
+; CHECK-ZERO-NEXT:    andl {{[0-9]+}}(%rsp), %ecx
+; CHECK-ZERO-NEXT:    andl {{[0-9]+}}(%rsp), %edx
+; CHECK-ZERO-NEXT:    andl {{[0-9]+}}(%rsp), %esi
+; CHECK-ZERO-NEXT:    xorl %r9d, %esi
+; CHECK-ZERO-NEXT:    xorl %eax, %edx
+; CHECK-ZERO-NEXT:    xorl %r11d, %ecx
+; CHECK-ZERO-NEXT:    xorl %r10d, %r8d
+; CHECK-ZERO-NEXT:    movl %r8d, 12(%rdi)
+; CHECK-ZERO-NEXT:    movl %ecx, 8(%rdi)
+; CHECK-ZERO-NEXT:    movl %edx, 4(%rdi)
+; CHECK-ZERO-NEXT:    movl %esi, (%rdi)
+; CHECK-ZERO-NEXT:    movq %rdi, %rax
+; CHECK-ZERO-NEXT:    retq
+;
+; CHECK-ONE-LABEL: in_vec:
+; CHECK-ONE:       # %bb.0:
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; CHECK-ONE-NEXT:    xorl %r9d, %esi
+; CHECK-ONE-NEXT:    xorl %eax, %edx
+; CHECK-ONE-NEXT:    xorl %r11d, %ecx
+; CHECK-ONE-NEXT:    xorl %r10d, %r8d
+; CHECK-ONE-NEXT:    andl {{[0-9]+}}(%rsp), %r8d
+; CHECK-ONE-NEXT:    andl {{[0-9]+}}(%rsp), %ecx
+; CHECK-ONE-NEXT:    andl {{[0-9]+}}(%rsp), %edx
+; CHECK-ONE-NEXT:    andl {{[0-9]+}}(%rsp), %esi
+; CHECK-ONE-NEXT:    xorl %r9d, %esi
+; CHECK-ONE-NEXT:    xorl %eax, %edx
+; CHECK-ONE-NEXT:    xorl %r11d, %ecx
+; CHECK-ONE-NEXT:    xorl %r10d, %r8d
+; CHECK-ONE-NEXT:    movl %r8d, 12(%rdi)
+; CHECK-ONE-NEXT:    movl %ecx, 8(%rdi)
+; CHECK-ONE-NEXT:    movl %edx, 4(%rdi)
+; CHECK-ONE-NEXT:    movl %esi, (%rdi)
+; CHECK-ONE-NEXT:    movq %rdi, %rax
+; CHECK-ONE-NEXT:    retq
+;
+; CHECK-TWO-LABEL: in_vec:
+; CHECK-TWO:       # %bb.0:
+; CHECK-TWO-NEXT:    xorps %xmm1, %xmm0
+; CHECK-TWO-NEXT:    andps %xmm2, %xmm0
+; CHECK-TWO-NEXT:    xorps %xmm1, %xmm0
+; CHECK-TWO-NEXT:    retq
+;
+; CHECK-THREE-LABEL: in_vec:
+; CHECK-THREE:       # %bb.0:
+; CHECK-THREE-NEXT:    xorps %xmm1, %xmm0
+; CHECK-THREE-NEXT:    andps %xmm2, %xmm0
+; CHECK-THREE-NEXT:    xorps %xmm1, %xmm0
+; CHECK-THREE-NEXT:    retq
+  %n0 = xor <4 x i32> %x, %y
+  %n1 = and <4 x i32> %n0, %mask
+  %r = xor <4 x i32> %n1, %y
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @in_vec_constmask_splat(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-ZERO-LABEL: in_vec_constmask_splat:
+; CHECK-ZERO:       # %bb.0:
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; CHECK-ZERO-NEXT:    xorl %r9d, %esi
+; CHECK-ZERO-NEXT:    xorl %eax, %edx
+; CHECK-ZERO-NEXT:    xorl %r11d, %ecx
+; CHECK-ZERO-NEXT:    xorl %r10d, %r8d
+; CHECK-ZERO-NEXT:    andl $16776960, %r8d # imm = 0xFFFF00
+; CHECK-ZERO-NEXT:    andl $16776960, %ecx # imm = 0xFFFF00
+; CHECK-ZERO-NEXT:    andl $16776960, %edx # imm = 0xFFFF00
+; CHECK-ZERO-NEXT:    andl $16776960, %esi # imm = 0xFFFF00
+; CHECK-ZERO-NEXT:    xorl %r9d, %esi
+; CHECK-ZERO-NEXT:    xorl %eax, %edx
+; CHECK-ZERO-NEXT:    xorl %r11d, %ecx
+; CHECK-ZERO-NEXT:    xorl %r10d, %r8d
+; CHECK-ZERO-NEXT:    movl %r8d, 12(%rdi)
+; CHECK-ZERO-NEXT:    movl %ecx, 8(%rdi)
+; CHECK-ZERO-NEXT:    movl %edx, 4(%rdi)
+; CHECK-ZERO-NEXT:    movl %esi, (%rdi)
+; CHECK-ZERO-NEXT:    movq %rdi, %rax
+; CHECK-ZERO-NEXT:    retq
+;
+; CHECK-ONE-LABEL: in_vec_constmask_splat:
+; CHECK-ONE:       # %bb.0:
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; CHECK-ONE-NEXT:    xorl %r9d, %esi
+; CHECK-ONE-NEXT:    xorl %eax, %edx
+; CHECK-ONE-NEXT:    xorl %r11d, %ecx
+; CHECK-ONE-NEXT:    xorl %r10d, %r8d
+; CHECK-ONE-NEXT:    andl $16776960, %r8d # imm = 0xFFFF00
+; CHECK-ONE-NEXT:    andl $16776960, %ecx # imm = 0xFFFF00
+; CHECK-ONE-NEXT:    andl $16776960, %edx # imm = 0xFFFF00
+; CHECK-ONE-NEXT:    andl $16776960, %esi # imm = 0xFFFF00
+; CHECK-ONE-NEXT:    xorl %r9d, %esi
+; CHECK-ONE-NEXT:    xorl %eax, %edx
+; CHECK-ONE-NEXT:    xorl %r11d, %ecx
+; CHECK-ONE-NEXT:    xorl %r10d, %r8d
+; CHECK-ONE-NEXT:    movl %r8d, 12(%rdi)
+; CHECK-ONE-NEXT:    movl %ecx, 8(%rdi)
+; CHECK-ONE-NEXT:    movl %edx, 4(%rdi)
+; CHECK-ONE-NEXT:    movl %esi, (%rdi)
+; CHECK-ONE-NEXT:    movq %rdi, %rax
+; CHECK-ONE-NEXT:    retq
+;
+; CHECK-TWO-LABEL: in_vec_constmask_splat:
+; CHECK-TWO:       # %bb.0:
+; CHECK-TWO-NEXT:    xorps %xmm1, %xmm0
+; CHECK-TWO-NEXT:    andps {{.*}}(%rip), %xmm0
+; CHECK-TWO-NEXT:    xorps %xmm1, %xmm0
+; CHECK-TWO-NEXT:    retq
+;
+; CHECK-THREE-LABEL: in_vec_constmask_splat:
+; CHECK-THREE:       # %bb.0:
+; CHECK-THREE-NEXT:    xorps %xmm1, %xmm0
+; CHECK-THREE-NEXT:    andps {{.*}}(%rip), %xmm0
+; CHECK-THREE-NEXT:    xorps %xmm1, %xmm0
+; CHECK-THREE-NEXT:    retq
+  %n0 = xor <4 x i32> %x, %y
+  %n1 = and <4 x i32> %n0, <i32 16776960, i32 16776960, i32 16776960, i32 16776960>
+  %r = xor <4 x i32> %n1, %y
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @in_vec_constmask_undef(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-ZERO-LABEL: in_vec_constmask_undef:
+; CHECK-ZERO:       # %bb.0:
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; CHECK-ZERO-NEXT:    xorl %r9d, %esi
+; CHECK-ZERO-NEXT:    xorl %eax, %edx
+; CHECK-ZERO-NEXT:    xorl %ecx, %r8d
+; CHECK-ZERO-NEXT:    andl $16776960, %r8d # imm = 0xFFFF00
+; CHECK-ZERO-NEXT:    andl $16776960, %edx # imm = 0xFFFF00
+; CHECK-ZERO-NEXT:    andl $16776960, %esi # imm = 0xFFFF00
+; CHECK-ZERO-NEXT:    xorl %r9d, %esi
+; CHECK-ZERO-NEXT:    xorl %eax, %edx
+; CHECK-ZERO-NEXT:    xorl %ecx, %r8d
+; CHECK-ZERO-NEXT:    movl %r10d, 8(%rdi)
+; CHECK-ZERO-NEXT:    movl %r8d, 12(%rdi)
+; CHECK-ZERO-NEXT:    movl %edx, 4(%rdi)
+; CHECK-ZERO-NEXT:    movl %esi, (%rdi)
+; CHECK-ZERO-NEXT:    movq %rdi, %rax
+; CHECK-ZERO-NEXT:    retq
+;
+; CHECK-ONE-LABEL: in_vec_constmask_undef:
+; CHECK-ONE:       # %bb.0:
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; CHECK-ONE-NEXT:    xorl %r9d, %esi
+; CHECK-ONE-NEXT:    xorl %eax, %edx
+; CHECK-ONE-NEXT:    xorl %ecx, %r8d
+; CHECK-ONE-NEXT:    andl $16776960, %r8d # imm = 0xFFFF00
+; CHECK-ONE-NEXT:    andl $16776960, %edx # imm = 0xFFFF00
+; CHECK-ONE-NEXT:    andl $16776960, %esi # imm = 0xFFFF00
+; CHECK-ONE-NEXT:    xorl %r9d, %esi
+; CHECK-ONE-NEXT:    xorl %eax, %edx
+; CHECK-ONE-NEXT:    xorl %ecx, %r8d
+; CHECK-ONE-NEXT:    movl %r10d, 8(%rdi)
+; CHECK-ONE-NEXT:    movl %r8d, 12(%rdi)
+; CHECK-ONE-NEXT:    movl %edx, 4(%rdi)
+; CHECK-ONE-NEXT:    movl %esi, (%rdi)
+; CHECK-ONE-NEXT:    movq %rdi, %rax
+; CHECK-ONE-NEXT:    retq
+;
+; CHECK-TWO-LABEL: in_vec_constmask_undef:
+; CHECK-TWO:       # %bb.0:
+; CHECK-TWO-NEXT:    xorps %xmm1, %xmm0
+; CHECK-TWO-NEXT:    andps {{.*}}(%rip), %xmm0
+; CHECK-TWO-NEXT:    xorps %xmm1, %xmm0
+; CHECK-TWO-NEXT:    retq
+;
+; CHECK-THREE-LABEL: in_vec_constmask_undef:
+; CHECK-THREE:       # %bb.0:
+; CHECK-THREE-NEXT:    xorps %xmm1, %xmm0
+; CHECK-THREE-NEXT:    andps {{.*}}(%rip), %xmm0
+; CHECK-THREE-NEXT:    xorps %xmm1, %xmm0
+; CHECK-THREE-NEXT:    retq
+  %n0 = xor <4 x i32> %x, %y
+  %n1 = and <4 x i32> %n0, <i32 16776960, i32 16776960, i32 undef, i32 16776960>
+  %r = xor <4 x i32> %n1, %y
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @in_vec_constmask_nonsplat(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-ZERO-LABEL: in_vec_constmask_nonsplat:
+; CHECK-ZERO:       # %bb.0:
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
+; CHECK-ZERO-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; CHECK-ZERO-NEXT:    xorl %r9d, %esi
+; CHECK-ZERO-NEXT:    xorl %eax, %edx
+; CHECK-ZERO-NEXT:    xorl %r11d, %ecx
+; CHECK-ZERO-NEXT:    xorl %r10d, %r8d
+; CHECK-ZERO-NEXT:    andl $16711935, %r8d # imm = 0xFF00FF
+; CHECK-ZERO-NEXT:    andl $16776960, %ecx # imm = 0xFFFF00
+; CHECK-ZERO-NEXT:    andl $16711935, %edx # imm = 0xFF00FF
+; CHECK-ZERO-NEXT:    andl $16776960, %esi # imm = 0xFFFF00
+; CHECK-ZERO-NEXT:    xorl %r9d, %esi
+; CHECK-ZERO-NEXT:    xorl %eax, %edx
+; CHECK-ZERO-NEXT:    xorl %r11d, %ecx
+; CHECK-ZERO-NEXT:    xorl %r10d, %r8d
+; CHECK-ZERO-NEXT:    movl %r8d, 12(%rdi)
+; CHECK-ZERO-NEXT:    movl %ecx, 8(%rdi)
+; CHECK-ZERO-NEXT:    movl %edx, 4(%rdi)
+; CHECK-ZERO-NEXT:    movl %esi, (%rdi)
+; CHECK-ZERO-NEXT:    movq %rdi, %rax
+; CHECK-ZERO-NEXT:    retq
+;
+; CHECK-ONE-LABEL: in_vec_constmask_nonsplat:
+; CHECK-ONE:       # %bb.0:
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
+; CHECK-ONE-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; CHECK-ONE-NEXT:    xorl %r9d, %esi
+; CHECK-ONE-NEXT:    xorl %eax, %edx
+; CHECK-ONE-NEXT:    xorl %r11d, %ecx
+; CHECK-ONE-NEXT:    xorl %r10d, %r8d
+; CHECK-ONE-NEXT:    andl $16711935, %r8d # imm = 0xFF00FF
+; CHECK-ONE-NEXT:    andl $16776960, %ecx # imm = 0xFFFF00
+; CHECK-ONE-NEXT:    andl $16711935, %edx # imm = 0xFF00FF
+; CHECK-ONE-NEXT:    andl $16776960, %esi # imm = 0xFFFF00
+; CHECK-ONE-NEXT:    xorl %r9d, %esi
+; CHECK-ONE-NEXT:    xorl %eax, %edx
+; CHECK-ONE-NEXT:    xorl %r11d, %ecx
+; CHECK-ONE-NEXT:    xorl %r10d, %r8d
+; CHECK-ONE-NEXT:    movl %r8d, 12(%rdi)
+; CHECK-ONE-NEXT:    movl %ecx, 8(%rdi)
+; CHECK-ONE-NEXT:    movl %edx, 4(%rdi)
+; CHECK-ONE-NEXT:    movl %esi, (%rdi)
+; CHECK-ONE-NEXT:    movq %rdi, %rax
+; CHECK-ONE-NEXT:    retq
+;
+; CHECK-TWO-LABEL: in_vec_constmask_nonsplat:
+; CHECK-TWO:       # %bb.0:
+; CHECK-TWO-NEXT:    xorps %xmm1, %xmm0
+; CHECK-TWO-NEXT:    andps {{.*}}(%rip), %xmm0
+; CHECK-TWO-NEXT:    xorps %xmm1, %xmm0
+; CHECK-TWO-NEXT:    retq
+;
+; CHECK-THREE-LABEL: in_vec_constmask_nonsplat:
+; CHECK-THREE:       # %bb.0:
+; CHECK-THREE-NEXT:    xorps %xmm1, %xmm0
+; CHECK-THREE-NEXT:    andps {{.*}}(%rip), %xmm0
+; CHECK-THREE-NEXT:    xorps %xmm1, %xmm0
+; CHECK-THREE-NEXT:    retq
+  %n0 = xor <4 x i32> %x, %y
+  %n1 = and <4 x i32> %n0, <i32 16776960, i32 16711935, i32 16776960, i32 16711935>
+  %r = xor <4 x i32> %n1, %y
+  ret <4 x i32> %r
+}
Index: test/CodeGen/X86/unfold-masked-merge-scalar.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/unfold-masked-merge-scalar.ll
@@ -0,0 +1,100 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi < %s | FileCheck %s --check-prefix=CHECK-NOBMI
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi < %s | FileCheck %s --check-prefix=CHECK-BMI
+
+; https://bugs.llvm.org/show_bug.cgi?id=37104
+
+define i32 @out(i32 %x, i32 %y, i32 %mask) {
+; CHECK-NOBMI-LABEL: out:
+; CHECK-NOBMI:       # %bb.0:
+; CHECK-NOBMI-NEXT:    andl %edx, %edi
+; CHECK-NOBMI-NEXT:    notl %edx
+; CHECK-NOBMI-NEXT:    andl %esi, %edx
+; CHECK-NOBMI-NEXT:    orl %edi, %edx
+; CHECK-NOBMI-NEXT:    movl %edx, %eax
+; CHECK-NOBMI-NEXT:    retq
+;
+; CHECK-BMI-LABEL: out:
+; CHECK-BMI:       # %bb.0:
+; CHECK-BMI-NEXT:    andl %edx, %edi
+; CHECK-BMI-NEXT:    andnl %esi, %edx, %eax
+; CHECK-BMI-NEXT:    orl %edi, %eax
+; CHECK-BMI-NEXT:    retq
+  %mx = and i32 %x, %mask
+  %notmask = xor i32 %mask, -1
+  %my = and i32 %y, %notmask
+  %r = or i32 %mx, %my
+  ret i32 %r
+}
+
+define i32 @out_constmask(i32 %x, i32 %y) {
+; CHECK-NOBMI-LABEL: out_constmask:
+; CHECK-NOBMI:       # %bb.0:
+; CHECK-NOBMI-NEXT:    # kill: def $esi killed $esi def $rsi
+; CHECK-NOBMI-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NOBMI-NEXT:    andl $16776960, %edi # imm = 0xFFFF00
+; CHECK-NOBMI-NEXT:    andl $-16776961, %esi # imm = 0xFF0000FF
+; CHECK-NOBMI-NEXT:    leal (%rsi,%rdi), %eax
+; CHECK-NOBMI-NEXT:    retq
+;
+; CHECK-BMI-LABEL: out_constmask:
+; CHECK-BMI:       # %bb.0:
+; CHECK-BMI-NEXT:    # kill: def $esi killed $esi def $rsi
+; CHECK-BMI-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-BMI-NEXT:    andl $16776960, %edi # imm = 0xFFFF00
+; CHECK-BMI-NEXT:    andl $-16776961, %esi # imm = 0xFF0000FF
+; CHECK-BMI-NEXT:    leal (%rsi,%rdi), %eax
+; CHECK-BMI-NEXT:    retq
+  %mx = and i32 %x, 16776960
+  %my = and i32 %y, -16776961
+  %r = or i32 %mx, %my
+  ret i32 %r
+}
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; The 'in' form ((x ^ y) & m) ^ y should give the same code as the 'out' form above.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+define i32 @in(i32 %x, i32 %y, i32 %mask) {
+; CHECK-NOBMI-LABEL: in:
+; CHECK-NOBMI:       # %bb.0:
+; CHECK-NOBMI-NEXT:    xorl %esi, %edi
+; CHECK-NOBMI-NEXT:    andl %edx, %edi
+; CHECK-NOBMI-NEXT:    xorl %esi, %edi
+; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    retq
+;
+; CHECK-BMI-LABEL: in:
+; CHECK-BMI:       # %bb.0:
+; CHECK-BMI-NEXT:    xorl %esi, %edi
+; CHECK-BMI-NEXT:    andl %edx, %edi
+; CHECK-BMI-NEXT:    xorl %esi, %edi
+; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    retq
+  %n0 = xor i32 %x, %y
+  %n1 = and i32 %n0, %mask
+  %r = xor i32 %n1, %y
+  ret i32 %r
+}
+
+define i32 @in_constmask(i32 %x, i32 %y) {
+; CHECK-NOBMI-LABEL: in_constmask:
+; CHECK-NOBMI:       # %bb.0:
+; CHECK-NOBMI-NEXT:    xorl %esi, %edi
+; CHECK-NOBMI-NEXT:    andl $16776960, %edi # imm = 0xFFFF00
+; CHECK-NOBMI-NEXT:    xorl %esi, %edi
+; CHECK-NOBMI-NEXT:    movl %edi, %eax
+; CHECK-NOBMI-NEXT:    retq
+;
+; CHECK-BMI-LABEL: in_constmask:
+; CHECK-BMI:       # %bb.0:
+; CHECK-BMI-NEXT:    xorl %esi, %edi
+; CHECK-BMI-NEXT:    andl $16776960, %edi # imm = 0xFFFF00
+; CHECK-BMI-NEXT:    xorl %esi, %edi
+; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    retq
+  %n0 = xor i32 %x, %y
+  %n1 = and i32 %n0, 16776960
+  %r = xor i32 %n1, %y
+  ret i32 %r
+}
Index: test/CodeGen/X86/unfold-masked-merge-vector.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/unfold-masked-merge-vector.ll
@@ -0,0 +1,350 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-sse < %s | FileCheck %s --check-prefix=CHECK-NOSSE
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse < %s | FileCheck %s --check-prefix=CHECK-SSE
+
+; https://bugs.llvm.org/show_bug.cgi?id=37104
+
+define <4 x i32> @out_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
+; CHECK-NOSSE-LABEL: out_vec:
+; CHECK-NOSSE:       # %bb.0:
+; CHECK-NOSSE-NEXT:    pushq %rbx
+; CHECK-NOSSE-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NOSSE-NEXT:    .cfi_offset %rbx, -16
+; CHECK-NOSSE-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-NOSSE-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
+; CHECK-NOSSE-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NOSSE-NEXT:    movl {{[0-9]+}}(%rsp), %ebx
+; CHECK-NOSSE-NEXT:    andl %ebx, %r8d
+; CHECK-NOSSE-NEXT:    andl %eax, %ecx
+; CHECK-NOSSE-NEXT:    andl %r11d, %edx
+; CHECK-NOSSE-NEXT:    andl %r10d, %esi
+; CHECK-NOSSE-NEXT:    notl %r11d
+; CHECK-NOSSE-NEXT:    notl %eax
+; CHECK-NOSSE-NEXT:    notl %ebx
+; CHECK-NOSSE-NEXT:    notl %r10d
+; CHECK-NOSSE-NEXT:    andl %r9d, %r10d
+; CHECK-NOSSE-NEXT:    orl %esi, %r10d
+; CHECK-NOSSE-NEXT:    andl {{[0-9]+}}(%rsp), %ebx
+; CHECK-NOSSE-NEXT:    orl %r8d, %ebx
+; CHECK-NOSSE-NEXT:    andl {{[0-9]+}}(%rsp), %eax
+; CHECK-NOSSE-NEXT:    orl %ecx, %eax
+; CHECK-NOSSE-NEXT:    andl {{[0-9]+}}(%rsp), %r11d
+; CHECK-NOSSE-NEXT:    orl %edx, %r11d
+; CHECK-NOSSE-NEXT:    movl %ebx, 12(%rdi)
+; CHECK-NOSSE-NEXT:    movl %eax, 8(%rdi)
+; CHECK-NOSSE-NEXT:    movl %r11d, 4(%rdi)
+; CHECK-NOSSE-NEXT:    movl %r10d, (%rdi)
+; CHECK-NOSSE-NEXT:    movq %rdi, %rax
+; CHECK-NOSSE-NEXT:    popq %rbx
+; CHECK-NOSSE-NEXT:    retq
+;
+; CHECK-SSE-LABEL: out_vec:
+; CHECK-SSE:       # %bb.0:
+; CHECK-SSE-NEXT:    andps %xmm2, %xmm0
+; CHECK-SSE-NEXT:    andnps %xmm1, %xmm2
+; CHECK-SSE-NEXT:    orps %xmm2, %xmm0
+; CHECK-SSE-NEXT:    retq
+  %mx = and <4 x i32> %x, %mask
+  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %my = and <4 x i32> %y, %notmask
+  %r = or <4 x i32> %mx, %my
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @out_vec_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
+; CHECK-NOSSE-LABEL: out_vec_undef:
+; CHECK-NOSSE:       # %bb.0:
+; CHECK-NOSSE-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-NOSSE-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
+; CHECK-NOSSE-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NOSSE-NEXT:    andl %eax, %r8d
+; CHECK-NOSSE-NEXT:    andl %r11d, %edx
+; CHECK-NOSSE-NEXT:    andl %r10d, %esi
+; CHECK-NOSSE-NEXT:    andl {{[0-9]+}}(%rsp), %ecx
+; CHECK-NOSSE-NEXT:    notl %r11d
+; CHECK-NOSSE-NEXT:    notl %eax
+; CHECK-NOSSE-NEXT:    notl %r10d
+; CHECK-NOSSE-NEXT:    andl %r9d, %r10d
+; CHECK-NOSSE-NEXT:    orl %esi, %r10d
+; CHECK-NOSSE-NEXT:    andl {{[0-9]+}}(%rsp), %eax
+; CHECK-NOSSE-NEXT:    orl %r8d, %eax
+; CHECK-NOSSE-NEXT:    andl {{[0-9]+}}(%rsp), %r11d
+; CHECK-NOSSE-NEXT:    orl %edx, %r11d
+; CHECK-NOSSE-NEXT:    movl %ecx, 8(%rdi)
+; CHECK-NOSSE-NEXT:    movl %eax, 12(%rdi)
+; CHECK-NOSSE-NEXT:    movl %r11d, 4(%rdi)
+; CHECK-NOSSE-NEXT:    movl %r10d, (%rdi)
+; CHECK-NOSSE-NEXT:    movq %rdi, %rax
+; CHECK-NOSSE-NEXT:    retq
+;
+; CHECK-SSE-LABEL: out_vec_undef:
+; CHECK-SSE:       # %bb.0:
+; CHECK-SSE-NEXT:    andps %xmm2, %xmm0
+; CHECK-SSE-NEXT:    andnps %xmm1, %xmm2
+; CHECK-SSE-NEXT:    orps %xmm2, %xmm0
+; CHECK-SSE-NEXT:    retq
+  %mx = and <4 x i32> %x, %mask
+  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 undef, i32 -1>
+  %my = and <4 x i32> %y, %notmask
+  %r = or <4 x i32> %mx, %my
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @out_vec_constmask_splat(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
+; CHECK-NOSSE-LABEL: out_vec_constmask_splat:
+; CHECK-NOSSE:       # %bb.0:
+; CHECK-NOSSE-NEXT:    andl $16776960, %r8d # imm = 0xFFFF00
+; CHECK-NOSSE-NEXT:    andl $16776960, %ecx # imm = 0xFFFF00
+; CHECK-NOSSE-NEXT:    andl $16776960, %edx # imm = 0xFFFF00
+; CHECK-NOSSE-NEXT:    andl $16776960, %esi # imm = 0xFFFF00
+; CHECK-NOSSE-NEXT:    movl $-16776961, %eax # imm = 0xFF0000FF
+; CHECK-NOSSE-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-NOSSE-NEXT:    andl %eax, %r10d
+; CHECK-NOSSE-NEXT:    orl %r8d, %r10d
+; CHECK-NOSSE-NEXT:    movl {{[0-9]+}}(%rsp), %r8d
+; CHECK-NOSSE-NEXT:    andl %eax, %r8d
+; CHECK-NOSSE-NEXT:    orl %ecx, %r8d
+; CHECK-NOSSE-NEXT:    andl {{[0-9]+}}(%rsp), %eax
+; CHECK-NOSSE-NEXT:    orl %edx, %eax
+; CHECK-NOSSE-NEXT:    andl $-16776961, %r9d # imm = 0xFF0000FF
+; CHECK-NOSSE-NEXT:    orl %esi, %r9d
+; CHECK-NOSSE-NEXT:    movl %r10d, 12(%rdi)
+; CHECK-NOSSE-NEXT:    movl %r8d, 8(%rdi)
+; CHECK-NOSSE-NEXT:    movl %eax, 4(%rdi)
+; CHECK-NOSSE-NEXT:    movl %r9d, (%rdi)
+; CHECK-NOSSE-NEXT:    movq %rdi, %rax
+; CHECK-NOSSE-NEXT:    retq
+;
+; CHECK-SSE-LABEL: out_vec_constmask_splat:
+; CHECK-SSE:       # %bb.0:
+; CHECK-SSE-NEXT:    movaps {{.*#+}} xmm2 = [0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0]
+; CHECK-SSE-NEXT:    andps %xmm2, %xmm0
+; CHECK-SSE-NEXT:    andnps %xmm1, %xmm2
+; CHECK-SSE-NEXT:    orps %xmm2, %xmm0
+; CHECK-SSE-NEXT:    retq
+  %mx = and <4 x i32> %x, <i32 16776960, i32 16776960, i32 16776960, i32 16776960>
+  %my = and <4 x i32> %y, <i32 -16776961, i32 -16776961, i32 -16776961, i32 -16776961>
+  %r = or <4 x i32> %mx, %my
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @out_vec_constmask_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
+; CHECK-NOSSE-LABEL: out_vec_constmask_undef:
+; CHECK-NOSSE:       # %bb.0:
+; CHECK-NOSSE-NEXT:    andl $16776960, %r8d # imm = 0xFFFF00
+; CHECK-NOSSE-NEXT:    andl $16776960, %edx # imm = 0xFFFF00
+; CHECK-NOSSE-NEXT:    andl $16776960, %esi # imm = 0xFFFF00
+; CHECK-NOSSE-NEXT:    movl $-16776961, %eax # imm = 0xFF0000FF
+; CHECK-NOSSE-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
+; CHECK-NOSSE-NEXT:    andl %eax, %ecx
+; CHECK-NOSSE-NEXT:    orl %r8d, %ecx
+; CHECK-NOSSE-NEXT:    andl {{[0-9]+}}(%rsp), %eax
+; CHECK-NOSSE-NEXT:    orl %edx, %eax
+; CHECK-NOSSE-NEXT:    andl $-16776961, %r9d # imm = 0xFF0000FF
+; CHECK-NOSSE-NEXT:    orl %esi, %r9d
+; CHECK-NOSSE-NEXT:    movl %ecx, 12(%rdi)
+; CHECK-NOSSE-NEXT:    movl %eax, 4(%rdi)
+; CHECK-NOSSE-NEXT:    movl %r9d, (%rdi)
+; CHECK-NOSSE-NEXT:    movl $0, 8(%rdi)
+; CHECK-NOSSE-NEXT:    movq %rdi, %rax
+; CHECK-NOSSE-NEXT:    retq
+;
+; CHECK-SSE-LABEL: out_vec_constmask_undef:
+; CHECK-SSE:       # %bb.0:
+; CHECK-SSE-NEXT:    movaps {{.*#+}} xmm2 = [0,255,255,0,0,255,255,0,255,255,255,255,0,255,255,0]
+; CHECK-SSE-NEXT:    andps %xmm2, %xmm0
+; CHECK-SSE-NEXT:    andnps %xmm1, %xmm2
+; CHECK-SSE-NEXT:    orps %xmm2, %xmm0
+; CHECK-SSE-NEXT:    retq
+  %mx = and <4 x i32> %x, <i32 16776960, i32 16776960, i32 undef, i32 16776960>
+  %my = and <4 x i32> %y, <i32 -16776961, i32 -16776961, i32 undef, i32 -16776961>
+  %r = or <4 x i32> %mx, %my
+  ret <4 x i32> %r
+}
+
+define <4 x i32> @out_vec_constmask_nonsplat(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
+; CHECK-NOSSE-LABEL: out_vec_constmask_nonsplat:
+; CHECK-NOSSE:       # %bb.0:
+; CHECK-NOSSE-NEXT:    andl $16711935, %r8d # imm = 0xFF00FF
+; CHECK-NOSSE-NEXT:    andl $16776960, %ecx # imm = 0xFFFF00
+; CHECK-NOSSE-NEXT:    andl $16711935, %edx # imm = 0xFF00FF
+; CHECK-NOSSE-NEXT:    andl $16776960, %esi # imm = 0xFFFF00
+; CHECK-NOSSE-NEXT:    movl $-16711936, %r11d # imm = 0xFF00FF00
+; CHECK-NOSSE-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-NOSSE-NEXT:    andl %r11d, %r10d
+; CHECK-NOSSE-NEXT:    orl %r8d, %r10d
+; CHECK-NOSSE-NEXT:    movl $-16776961, %eax # imm = 0xFF0000FF
+; CHECK-NOSSE-NEXT:    andl {{[0-9]+}}(%rsp), %eax
+; CHECK-NOSSE-NEXT:    orl %ecx, %eax
+; CHECK-NOSSE-NEXT:    andl {{[0-9]+}}(%rsp), %r11d
+; CHECK-NOSSE-NEXT:    orl %edx, %r11d
+; CHECK-NOSSE-NEXT:    andl $-16776961, %r9d # imm = 0xFF0000FF
+; CHECK-NOSSE-NEXT:    orl %esi, %r9d
+; CHECK-NOSSE-NEXT:    movl %r10d, 12(%rdi)
+; CHECK-NOSSE-NEXT:    movl %eax, 8(%rdi)
+; CHECK-NOSSE-NEXT:    movl %r11d, 4(%rdi)
+; CHECK-NOSSE-NEXT:    movl %r9d, (%rdi)
+; CHECK-NOSSE-NEXT:    movq %rdi, %rax
+; CHECK-NOSSE-NEXT:    retq
+;
+; CHECK-SSE-LABEL: out_vec_constmask_nonsplat:
+; CHECK-SSE:       # %bb.0:
+; CHECK-SSE-NEXT:    movaps {{.*#+}} xmm2 = [0,255,255,0,255,0,255,0,0,255,255,0,255,0,255,0]
+; CHECK-SSE-NEXT:    andps %xmm2, %xmm0
+; CHECK-SSE-NEXT:    andnps %xmm1, %xmm2
+; CHECK-SSE-NEXT:    orps %xmm2, %xmm0
+; CHECK-SSE-NEXT:    retq
+  %mx = and <4 x i32> %x, <i32 16776960, i32 16711935, i32 16776960, i32 16711935>
+  %my = and <4 x i32> %y, <i32 -16776961, i32 -16711936, i32 -16776961, i32 -16711936>
+  %r = or <4 x i32> %mx, %my
+  ret <4 x i32> %r
+}
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; The 'in' form ((x ^ y) & m) ^ y should give the same code as the 'out' form above.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+; Masked merge in xor/and/xor form with a variable mask:
+;   r = ((x ^ y) & mask) ^ y
+define <4 x i32> @in_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
+; CHECK-NOSSE-LABEL: in_vec:
+; CHECK-NOSSE:         # %bb.0:
+; CHECK-NOSSE-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-NOSSE-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
+; CHECK-NOSSE-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NOSSE-NEXT:    xorl %r9d, %esi
+; CHECK-NOSSE-NEXT:    xorl %eax, %edx
+; CHECK-NOSSE-NEXT:    xorl %r11d, %ecx
+; CHECK-NOSSE-NEXT:    xorl %r10d, %r8d
+; CHECK-NOSSE-NEXT:    andl {{[0-9]+}}(%rsp), %r8d
+; CHECK-NOSSE-NEXT:    andl {{[0-9]+}}(%rsp), %ecx
+; CHECK-NOSSE-NEXT:    andl {{[0-9]+}}(%rsp), %edx
+; CHECK-NOSSE-NEXT:    andl {{[0-9]+}}(%rsp), %esi
+; CHECK-NOSSE-NEXT:    xorl %r9d, %esi
+; CHECK-NOSSE-NEXT:    xorl %eax, %edx
+; CHECK-NOSSE-NEXT:    xorl %r11d, %ecx
+; CHECK-NOSSE-NEXT:    xorl %r10d, %r8d
+; CHECK-NOSSE-NEXT:    movl %r8d, 12(%rdi)
+; CHECK-NOSSE-NEXT:    movl %ecx, 8(%rdi)
+; CHECK-NOSSE-NEXT:    movl %edx, 4(%rdi)
+; CHECK-NOSSE-NEXT:    movl %esi, (%rdi)
+; CHECK-NOSSE-NEXT:    movq %rdi, %rax
+; CHECK-NOSSE-NEXT:    retq
+;
+; CHECK-SSE-LABEL: in_vec:
+; CHECK-SSE:         # %bb.0:
+; CHECK-SSE-NEXT:    xorps %xmm1, %xmm0
+; CHECK-SSE-NEXT:    andps %xmm2, %xmm0
+; CHECK-SSE-NEXT:    xorps %xmm1, %xmm0
+; CHECK-SSE-NEXT:    retq
+  %n0 = xor <4 x i32> %x, %y
+  %n1 = and <4 x i32> %n0, %mask
+  %r = xor <4 x i32> %n1, %y
+  ret <4 x i32> %r
+}
+
+; xor/and/xor masked merge with a constant splat mask:
+;   r = ((x ^ y) & C) ^ y, with C = 0x00FFFF00 in every lane.
+; NOTE(review): the constant operand was missing; it is re-derived from the
+; immediates in the assertions below - confirm against the original test.
+define <4 x i32> @in_vec_constmask_splat(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-NOSSE-LABEL: in_vec_constmask_splat:
+; CHECK-NOSSE:         # %bb.0:
+; CHECK-NOSSE-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-NOSSE-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
+; CHECK-NOSSE-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NOSSE-NEXT:    xorl %r9d, %esi
+; CHECK-NOSSE-NEXT:    xorl %eax, %edx
+; CHECK-NOSSE-NEXT:    xorl %r11d, %ecx
+; CHECK-NOSSE-NEXT:    xorl %r10d, %r8d
+; CHECK-NOSSE-NEXT:    andl $16776960, %r8d # imm = 0xFFFF00
+; CHECK-NOSSE-NEXT:    andl $16776960, %ecx # imm = 0xFFFF00
+; CHECK-NOSSE-NEXT:    andl $16776960, %edx # imm = 0xFFFF00
+; CHECK-NOSSE-NEXT:    andl $16776960, %esi # imm = 0xFFFF00
+; CHECK-NOSSE-NEXT:    xorl %r9d, %esi
+; CHECK-NOSSE-NEXT:    xorl %eax, %edx
+; CHECK-NOSSE-NEXT:    xorl %r11d, %ecx
+; CHECK-NOSSE-NEXT:    xorl %r10d, %r8d
+; CHECK-NOSSE-NEXT:    movl %r8d, 12(%rdi)
+; CHECK-NOSSE-NEXT:    movl %ecx, 8(%rdi)
+; CHECK-NOSSE-NEXT:    movl %edx, 4(%rdi)
+; CHECK-NOSSE-NEXT:    movl %esi, (%rdi)
+; CHECK-NOSSE-NEXT:    movq %rdi, %rax
+; CHECK-NOSSE-NEXT:    retq
+;
+; CHECK-SSE-LABEL: in_vec_constmask_splat:
+; CHECK-SSE:         # %bb.0:
+; CHECK-SSE-NEXT:    xorps %xmm1, %xmm0
+; CHECK-SSE-NEXT:    andps {{.*}}(%rip), %xmm0
+; CHECK-SSE-NEXT:    xorps %xmm1, %xmm0
+; CHECK-SSE-NEXT:    retq
+  %n0 = xor <4 x i32> %x, %y
+  %n1 = and <4 x i32> %n0, <i32 16776960, i32 16776960, i32 16776960, i32 16776960>
+  %r = xor <4 x i32> %n1, %y
+  ret <4 x i32> %r
+}
+
+; xor/and/xor masked merge with C = 0x00FFFF00 and one lane of the constant
+; mask left undef.
+; NOTE(review): the constant operand was missing. Lane 2 is taken to be undef
+; because the scalar output stores a value loaded from the stack straight to
+; offset 8 without masking it - confirm against the original test.
+define <4 x i32> @in_vec_constmask_undef(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-NOSSE-LABEL: in_vec_constmask_undef:
+; CHECK-NOSSE:         # %bb.0:
+; CHECK-NOSSE-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-NOSSE-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
+; CHECK-NOSSE-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NOSSE-NEXT:    xorl %r9d, %esi
+; CHECK-NOSSE-NEXT:    xorl %eax, %edx
+; CHECK-NOSSE-NEXT:    xorl %ecx, %r8d
+; CHECK-NOSSE-NEXT:    andl $16776960, %r8d # imm = 0xFFFF00
+; CHECK-NOSSE-NEXT:    andl $16776960, %edx # imm = 0xFFFF00
+; CHECK-NOSSE-NEXT:    andl $16776960, %esi # imm = 0xFFFF00
+; CHECK-NOSSE-NEXT:    xorl %r9d, %esi
+; CHECK-NOSSE-NEXT:    xorl %eax, %edx
+; CHECK-NOSSE-NEXT:    xorl %ecx, %r8d
+; CHECK-NOSSE-NEXT:    movl %r10d, 8(%rdi)
+; CHECK-NOSSE-NEXT:    movl %r8d, 12(%rdi)
+; CHECK-NOSSE-NEXT:    movl %edx, 4(%rdi)
+; CHECK-NOSSE-NEXT:    movl %esi, (%rdi)
+; CHECK-NOSSE-NEXT:    movq %rdi, %rax
+; CHECK-NOSSE-NEXT:    retq
+;
+; CHECK-SSE-LABEL: in_vec_constmask_undef:
+; CHECK-SSE:         # %bb.0:
+; CHECK-SSE-NEXT:    xorps %xmm1, %xmm0
+; CHECK-SSE-NEXT:    andps {{.*}}(%rip), %xmm0
+; CHECK-SSE-NEXT:    xorps %xmm1, %xmm0
+; CHECK-SSE-NEXT:    retq
+  %n0 = xor <4 x i32> %x, %y
+  %n1 = and <4 x i32> %n0, <i32 16776960, i32 16776960, i32 undef, i32 16776960>
+  %r = xor <4 x i32> %n1, %y
+  ret <4 x i32> %r
+}
+
+; xor/and/xor masked merge with a per-lane constant mask:
+; lanes 0 and 2 use 0x00FFFF00, lanes 1 and 3 use 0x00FF00FF.
+; NOTE(review): the constant operand was missing; it is re-derived from the
+; immediates in the assertions below - confirm against the original test.
+define <4 x i32> @in_vec_constmask_nonsplat(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-NOSSE-LABEL: in_vec_constmask_nonsplat:
+; CHECK-NOSSE:         # %bb.0:
+; CHECK-NOSSE-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-NOSSE-NEXT:    movl {{[0-9]+}}(%rsp), %r11d
+; CHECK-NOSSE-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NOSSE-NEXT:    xorl %r9d, %esi
+; CHECK-NOSSE-NEXT:    xorl %eax, %edx
+; CHECK-NOSSE-NEXT:    xorl %r11d, %ecx
+; CHECK-NOSSE-NEXT:    xorl %r10d, %r8d
+; CHECK-NOSSE-NEXT:    andl $16711935, %r8d # imm = 0xFF00FF
+; CHECK-NOSSE-NEXT:    andl $16776960, %ecx # imm = 0xFFFF00
+; CHECK-NOSSE-NEXT:    andl $16711935, %edx # imm = 0xFF00FF
+; CHECK-NOSSE-NEXT:    andl $16776960, %esi # imm = 0xFFFF00
+; CHECK-NOSSE-NEXT:    xorl %r9d, %esi
+; CHECK-NOSSE-NEXT:    xorl %eax, %edx
+; CHECK-NOSSE-NEXT:    xorl %r11d, %ecx
+; CHECK-NOSSE-NEXT:    xorl %r10d, %r8d
+; CHECK-NOSSE-NEXT:    movl %r8d, 12(%rdi)
+; CHECK-NOSSE-NEXT:    movl %ecx, 8(%rdi)
+; CHECK-NOSSE-NEXT:    movl %edx, 4(%rdi)
+; CHECK-NOSSE-NEXT:    movl %esi, (%rdi)
+; CHECK-NOSSE-NEXT:    movq %rdi, %rax
+; CHECK-NOSSE-NEXT:    retq
+;
+; CHECK-SSE-LABEL: in_vec_constmask_nonsplat:
+; CHECK-SSE:         # %bb.0:
+; CHECK-SSE-NEXT:    xorps %xmm1, %xmm0
+; CHECK-SSE-NEXT:    andps {{.*}}(%rip), %xmm0
+; CHECK-SSE-NEXT:    xorps %xmm1, %xmm0
+; CHECK-SSE-NEXT:    retq
+  %n0 = xor <4 x i32> %x, %y
+  %n1 = and <4 x i32> %n0, <i32 16776960, i32 16711935, i32 16776960, i32 16711935>
+  %r = xor <4 x i32> %n1, %y
+  ret <4 x i32> %r
+}