Index: llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll +++ llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll @@ -0,0 +1,271 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +; https://bugs.llvm.org/show_bug.cgi?id=37104 + +; X: [byte2][byte1] +; Y: [byte3] [byte0] + +define i8 @out8_constmask(i8 %x, i8 %y) { +; CHECK-LABEL: out8_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr w8, w0, #2 +; CHECK-NEXT: bfi w1, w8, #2, #4 +; CHECK-NEXT: mov w0, w1 +; CHECK-NEXT: ret + %mx = and i8 %x, 60 + %my = and i8 %y, -61 + %r = or i8 %mx, %my + ret i8 %r +} + +define i16 @out16_constmask(i16 %x, i16 %y) { +; CHECK-LABEL: out16_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr w8, w0, #4 +; CHECK-NEXT: bfi w1, w8, #4, #8 +; CHECK-NEXT: mov w0, w1 +; CHECK-NEXT: ret + %mx = and i16 %x, 4080 + %my = and i16 %y, -4081 + %r = or i16 %mx, %my + ret i16 %r +} + +define i32 @out32_constmask(i32 %x, i32 %y) { +; CHECK-LABEL: out32_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr w8, w0, #8 +; CHECK-NEXT: bfi w1, w8, #8, #16 +; CHECK-NEXT: mov w0, w1 +; CHECK-NEXT: ret + %mx = and i32 %x, 16776960 + %my = and i32 %y, -16776961 + %r = or i32 %mx, %my + ret i32 %r +} + +define i64 @out64_constmask(i64 %x, i64 %y) { +; CHECK-LABEL: out64_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr x8, x0, #16 +; CHECK-NEXT: bfi x1, x8, #16, #32 +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: ret + %mx = and i64 %x, 281474976645120 + %my = and i64 %y, -281474976645121 + %r = or i64 %mx, %my + ret i64 %r +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Should be the same as the previous one. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +define i8 @in8_constmask(i8 %x, i8 %y) { +; CHECK-LABEL: in8_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0x3c +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %n0 = xor i8 %x, %y + %n1 = and i8 %n0, 60 + %r = xor i8 %n1, %y + ret i8 %r +} + +define i16 @in16_constmask(i16 %x, i16 %y) { +; CHECK-LABEL: in16_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xff0 +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %n0 = xor i16 %x, %y + %n1 = and i16 %n0, 4080 + %r = xor i16 %n1, %y + ret i16 %r +} + +define i32 @in32_constmask(i32 %x, i32 %y) { +; CHECK-LABEL: in32_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xffff00 +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 16776960 + %r = xor i32 %n1, %y + ret i32 %r +} + +define i64 @in64_constmask(i64 %x, i64 %y) { +; CHECK-LABEL: in64_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor x8, x0, x1 +; CHECK-NEXT: and x8, x8, #0xffffffff0000 +; CHECK-NEXT: eor x0, x8, x1 +; CHECK-NEXT: ret + %n0 = xor i64 %x, %y + %n1 = and i64 %n0, 281474976645120 + %r = xor i64 %n1, %y + ret i64 %r +} + +; ============================================================================ ; +; Constant Commutativity tests. 
+; ============================================================================ ; + +define i32 @in_constmask_commutativity_0_1(i32 %x, i32 %y) { +; CHECK-LABEL: in_constmask_commutativity_0_1: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xffff00 +; CHECK-NEXT: eor w0, w1, w8 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 16776960 + %r = xor i32 %y, %n1 ; swapped + ret i32 %r +} + +define i32 @in_constmask_commutativity_1_0(i32 %x, i32 %y) { +; CHECK-LABEL: in_constmask_commutativity_1_0: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xffff00 +; CHECK-NEXT: eor w0, w8, w0 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 16776960 + %r = xor i32 %n1, %x ; %x instead of %y + ret i32 %r +} + +define i32 @in_constmask_commutativity_1_1(i32 %x, i32 %y) { +; CHECK-LABEL: in_constmask_commutativity_1_1: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xffff00 +; CHECK-NEXT: eor w0, w0, w8 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 16776960 + %r = xor i32 %x, %n1 ; swapped, %x instead of %y + ret i32 %r +} + +; ============================================================================ ; +; Y is an 'and' too. 
+; ============================================================================ ; + +define i32 @in_complex_y0_constmask(i32 %x, i32 %y_hi, i32 %y_low) { +; CHECK-LABEL: in_complex_y0_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, w2 +; CHECK-NEXT: eor w9, w0, w8 +; CHECK-NEXT: and w9, w9, #0xffff00 +; CHECK-NEXT: eor w0, w9, w8 +; CHECK-NEXT: ret + %y = and i32 %y_hi, %y_low + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 16776960 + %r = xor i32 %n1, %y + ret i32 %r +} + +define i32 @in_complex_y1_constmask(i32 %x, i32 %y_hi, i32 %y_low) { +; CHECK-LABEL: in_complex_y1_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, w2 +; CHECK-NEXT: eor w9, w0, w8 +; CHECK-NEXT: and w9, w9, #0xffff00 +; CHECK-NEXT: eor w0, w8, w9 +; CHECK-NEXT: ret + %y = and i32 %y_hi, %y_low + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 16776960 + %r = xor i32 %y, %n1 + ret i32 %r +} + +; ============================================================================ ; +; Negative tests. Should not be folded. +; ============================================================================ ; + +; Multi-use tests. + +declare void @use32(i32) nounwind + +define i32 @in_multiuse_A_constmask(i32 %x, i32 %y, i32 %z) nounwind { +; CHECK-LABEL: in_multiuse_A_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: str x20, [sp, #-32]! 
// 8-byte Folded Spill +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w20, w8, #0xffff00 +; CHECK-NEXT: mov w0, w20 +; CHECK-NEXT: stp x19, x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: bl use32 +; CHECK-NEXT: eor w0, w20, w19 +; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 16776960 + call void @use32(i32 %n1) + %r = xor i32 %n1, %y + ret i32 %r +} + +define i32 @in_multiuse_B_constmask(i32 %x, i32 %y, i32 %z) nounwind { +; CHECK-LABEL: in_multiuse_B_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: str x20, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: eor w0, w0, w1 +; CHECK-NEXT: stp x19, x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: and w20, w0, #0xffff00 +; CHECK-NEXT: bl use32 +; CHECK-NEXT: eor w0, w20, w19 +; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 16776960 + call void @use32(i32 %n0) + %r = xor i32 %n1, %y + ret i32 %r +} + +; Various bad variants + +define i32 @n0_badconstmask(i32 %x, i32 %y) { +; CHECK-LABEL: n0_badconstmask: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w9, #256 +; CHECK-NEXT: movk w9, #65280, lsl #16 +; CHECK-NEXT: and w8, w0, #0xffff00 +; CHECK-NEXT: and w9, w1, w9 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %mx = and i32 %x, 16776960 + %my = and i32 %y, -16776960 ; instead of -16776961 + %r = or i32 %mx, %my + ret i32 %r +} + +define i32 @n1_thirdvar_constmask(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: n1_thirdvar_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xffff00 +; CHECK-NEXT: eor w0, w8, w2 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 16776960 + %r = xor i32 %n1, %z ; instead of %y + ret i32 %r +} Index: 
llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbits.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbits.ll +++ llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbits.ll @@ -0,0 +1,277 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +; https://bugs.llvm.org/show_bug.cgi?id=37104 + +; X: [bit2] [bit0] +; Y: [bit3] [bit1] + +define i8 @out8_constmask(i8 %x, i8 %y) { +; CHECK-LABEL: out8_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #85 +; CHECK-NEXT: mov w9, #-86 +; CHECK-NEXT: and w8, w0, w8 +; CHECK-NEXT: and w9, w1, w9 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %mx = and i8 %x, 85 + %my = and i8 %y, -86 + %r = or i8 %mx, %my + ret i8 %r +} + +define i16 @out16_constmask(i16 %x, i16 %y) { +; CHECK-LABEL: out16_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #21845 +; CHECK-NEXT: mov w9, #-21846 +; CHECK-NEXT: and w8, w0, w8 +; CHECK-NEXT: and w9, w1, w9 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %mx = and i16 %x, 21845 + %my = and i16 %y, -21846 + %r = or i16 %mx, %my + ret i16 %r +} + +define i32 @out32_constmask(i32 %x, i32 %y) { +; CHECK-LABEL: out32_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, #0x55555555 +; CHECK-NEXT: and w9, w1, #0xaaaaaaaa +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %mx = and i32 %x, 1431655765 + %my = and i32 %y, -1431655766 + %r = or i32 %mx, %my + ret i32 %r +} + +define i64 @out64_constmask(i64 %x, i64 %y) { +; CHECK-LABEL: out64_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: and x8, x0, #0x5555555555555555 +; CHECK-NEXT: and x9, x1, #0xaaaaaaaaaaaaaaaa +; CHECK-NEXT: orr x0, x8, x9 +; CHECK-NEXT: ret + %mx = and i64 %x, 6148914691236517205 + %my = and i64 %y, -6148914691236517206 + %r = or i64 %mx, %my + ret i64 %r +} + 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Should be the same as the previous one. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +define i8 @in8_constmask(i8 %x, i8 %y) { +; CHECK-LABEL: in8_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: mov w9, #85 +; CHECK-NEXT: and w8, w8, w9 +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %n0 = xor i8 %x, %y + %n1 = and i8 %n0, 85 + %r = xor i8 %n1, %y + ret i8 %r +} + +define i16 @in16_constmask(i16 %x, i16 %y) { +; CHECK-LABEL: in16_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: mov w9, #21845 +; CHECK-NEXT: and w8, w8, w9 +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %n0 = xor i16 %x, %y + %n1 = and i16 %n0, 21845 + %r = xor i16 %n1, %y + ret i16 %r +} + +define i32 @in32_constmask(i32 %x, i32 %y) { +; CHECK-LABEL: in32_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0x55555555 +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 1431655765 + %r = xor i32 %n1, %y + ret i32 %r +} + +define i64 @in64_constmask(i64 %x, i64 %y) { +; CHECK-LABEL: in64_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor x8, x0, x1 +; CHECK-NEXT: and x8, x8, #0x5555555555555555 +; CHECK-NEXT: eor x0, x8, x1 +; CHECK-NEXT: ret + %n0 = xor i64 %x, %y + %n1 = and i64 %n0, 6148914691236517205 + %r = xor i64 %n1, %y + ret i64 %r +} + +; ============================================================================ ; +; Constant Commutativity tests. 
+; ============================================================================ ; + +define i32 @in_constmask_commutativity_0_1(i32 %x, i32 %y) { +; CHECK-LABEL: in_constmask_commutativity_0_1: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0x55555555 +; CHECK-NEXT: eor w0, w1, w8 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 1431655765 + %r = xor i32 %y, %n1 ; swapped + ret i32 %r +} + +define i32 @in_constmask_commutativity_1_0(i32 %x, i32 %y) { +; CHECK-LABEL: in_constmask_commutativity_1_0: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0x55555555 +; CHECK-NEXT: eor w0, w8, w0 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 1431655765 + %r = xor i32 %n1, %x ; %x instead of %y + ret i32 %r +} + +define i32 @in_constmask_commutativity_1_1(i32 %x, i32 %y) { +; CHECK-LABEL: in_constmask_commutativity_1_1: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0x55555555 +; CHECK-NEXT: eor w0, w0, w8 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 1431655765 + %r = xor i32 %x, %n1 ; swapped, %x instead of %y + ret i32 %r +} + +; ============================================================================ ; +; Y is an 'and' too. 
+; ============================================================================ ; + +define i32 @in_complex_y0_constmask(i32 %x, i32 %y_hi, i32 %y_low) { +; CHECK-LABEL: in_complex_y0_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, w2 +; CHECK-NEXT: eor w9, w0, w8 +; CHECK-NEXT: and w9, w9, #0x55555555 +; CHECK-NEXT: eor w0, w9, w8 +; CHECK-NEXT: ret + %y = and i32 %y_hi, %y_low + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 1431655765 + %r = xor i32 %n1, %y + ret i32 %r +} + +define i32 @in_complex_y1_constmask(i32 %x, i32 %y_hi, i32 %y_low) { +; CHECK-LABEL: in_complex_y1_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, w2 +; CHECK-NEXT: eor w9, w0, w8 +; CHECK-NEXT: and w9, w9, #0x55555555 +; CHECK-NEXT: eor w0, w8, w9 +; CHECK-NEXT: ret + %y = and i32 %y_hi, %y_low + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 1431655765 + %r = xor i32 %y, %n1 + ret i32 %r +} + +; ============================================================================ ; +; Negative tests. Should not be folded. +; ============================================================================ ; + +; Multi-use tests. + +declare void @use32(i32) nounwind + +define i32 @in_multiuse_A_constmask(i32 %x, i32 %y, i32 %z) nounwind { +; CHECK-LABEL: in_multiuse_A_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: str x20, [sp, #-32]! 
// 8-byte Folded Spill +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w20, w8, #0x55555555 +; CHECK-NEXT: mov w0, w20 +; CHECK-NEXT: stp x19, x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: bl use32 +; CHECK-NEXT: eor w0, w20, w19 +; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 1431655765 + call void @use32(i32 %n1) + %r = xor i32 %n1, %y + ret i32 %r +} + +define i32 @in_multiuse_B_constmask(i32 %x, i32 %y, i32 %z) nounwind { +; CHECK-LABEL: in_multiuse_B_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: str x20, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: eor w0, w0, w1 +; CHECK-NEXT: stp x19, x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: and w20, w0, #0x55555555 +; CHECK-NEXT: bl use32 +; CHECK-NEXT: eor w0, w20, w19 +; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 1431655765 + call void @use32(i32 %n0) + %r = xor i32 %n1, %y + ret i32 %r +} + +; Various bad variants + +define i32 @n0_badconstmask(i32 %x, i32 %y) { +; CHECK-LABEL: n0_badconstmask: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w9, #43691 +; CHECK-NEXT: movk w9, #43690, lsl #16 +; CHECK-NEXT: and w8, w0, #0x55555555 +; CHECK-NEXT: and w9, w1, w9 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %mx = and i32 %x, 1431655765 + %my = and i32 %y, -1431655765 ; instead of -1431655766 + %r = or i32 %mx, %my + ret i32 %r +} + +define i32 @n1_thirdvar_constmask(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: n1_thirdvar_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0x55555555 +; CHECK-NEXT: eor w0, w8, w2 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 1431655765 + %r = xor i32 %n1, %z ; instead of %y + ret i32 %r +} Index: 
llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll +++ llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll @@ -0,0 +1,273 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +; https://bugs.llvm.org/show_bug.cgi?id=37104 + +; X: [bit 3210] +; Y: [bit 7654] + +define i8 @out8_constmask(i8 %x, i8 %y) { +; CHECK-LABEL: out8_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: bfxil w1, w0, #0, #4 +; CHECK-NEXT: mov w0, w1 +; CHECK-NEXT: ret + %mx = and i8 %x, 15 + %my = and i8 %y, -16 + %r = or i8 %mx, %my + ret i8 %r +} + +define i16 @out16_constmask(i16 %x, i16 %y) { +; CHECK-LABEL: out16_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #3855 +; CHECK-NEXT: mov w9, #-3856 +; CHECK-NEXT: and w8, w0, w8 +; CHECK-NEXT: and w9, w1, w9 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %mx = and i16 %x, 3855 + %my = and i16 %y, -3856 + %r = or i16 %mx, %my + ret i16 %r +} + +define i32 @out32_constmask(i32 %x, i32 %y) { +; CHECK-LABEL: out32_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, #0xf0f0f0f +; CHECK-NEXT: and w9, w1, #0xf0f0f0f0 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %mx = and i32 %x, 252645135 + %my = and i32 %y, -252645136 + %r = or i32 %mx, %my + ret i32 %r +} + +define i64 @out64_constmask(i64 %x, i64 %y) { +; CHECK-LABEL: out64_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: and x8, x0, #0xf0f0f0f0f0f0f0f +; CHECK-NEXT: and x9, x1, #0xf0f0f0f0f0f0f0f0 +; CHECK-NEXT: orr x0, x8, x9 +; CHECK-NEXT: ret + %mx = and i64 %x, 1085102592571150095 + %my = and i64 %y, -1085102592571150096 + %r = or i64 %mx, %my + ret i64 %r +} + 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Should be the same as the previous one. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +define i8 @in8_constmask(i8 %x, i8 %y) { +; CHECK-LABEL: in8_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xf +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %n0 = xor i8 %x, %y + %n1 = and i8 %n0, 15 + %r = xor i8 %n1, %y + ret i8 %r +} + +define i16 @in16_constmask(i16 %x, i16 %y) { +; CHECK-LABEL: in16_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: mov w9, #3855 +; CHECK-NEXT: and w8, w8, w9 +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %n0 = xor i16 %x, %y + %n1 = and i16 %n0, 3855 + %r = xor i16 %n1, %y + ret i16 %r +} + +define i32 @in32_constmask(i32 %x, i32 %y) { +; CHECK-LABEL: in32_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xf0f0f0f +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 252645135 + %r = xor i32 %n1, %y + ret i32 %r +} + +define i64 @in64_constmask(i64 %x, i64 %y) { +; CHECK-LABEL: in64_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor x8, x0, x1 +; CHECK-NEXT: and x8, x8, #0xf0f0f0f0f0f0f0f +; CHECK-NEXT: eor x0, x8, x1 +; CHECK-NEXT: ret + %n0 = xor i64 %x, %y + %n1 = and i64 %n0, 1085102592571150095 + %r = xor i64 %n1, %y + ret i64 %r +} + +; ============================================================================ ; +; Constant Commutativity tests. 
+; ============================================================================ ; + +define i32 @in_constmask_commutativity_0_1(i32 %x, i32 %y) { +; CHECK-LABEL: in_constmask_commutativity_0_1: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xf0f0f0f +; CHECK-NEXT: eor w0, w1, w8 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 252645135 + %r = xor i32 %y, %n1 ; swapped + ret i32 %r +} + +define i32 @in_constmask_commutativity_1_0(i32 %x, i32 %y) { +; CHECK-LABEL: in_constmask_commutativity_1_0: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xf0f0f0f +; CHECK-NEXT: eor w0, w8, w0 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 252645135 + %r = xor i32 %n1, %x ; %x instead of %y + ret i32 %r +} + +define i32 @in_constmask_commutativity_1_1(i32 %x, i32 %y) { +; CHECK-LABEL: in_constmask_commutativity_1_1: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xf0f0f0f +; CHECK-NEXT: eor w0, w0, w8 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 252645135 + %r = xor i32 %x, %n1 ; swapped, %x instead of %y + ret i32 %r +} + +; ============================================================================ ; +; Y is an 'and' too. 
+; ============================================================================ ; + +define i32 @in_complex_y0_constmask(i32 %x, i32 %y_hi, i32 %y_low) { +; CHECK-LABEL: in_complex_y0_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, w2 +; CHECK-NEXT: eor w9, w0, w8 +; CHECK-NEXT: and w9, w9, #0xf0f0f0f +; CHECK-NEXT: eor w0, w9, w8 +; CHECK-NEXT: ret + %y = and i32 %y_hi, %y_low + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 252645135 + %r = xor i32 %n1, %y + ret i32 %r +} + +define i32 @in_complex_y1_constmask(i32 %x, i32 %y_hi, i32 %y_low) { +; CHECK-LABEL: in_complex_y1_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, w2 +; CHECK-NEXT: eor w9, w0, w8 +; CHECK-NEXT: and w9, w9, #0xf0f0f0f +; CHECK-NEXT: eor w0, w8, w9 +; CHECK-NEXT: ret + %y = and i32 %y_hi, %y_low + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 252645135 + %r = xor i32 %y, %n1 + ret i32 %r +} + +; ============================================================================ ; +; Negative tests. Should not be folded. +; ============================================================================ ; + +; Multi-use tests. + +declare void @use32(i32) nounwind + +define i32 @in_multiuse_A_constmask(i32 %x, i32 %y, i32 %z) nounwind { +; CHECK-LABEL: in_multiuse_A_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: str x20, [sp, #-32]! 
// 8-byte Folded Spill +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w20, w8, #0xf0f0f0f +; CHECK-NEXT: mov w0, w20 +; CHECK-NEXT: stp x19, x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: bl use32 +; CHECK-NEXT: eor w0, w20, w19 +; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 252645135 + call void @use32(i32 %n1) + %r = xor i32 %n1, %y + ret i32 %r +} + +define i32 @in_multiuse_B_constmask(i32 %x, i32 %y, i32 %z) nounwind { +; CHECK-LABEL: in_multiuse_B_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: str x20, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: eor w0, w0, w1 +; CHECK-NEXT: stp x19, x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: and w20, w0, #0xf0f0f0f +; CHECK-NEXT: bl use32 +; CHECK-NEXT: eor w0, w20, w19 +; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 252645135 + call void @use32(i32 %n0) + %r = xor i32 %n1, %y + ret i32 %r +} + +; Various bad variants + +define i32 @n0_badconstmask(i32 %x, i32 %y) { +; CHECK-LABEL: n0_badconstmask: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w9, #61681 +; CHECK-NEXT: movk w9, #61680, lsl #16 +; CHECK-NEXT: and w8, w0, #0xf0f0f0f +; CHECK-NEXT: and w9, w1, w9 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %mx = and i32 %x, 252645135 + %my = and i32 %y, -252645135 ; instead of -252645136 + %r = or i32 %mx, %my + ret i32 %r +} + +define i32 @n1_thirdvar_constmask(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: n1_thirdvar_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xf0f0f0f +; CHECK-NEXT: eor w0, w8, w2 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 252645135 + %r = xor i32 %n1, %z ; instead of %y + ret i32 %r +} Index: 
llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-lowhigh.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-lowhigh.ll +++ llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-lowhigh.ll @@ -0,0 +1,264 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +; https://bugs.llvm.org/show_bug.cgi?id=37104 + +; X: [byte1][byte0] +; Y: [byte3][byte2] + +define i8 @out8_constmask(i8 %x, i8 %y) { +; CHECK-LABEL: out8_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: bfxil w1, w0, #0, #4 +; CHECK-NEXT: mov w0, w1 +; CHECK-NEXT: ret + %mx = and i8 %x, 15 + %my = and i8 %y, -16 + %r = or i8 %mx, %my + ret i8 %r +} + +define i16 @out16_constmask(i16 %x, i16 %y) { +; CHECK-LABEL: out16_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: bfxil w1, w0, #0, #8 +; CHECK-NEXT: mov w0, w1 +; CHECK-NEXT: ret + %mx = and i16 %x, 255 + %my = and i16 %y, -256 + %r = or i16 %mx, %my + ret i16 %r +} + +define i32 @out32_constmask(i32 %x, i32 %y) { +; CHECK-LABEL: out32_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: bfxil w1, w0, #0, #16 +; CHECK-NEXT: mov w0, w1 +; CHECK-NEXT: ret + %mx = and i32 %x, 65535 + %my = and i32 %y, -65536 + %r = or i32 %mx, %my + ret i32 %r +} + +define i64 @out64_constmask(i64 %x, i64 %y) { +; CHECK-LABEL: out64_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: bfxil x1, x0, #0, #32 +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: ret + %mx = and i64 %x, 4294967295 + %my = and i64 %y, -4294967296 + %r = or i64 %mx, %my + ret i64 %r +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Should be the same as the previous one. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +define i8 @in8_constmask(i8 %x, i8 %y) { +; CHECK-LABEL: in8_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xf +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %n0 = xor i8 %x, %y + %n1 = and i8 %n0, 15 + %r = xor i8 %n1, %y + ret i8 %r +} + +define i16 @in16_constmask(i16 %x, i16 %y) { +; CHECK-LABEL: in16_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xff +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %n0 = xor i16 %x, %y + %n1 = and i16 %n0, 255 + %r = xor i16 %n1, %y + ret i16 %r +} + +define i32 @in32_constmask(i32 %x, i32 %y) { +; CHECK-LABEL: in32_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 65535 + %r = xor i32 %n1, %y + ret i32 %r +} + +define i64 @in64_constmask(i64 %x, i64 %y) { +; CHECK-LABEL: in64_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: eor x0, x8, x1 +; CHECK-NEXT: ret + %n0 = xor i64 %x, %y + %n1 = and i64 %n0, 4294967295 + %r = xor i64 %n1, %y + ret i64 %r +} + +; ============================================================================ ; +; Constant Commutativity tests. 
+; ============================================================================ ; + +define i32 @in_constmask_commutativity_0_1(i32 %x, i32 %y) { +; CHECK-LABEL: in_constmask_commutativity_0_1: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: eor w0, w1, w8 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 65535 + %r = xor i32 %y, %n1 ; swapped + ret i32 %r +} + +define i32 @in_constmask_commutativity_1_0(i32 %x, i32 %y) { +; CHECK-LABEL: in_constmask_commutativity_1_0: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: eor w0, w8, w0 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 65535 + %r = xor i32 %n1, %x ; %x instead of %y + ret i32 %r +} + +define i32 @in_constmask_commutativity_1_1(i32 %x, i32 %y) { +; CHECK-LABEL: in_constmask_commutativity_1_1: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: eor w0, w0, w8 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 65535 + %r = xor i32 %x, %n1 ; swapped, %x instead of %y + ret i32 %r +} + +; ============================================================================ ; +; Y is an 'and' too. 
+; ============================================================================ ; + +define i32 @in_complex_y0_constmask(i32 %x, i32 %y_hi, i32 %y_low) { +; CHECK-LABEL: in_complex_y0_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, w2 +; CHECK-NEXT: eor w9, w0, w8 +; CHECK-NEXT: and w9, w9, #0xffff +; CHECK-NEXT: eor w0, w9, w8 +; CHECK-NEXT: ret + %y = and i32 %y_hi, %y_low + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 65535 + %r = xor i32 %n1, %y + ret i32 %r +} + +define i32 @in_complex_y1_constmask(i32 %x, i32 %y_hi, i32 %y_low) { +; CHECK-LABEL: in_complex_y1_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, w2 +; CHECK-NEXT: eor w9, w0, w8 +; CHECK-NEXT: and w9, w9, #0xffff +; CHECK-NEXT: eor w0, w8, w9 +; CHECK-NEXT: ret + %y = and i32 %y_hi, %y_low + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 65535 + %r = xor i32 %y, %n1 + ret i32 %r +} + +; ============================================================================ ; +; Negative tests. Should not be folded. +; ============================================================================ ; + +; Multi-use tests. + +declare void @use32(i32) nounwind + +define i32 @in_multiuse_A_constmask(i32 %x, i32 %y, i32 %z) nounwind { +; CHECK-LABEL: in_multiuse_A_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: str x20, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w20, w8, #0xffff +; CHECK-NEXT: mov w0, w20 +; CHECK-NEXT: stp x19, x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: bl use32 +; CHECK-NEXT: eor w0, w20, w19 +; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 65535 + call void @use32(i32 %n1) + %r = xor i32 %n1, %y + ret i32 %r +} + +define i32 @in_multiuse_B_constmask(i32 %x, i32 %y, i32 %z) nounwind { +; CHECK-LABEL: in_multiuse_B_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: str x20, [sp, #-32]! 
// 8-byte Folded Spill +; CHECK-NEXT: eor w0, w0, w1 +; CHECK-NEXT: stp x19, x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: and w20, w0, #0xffff +; CHECK-NEXT: bl use32 +; CHECK-NEXT: eor w0, w20, w19 +; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 65535 + call void @use32(i32 %n0) + %r = xor i32 %n1, %y + ret i32 %r +} + +; Various bad variants + +define i32 @n0_badconstmask(i32 %x, i32 %y) { +; CHECK-LABEL: n0_badconstmask: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, #0xffff +; CHECK-NEXT: and w9, w1, #0xffff0001 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %mx = and i32 %x, 65535 + %my = and i32 %y, -65535 ; instead of -65536 + %r = or i32 %mx, %my + ret i32 %r +} + +define i32 @n1_thirdvar_constmask(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: n1_thirdvar_constmask: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, #0xffff +; CHECK-NEXT: eor w0, w8, w2 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 65535 + %r = xor i32 %n1, %z ; instead of %y + ret i32 %r +} Index: llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll +++ llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll @@ -0,0 +1,415 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +; https://bugs.llvm.org/show_bug.cgi?id=37104 + +define i8 @out8(i8 %x, i8 %y, i8 %mask) { +; CHECK-LABEL: out8: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, w2 +; CHECK-NEXT: bic w9, w1, w2 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %mx = and i8 %x, %mask + %notmask = xor i8 %mask, -1 + %my = and i8 %y, %notmask + %r 
= or i8 %mx, %my + ret i8 %r +} + +define i16 @out16(i16 %x, i16 %y, i16 %mask) { +; CHECK-LABEL: out16: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, w2 +; CHECK-NEXT: bic w9, w1, w2 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %mx = and i16 %x, %mask + %notmask = xor i16 %mask, -1 + %my = and i16 %y, %notmask + %r = or i16 %mx, %my + ret i16 %r +} + +define i32 @out32(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: out32: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, w2 +; CHECK-NEXT: bic w9, w1, w2 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %mx = and i32 %x, %mask + %notmask = xor i32 %mask, -1 + %my = and i32 %y, %notmask + %r = or i32 %mx, %my + ret i32 %r +} + +define i64 @out64(i64 %x, i64 %y, i64 %mask) { +; CHECK-LABEL: out64: +; CHECK: // %bb.0: +; CHECK-NEXT: and x8, x0, x2 +; CHECK-NEXT: bic x9, x1, x2 +; CHECK-NEXT: orr x0, x8, x9 +; CHECK-NEXT: ret + %mx = and i64 %x, %mask + %notmask = xor i64 %mask, -1 + %my = and i64 %y, %notmask + %r = or i64 %mx, %my + ret i64 %r +} +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Should be the same as the previous one. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +define i8 @in8(i8 %x, i8 %y, i8 %mask) { +; CHECK-LABEL: in8: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, w2 +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %n0 = xor i8 %x, %y + %n1 = and i8 %n0, %mask + %r = xor i8 %n1, %y + ret i8 %r +} + +define i16 @in16(i16 %x, i16 %y, i16 %mask) { +; CHECK-LABEL: in16: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, w2 +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %n0 = xor i16 %x, %y + %n1 = and i16 %n0, %mask + %r = xor i16 %n1, %y + ret i16 %r +} + +define i32 @in32(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: in32: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, w2 +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, %mask + %r = xor i32 %n1, %y + ret i32 %r +} + +define i64 @in64(i64 %x, i64 %y, i64 %mask) { +; CHECK-LABEL: in64: +; CHECK: // %bb.0: +; CHECK-NEXT: eor x8, x0, x1 +; CHECK-NEXT: and x8, x8, x2 +; CHECK-NEXT: eor x0, x8, x1 +; CHECK-NEXT: ret + %n0 = xor i64 %x, %y + %n1 = and i64 %n0, %mask + %r = xor i64 %n1, %y + ret i64 %r +} +; ============================================================================ ; +; Commutativity tests. 
+; ============================================================================ ; +define i32 @in_commutativity_0_0_1(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: in_commutativity_0_0_1: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w2, w8 +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %mask, %n0 ; swapped + %r = xor i32 %n1, %y + ret i32 %r +} +define i32 @in_commutativity_0_1_0(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: in_commutativity_0_1_0: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, w2 +; CHECK-NEXT: eor w0, w1, w8 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, %mask + %r = xor i32 %y, %n1 ; swapped + ret i32 %r +} +define i32 @in_commutativity_0_1_1(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: in_commutativity_0_1_1: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w2, w8 +; CHECK-NEXT: eor w0, w1, w8 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %mask, %n0 ; swapped + %r = xor i32 %y, %n1 ; swapped + ret i32 %r +} +define i32 @in_commutativity_1_0_0(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: in_commutativity_1_0_0: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, w2 +; CHECK-NEXT: eor w0, w8, w0 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, %mask + %r = xor i32 %n1, %x ; %x instead of %y + ret i32 %r +} +define i32 @in_commutativity_1_0_1(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: in_commutativity_1_0_1: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w2, w8 +; CHECK-NEXT: eor w0, w8, w0 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %mask, %n0 ; swapped + %r = xor i32 %n1, %x ; %x instead of %y + ret i32 %r +} +define i32 @in_commutativity_1_1_0(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: in_commutativity_1_1_0: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, w2 +; CHECK-NEXT: eor w0, w0, w8 
+; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, %mask + %r = xor i32 %x, %n1 ; swapped, %x instead of %y + ret i32 %r +} +define i32 @in_commutativity_1_1_1(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: in_commutativity_1_1_1: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w2, w8 +; CHECK-NEXT: eor w0, w0, w8 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %mask, %n0 ; swapped + %r = xor i32 %x, %n1 ; swapped, %x instead of %y + ret i32 %r +} +; ============================================================================ ; +; Y is an 'and' too. +; ============================================================================ ; +define i32 @in_complex_y0(i32 %x, i32 %y_hi, i32 %y_low, i32 %mask) { +; CHECK-LABEL: in_complex_y0: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, w2 +; CHECK-NEXT: eor w9, w0, w8 +; CHECK-NEXT: and w9, w9, w3 +; CHECK-NEXT: eor w0, w9, w8 +; CHECK-NEXT: ret + %y = and i32 %y_hi, %y_low + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, %mask + %r = xor i32 %n1, %y + ret i32 %r +} +define i32 @in_complex_y1(i32 %x, i32 %y_hi, i32 %y_low, i32 %mask) { +; CHECK-LABEL: in_complex_y1: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, w2 +; CHECK-NEXT: eor w9, w0, w8 +; CHECK-NEXT: and w9, w9, w3 +; CHECK-NEXT: eor w0, w8, w9 +; CHECK-NEXT: ret + %y = and i32 %y_hi, %y_low + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, %mask + %r = xor i32 %y, %n1 + ret i32 %r +} +; ============================================================================ ; +; M is an 'xor' too. 
+; ============================================================================ ; +define i32 @in_complex_m0(i32 %x, i32 %y, i32 %m_a, i32 %m_b) { +; CHECK-LABEL: in_complex_m0: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w2, w3 +; CHECK-NEXT: eor w9, w0, w1 +; CHECK-NEXT: and w8, w9, w8 +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %mask = xor i32 %m_a, %m_b + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, %mask + %r = xor i32 %n1, %y + ret i32 %r +} +define i32 @in_complex_m1(i32 %x, i32 %y, i32 %m_a, i32 %m_b) { +; CHECK-LABEL: in_complex_m1: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w2, w3 +; CHECK-NEXT: eor w9, w0, w1 +; CHECK-NEXT: and w8, w8, w9 +; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: ret + %mask = xor i32 %m_a, %m_b + %n0 = xor i32 %x, %y + %n1 = and i32 %mask, %n0 + %r = xor i32 %n1, %y + ret i32 %r +} +; ============================================================================ ; +; Both Y and M are complex. +; ============================================================================ ; +define i32 @in_complex_y0_m0(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) { +; CHECK-LABEL: in_complex_y0_m0: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, w2 +; CHECK-NEXT: eor w9, w3, w4 +; CHECK-NEXT: eor w10, w0, w8 +; CHECK-NEXT: and w9, w10, w9 +; CHECK-NEXT: eor w0, w9, w8 +; CHECK-NEXT: ret + %y = and i32 %y_hi, %y_low + %mask = xor i32 %m_a, %m_b + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, %mask + %r = xor i32 %n1, %y + ret i32 %r +} +define i32 @in_complex_y1_m0(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) { +; CHECK-LABEL: in_complex_y1_m0: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, w2 +; CHECK-NEXT: eor w9, w3, w4 +; CHECK-NEXT: eor w10, w0, w8 +; CHECK-NEXT: and w9, w10, w9 +; CHECK-NEXT: eor w0, w8, w9 +; CHECK-NEXT: ret + %y = and i32 %y_hi, %y_low + %mask = xor i32 %m_a, %m_b + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, %mask + %r = xor i32 %y, %n1 + ret i32 %r +} +define i32 @in_complex_y0_m1(i32 %x, i32 %y_hi, i32 %y_low, i32 
%m_a, i32 %m_b) { +; CHECK-LABEL: in_complex_y0_m1: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, w2 +; CHECK-NEXT: eor w9, w3, w4 +; CHECK-NEXT: eor w10, w0, w8 +; CHECK-NEXT: and w9, w9, w10 +; CHECK-NEXT: eor w0, w9, w8 +; CHECK-NEXT: ret + %y = and i32 %y_hi, %y_low + %mask = xor i32 %m_a, %m_b + %n0 = xor i32 %x, %y + %n1 = and i32 %mask, %n0 + %r = xor i32 %n1, %y + ret i32 %r +} +define i32 @in_complex_y1_m1(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) { +; CHECK-LABEL: in_complex_y1_m1: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w1, w2 +; CHECK-NEXT: eor w9, w3, w4 +; CHECK-NEXT: eor w10, w0, w8 +; CHECK-NEXT: and w9, w9, w10 +; CHECK-NEXT: eor w0, w8, w9 +; CHECK-NEXT: ret + %y = and i32 %y_hi, %y_low + %mask = xor i32 %m_a, %m_b + %n0 = xor i32 %x, %y + %n1 = and i32 %mask, %n0 + %r = xor i32 %y, %n1 + ret i32 %r +} +; ============================================================================ ; +; Negative tests. Should not be folded. +; ============================================================================ ; +; Multi-use tests. +declare void @use32(i32) nounwind +define i32 @in_multiuse_A(i32 %x, i32 %y, i32 %z, i32 %mask) nounwind { +; CHECK-LABEL: in_multiuse_A: +; CHECK: // %bb.0: +; CHECK-NEXT: str x20, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w20, w8, w3 +; CHECK-NEXT: mov w0, w20 +; CHECK-NEXT: stp x19, x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: bl use32 +; CHECK-NEXT: eor w0, w20, w19 +; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, %mask + call void @use32(i32 %n1) + %r = xor i32 %n1, %y + ret i32 %r +} +define i32 @in_multiuse_B(i32 %x, i32 %y, i32 %z, i32 %mask) nounwind { +; CHECK-LABEL: in_multiuse_B: +; CHECK: // %bb.0: +; CHECK-NEXT: str x20, [sp, #-32]! 
// 8-byte Folded Spill +; CHECK-NEXT: eor w0, w0, w1 +; CHECK-NEXT: stp x19, x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: and w20, w0, w3 +; CHECK-NEXT: bl use32 +; CHECK-NEXT: eor w0, w20, w19 +; CHECK-NEXT: ldp x19, x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldr x20, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, %mask + call void @use32(i32 %n0) + %r = xor i32 %n1, %y + ret i32 %r +} +; Various bad variants +define i32 @n0_badmask(i32 %x, i32 %y, i32 %mask, i32 %mask2) { +; CHECK-LABEL: n0_badmask: +; CHECK: // %bb.0: +; CHECK-NEXT: and w8, w0, w2 +; CHECK-NEXT: bic w9, w1, w3 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %mx = and i32 %x, %mask + %notmask = xor i32 %mask2, -1 ; %mask2 instead of %mask + %my = and i32 %y, %notmask + %r = or i32 %mx, %my + ret i32 %r +} +define i32 @n0_badxor(i32 %x, i32 %y, i32 %mask) { +; CHECK-LABEL: n0_badxor: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w9, w2, #0x1 +; CHECK-NEXT: and w8, w0, w2 +; CHECK-NEXT: and w9, w1, w9 +; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: ret + %mx = and i32 %x, %mask + %notmask = xor i32 %mask, 1 ; instead of -1 + %my = and i32 %y, %notmask + %r = or i32 %mx, %my + ret i32 %r +} +define i32 @n1_thirdvar(i32 %x, i32 %y, i32 %z, i32 %mask) { +; CHECK-LABEL: n1_thirdvar: +; CHECK: // %bb.0: +; CHECK-NEXT: eor w8, w0, w1 +; CHECK-NEXT: and w8, w8, w3 +; CHECK-NEXT: eor w0, w8, w2 +; CHECK-NEXT: ret + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, %mask + %r = xor i32 %n1, %z ; instead of %y + ret i32 %r +} Index: llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-innerouter.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-innerouter.ll +++ llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-innerouter.ll @@ -0,0 +1,458 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi < %s | FileCheck %s --check-prefix=CHECK-NOBMI +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi < %s | FileCheck %s --check-prefix=CHECK-BMI + +; https://bugs.llvm.org/show_bug.cgi?id=37104 + +; X: [byte2][byte1] +; Y: [byte3] [byte0] + +define i8 @out8_constmask(i8 %x, i8 %y) { +; CHECK-NOBMI-LABEL: out8_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: andb $60, %dil +; CHECK-NOBMI-NEXT: andb $-61, %sil +; CHECK-NOBMI-NEXT: orb %dil, %sil +; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: out8_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: andb $60, %dil +; CHECK-BMI-NEXT: andb $-61, %sil +; CHECK-BMI-NEXT: orb %dil, %sil +; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: retq + %mx = and i8 %x, 60 + %my = and i8 %y, -61 + %r = or i8 %mx, %my + ret i8 %r +} + +define i16 @out16_constmask(i16 %x, i16 %y) { +; CHECK-NOBMI-LABEL: out16_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NOBMI-NEXT: andl $4080, %edi # imm = 0xFF0 +; CHECK-NOBMI-NEXT: andl $-4081, %esi # imm = 0xF00F +; CHECK-NOBMI-NEXT: leal (%rsi,%rdi), %eax +; CHECK-NOBMI-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: out16_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-BMI-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-BMI-NEXT: andl $4080, %edi # imm = 0xFF0 +; CHECK-BMI-NEXT: andl $-4081, %esi # imm = 0xF00F +; CHECK-BMI-NEXT: leal (%rsi,%rdi), %eax +; CHECK-BMI-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-BMI-NEXT: retq + %mx = and i16 %x, 4080 + %my = and i16 %y, -4081 + %r = or i16 %mx, %my + ret i16 %r +} + +define i32 @out32_constmask(i32 %x, i32 %y) { +; CHECK-NOBMI-LABEL: out32_constmask: +; CHECK-NOBMI: # %bb.0: +;
CHECK-NOBMI-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NOBMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 +; CHECK-NOBMI-NEXT: andl $-16776961, %esi # imm = 0xFF0000FF +; CHECK-NOBMI-NEXT: leal (%rsi,%rdi), %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: out32_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-BMI-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-BMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 +; CHECK-BMI-NEXT: andl $-16776961, %esi # imm = 0xFF0000FF +; CHECK-BMI-NEXT: leal (%rsi,%rdi), %eax +; CHECK-BMI-NEXT: retq + %mx = and i32 %x, 16776960 + %my = and i32 %y, -16776961 + %r = or i32 %mx, %my + ret i32 %r +} + +define i64 @out64_constmask(i64 %x, i64 %y) { +; CHECK-NOBMI-LABEL: out64_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movabsq $281474976645120, %rcx # imm = 0xFFFFFFFF0000 +; CHECK-NOBMI-NEXT: andq %rdi, %rcx +; CHECK-NOBMI-NEXT: movabsq $-281474976645121, %rax # imm = 0xFFFF00000000FFFF +; CHECK-NOBMI-NEXT: andq %rsi, %rax +; CHECK-NOBMI-NEXT: orq %rcx, %rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: out64_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: movabsq $281474976645120, %rcx # imm = 0xFFFFFFFF0000 +; CHECK-BMI-NEXT: andq %rdi, %rcx +; CHECK-BMI-NEXT: movabsq $-281474976645121, %rax # imm = 0xFFFF00000000FFFF +; CHECK-BMI-NEXT: andq %rsi, %rax +; CHECK-BMI-NEXT: orq %rcx, %rax +; CHECK-BMI-NEXT: retq + %mx = and i64 %x, 281474976645120 + %my = and i64 %y, -281474976645121 + %r = or i64 %mx, %my + ret i64 %r +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Should be the same as the previous one. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +define i8 @in8_constmask(i8 %x, i8 %y) { +; CHECK-NOBMI-LABEL: in8_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: andb $60, %dil +; CHECK-NOBMI-NEXT: xorb %dil, %sil +; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in8_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: andb $60, %dil +; CHECK-BMI-NEXT: xorb %dil, %sil +; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: retq + %n0 = xor i8 %x, %y + %n1 = and i8 %n0, 60 + %r = xor i8 %n1, %y + ret i8 %r +} + +define i16 @in16_constmask(i16 %x, i16 %y) { +; CHECK-NOBMI-LABEL: in16_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: andl $4080, %edi # imm = 0xFF0 +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in16_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: andl $4080, %edi # imm = 0xFF0 +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: retq + %n0 = xor i16 %x, %y + %n1 = and i16 %n0, 4080 + %r = xor i16 %n1, %y + ret i16 %r +} + +define i32 @in32_constmask(i32 %x, i32 %y) { +; CHECK-NOBMI-LABEL: in32_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in32_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: retq + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 16776960 + %r = xor i32 %n1, %y + ret i32 %r +} + +define i64 @in64_constmask(i64 %x, i64 %y) { +; 
CHECK-NOBMI-LABEL: in64_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: xorq %rsi, %rdi +; CHECK-NOBMI-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000 +; CHECK-NOBMI-NEXT: andq %rdi, %rax +; CHECK-NOBMI-NEXT: xorq %rsi, %rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in64_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: xorq %rsi, %rdi +; CHECK-BMI-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000 +; CHECK-BMI-NEXT: andq %rdi, %rax +; CHECK-BMI-NEXT: xorq %rsi, %rax +; CHECK-BMI-NEXT: retq + %n0 = xor i64 %x, %y + %n1 = and i64 %n0, 281474976645120 + %r = xor i64 %n1, %y + ret i64 %r +} + +; ============================================================================ ; +; Constant Commutativity tests. +; ============================================================================ ; + +define i32 @in_constmask_commutativity_0_1(i32 %x, i32 %y) { +; CHECK-NOBMI-LABEL: in_constmask_commutativity_0_1: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in_constmask_commutativity_0_1: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: retq + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 16776960 + %r = xor i32 %y, %n1 ; swapped + ret i32 %r +} + +define i32 @in_constmask_commutativity_1_0(i32 %x, i32 %y) { +; CHECK-NOBMI-LABEL: in_constmask_commutativity_1_0: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: xorl %edi, %esi +; CHECK-NOBMI-NEXT: andl $16776960, %esi # imm = 0xFFFF00 +; CHECK-NOBMI-NEXT: xorl %edi, %esi +; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in_constmask_commutativity_1_0: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: xorl %edi, %esi +; 
CHECK-BMI-NEXT: andl $16776960, %esi # imm = 0xFFFF00 +; CHECK-BMI-NEXT: xorl %edi, %esi +; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: retq + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 16776960 + %r = xor i32 %n1, %x ; %x instead of %y + ret i32 %r +} + +define i32 @in_constmask_commutativity_1_1(i32 %x, i32 %y) { +; CHECK-NOBMI-LABEL: in_constmask_commutativity_1_1: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: xorl %edi, %esi +; CHECK-NOBMI-NEXT: andl $16776960, %esi # imm = 0xFFFF00 +; CHECK-NOBMI-NEXT: xorl %edi, %esi +; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in_constmask_commutativity_1_1: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: xorl %edi, %esi +; CHECK-BMI-NEXT: andl $16776960, %esi # imm = 0xFFFF00 +; CHECK-BMI-NEXT: xorl %edi, %esi +; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: retq + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 16776960 + %r = xor i32 %x, %n1 ; swapped, %x instead of %y + ret i32 %r +} + +; ============================================================================ ; +; Y is an 'and' too. 
+; ============================================================================ ; + +define i32 @in_complex_y0_constmask(i32 %x, i32 %y_hi, i32 %y_low) { +; CHECK-NOBMI-LABEL: in_complex_y0_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: andl %edx, %esi +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in_complex_y0_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: andl %edx, %esi +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: retq + %y = and i32 %y_hi, %y_low + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 16776960 + %r = xor i32 %n1, %y + ret i32 %r +} + +define i32 @in_complex_y1_constmask(i32 %x, i32 %y_hi, i32 %y_low) { +; CHECK-NOBMI-LABEL: in_complex_y1_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: andl %edx, %esi +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in_complex_y1_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: andl %edx, %esi +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: retq + %y = and i32 %y_hi, %y_low + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 16776960 + %r = xor i32 %y, %n1 + ret i32 %r +} + +; ============================================================================ ; +; Negative tests. Should not be folded. +; ============================================================================ ; + +; Multi-use tests. 
+ +declare void @use32(i32) nounwind + +define i32 @in_multiuse_A_constmask(i32 %x, i32 %y, i32 %z) nounwind { +; CHECK-NOBMI-LABEL: in_multiuse_A_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: pushq %rbp +; CHECK-NOBMI-NEXT: pushq %rbx +; CHECK-NOBMI-NEXT: pushq %rax +; CHECK-NOBMI-NEXT: movl %esi, %ebx +; CHECK-NOBMI-NEXT: movl %edi, %ebp +; CHECK-NOBMI-NEXT: xorl %esi, %ebp +; CHECK-NOBMI-NEXT: andl $16776960, %ebp # imm = 0xFFFF00 +; CHECK-NOBMI-NEXT: movl %ebp, %edi +; CHECK-NOBMI-NEXT: callq use32 +; CHECK-NOBMI-NEXT: xorl %ebx, %ebp +; CHECK-NOBMI-NEXT: movl %ebp, %eax +; CHECK-NOBMI-NEXT: addq $8, %rsp +; CHECK-NOBMI-NEXT: popq %rbx +; CHECK-NOBMI-NEXT: popq %rbp +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in_multiuse_A_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: pushq %rbp +; CHECK-BMI-NEXT: pushq %rbx +; CHECK-BMI-NEXT: pushq %rax +; CHECK-BMI-NEXT: movl %esi, %ebx +; CHECK-BMI-NEXT: movl %edi, %ebp +; CHECK-BMI-NEXT: xorl %esi, %ebp +; CHECK-BMI-NEXT: andl $16776960, %ebp # imm = 0xFFFF00 +; CHECK-BMI-NEXT: movl %ebp, %edi +; CHECK-BMI-NEXT: callq use32 +; CHECK-BMI-NEXT: xorl %ebx, %ebp +; CHECK-BMI-NEXT: movl %ebp, %eax +; CHECK-BMI-NEXT: addq $8, %rsp +; CHECK-BMI-NEXT: popq %rbx +; CHECK-BMI-NEXT: popq %rbp +; CHECK-BMI-NEXT: retq + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 16776960 + call void @use32(i32 %n1) + %r = xor i32 %n1, %y + ret i32 %r +} + +define i32 @in_multiuse_B_constmask(i32 %x, i32 %y, i32 %z) nounwind { +; CHECK-NOBMI-LABEL: in_multiuse_B_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: pushq %rbp +; CHECK-NOBMI-NEXT: pushq %rbx +; CHECK-NOBMI-NEXT: pushq %rax +; CHECK-NOBMI-NEXT: movl %esi, %ebx +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: movl %edi, %ebp +; CHECK-NOBMI-NEXT: andl $16776960, %ebp # imm = 0xFFFF00 +; CHECK-NOBMI-NEXT: callq use32 +; CHECK-NOBMI-NEXT: xorl %ebx, %ebp +; CHECK-NOBMI-NEXT: movl %ebp, %eax +; CHECK-NOBMI-NEXT: addq $8, %rsp +; CHECK-NOBMI-NEXT: popq %rbx +; 
CHECK-NOBMI-NEXT: popq %rbp +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in_multiuse_B_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: pushq %rbp +; CHECK-BMI-NEXT: pushq %rbx +; CHECK-BMI-NEXT: pushq %rax +; CHECK-BMI-NEXT: movl %esi, %ebx +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: movl %edi, %ebp +; CHECK-BMI-NEXT: andl $16776960, %ebp # imm = 0xFFFF00 +; CHECK-BMI-NEXT: callq use32 +; CHECK-BMI-NEXT: xorl %ebx, %ebp +; CHECK-BMI-NEXT: movl %ebp, %eax +; CHECK-BMI-NEXT: addq $8, %rsp +; CHECK-BMI-NEXT: popq %rbx +; CHECK-BMI-NEXT: popq %rbp +; CHECK-BMI-NEXT: retq + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 16776960 + call void @use32(i32 %n0) + %r = xor i32 %n1, %y + ret i32 %r +} + +; Various bad variants + +define i32 @n0_badconstmask(i32 %x, i32 %y) { +; CHECK-NOBMI-LABEL: n0_badconstmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 +; CHECK-NOBMI-NEXT: andl $-16776960, %esi # imm = 0xFF000100 +; CHECK-NOBMI-NEXT: orl %edi, %esi +; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: n0_badconstmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 +; CHECK-BMI-NEXT: andl $-16776960, %esi # imm = 0xFF000100 +; CHECK-BMI-NEXT: orl %edi, %esi +; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: retq + %mx = and i32 %x, 16776960 + %my = and i32 %y, -16776960 ; instead of -16776961 + %r = or i32 %mx, %my + ret i32 %r +} + +define i32 @n1_thirdvar_constmask(i32 %x, i32 %y, i32 %z) { +; CHECK-NOBMI-LABEL: n1_thirdvar_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 +; CHECK-NOBMI-NEXT: xorl %edx, %edi +; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: n1_thirdvar_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: andl $16776960, %edi # imm = 0xFFFF00 +; CHECK-BMI-NEXT: xorl %edx, %edi +; 
CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: retq + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 16776960 + %r = xor i32 %n1, %z ; instead of %y + ret i32 %r +} Index: llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbits.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbits.ll +++ llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbits.ll @@ -0,0 +1,458 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi < %s | FileCheck %s --check-prefix=CHECK-NOBMI +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi < %s | FileCheck %s --check-prefix=CHECK-BMI + +; https://bugs.llvm.org/show_bug.cgi?id=37104 + +; X: [bit2] [bit0] +; Y: [bit3] [bit1] + +define i8 @out8_constmask(i8 %x, i8 %y) { +; CHECK-NOBMI-LABEL: out8_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: andb $85, %dil +; CHECK-NOBMI-NEXT: andb $-86, %sil +; CHECK-NOBMI-NEXT: orb %dil, %sil +; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: out8_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: andb $85, %dil +; CHECK-BMI-NEXT: andb $-86, %sil +; CHECK-BMI-NEXT: orb %dil, %sil +; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: retq + %mx = and i8 %x, 85 + %my = and i8 %y, -86 + %r = or i8 %mx, %my + ret i8 %r +} + +define i16 @out16_constmask(i16 %x, i16 %y) { +; CHECK-NOBMI-LABEL: out16_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NOBMI-NEXT: andl $21845, %edi # imm = 0x5555 +; CHECK-NOBMI-NEXT: andl $-21846, %esi # imm = 0xAAAA +; CHECK-NOBMI-NEXT: leal (%rsi,%rdi), %eax +; CHECK-NOBMI-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: out16_constmask: +; 
CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-BMI-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-BMI-NEXT: andl $21845, %edi # imm = 0x5555 +; CHECK-BMI-NEXT: andl $-21846, %esi # imm = 0xAAAA +; CHECK-BMI-NEXT: leal (%rsi,%rdi), %eax +; CHECK-BMI-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-BMI-NEXT: retq + %mx = and i16 %x, 21845 + %my = and i16 %y, -21846 + %r = or i16 %mx, %my + ret i16 %r +} + +define i32 @out32_constmask(i32 %x, i32 %y) { +; CHECK-NOBMI-LABEL: out32_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NOBMI-NEXT: andl $1431655765, %edi # imm = 0x55555555 +; CHECK-NOBMI-NEXT: andl $-1431655766, %esi # imm = 0xAAAAAAAA +; CHECK-NOBMI-NEXT: leal (%rsi,%rdi), %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: out32_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-BMI-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-BMI-NEXT: andl $1431655765, %edi # imm = 0x55555555 +; CHECK-BMI-NEXT: andl $-1431655766, %esi # imm = 0xAAAAAAAA +; CHECK-BMI-NEXT: leal (%rsi,%rdi), %eax +; CHECK-BMI-NEXT: retq + %mx = and i32 %x, 1431655765 + %my = and i32 %y, -1431655766 + %r = or i32 %mx, %my + ret i32 %r +} + +define i64 @out64_constmask(i64 %x, i64 %y) { +; CHECK-NOBMI-LABEL: out64_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 +; CHECK-NOBMI-NEXT: andq %rdi, %rcx +; CHECK-NOBMI-NEXT: movabsq $-6148914691236517206, %rax # imm = 0xAAAAAAAAAAAAAAAA +; CHECK-NOBMI-NEXT: andq %rsi, %rax +; CHECK-NOBMI-NEXT: orq %rcx, %rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: out64_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 +; CHECK-BMI-NEXT: andq %rdi, %rcx +; CHECK-BMI-NEXT: movabsq $-6148914691236517206, 
%rax # imm = 0xAAAAAAAAAAAAAAAA +; CHECK-BMI-NEXT: andq %rsi, %rax +; CHECK-BMI-NEXT: orq %rcx, %rax +; CHECK-BMI-NEXT: retq + %mx = and i64 %x, 6148914691236517205 + %my = and i64 %y, -6148914691236517206 + %r = or i64 %mx, %my + ret i64 %r +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Same masked merges as above, written as ((x ^ y) & C) ^ y. Should be the same as the 'out' variants. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +define i8 @in8_constmask(i8 %x, i8 %y) { +; CHECK-NOBMI-LABEL: in8_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: andb $85, %dil +; CHECK-NOBMI-NEXT: xorb %dil, %sil +; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in8_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: andb $85, %dil +; CHECK-BMI-NEXT: xorb %dil, %sil +; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: retq + %n0 = xor i8 %x, %y + %n1 = and i8 %n0, 85 + %r = xor i8 %n1, %y + ret i8 %r +} + +define i16 @in16_constmask(i16 %x, i16 %y) { +; CHECK-NOBMI-LABEL: in16_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: andl $21845, %edi # imm = 0x5555 +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in16_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: andl $21845, %edi # imm = 0x5555 +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: retq + %n0 = xor i16 %x, %y + %n1 = and i16 %n0, 21845 + %r = xor i16 %n1, %y + ret i16 %r +} + +define i32 @in32_constmask(i32 %x, i32 %y) { +; CHECK-NOBMI-LABEL: in32_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: andl $1431655765, %edi # imm = 0x55555555 +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; 
CHECK-BMI-LABEL: in32_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: andl $1431655765, %edi # imm = 0x55555555 +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: retq + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 1431655765 + %r = xor i32 %n1, %y + ret i32 %r +} + +define i64 @in64_constmask(i64 %x, i64 %y) { +; CHECK-NOBMI-LABEL: in64_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: xorq %rsi, %rdi +; CHECK-NOBMI-NEXT: movabsq $6148914691236517205, %rax # imm = 0x5555555555555555 +; CHECK-NOBMI-NEXT: andq %rdi, %rax +; CHECK-NOBMI-NEXT: xorq %rsi, %rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in64_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: xorq %rsi, %rdi +; CHECK-BMI-NEXT: movabsq $6148914691236517205, %rax # imm = 0x5555555555555555 +; CHECK-BMI-NEXT: andq %rdi, %rax +; CHECK-BMI-NEXT: xorq %rsi, %rax +; CHECK-BMI-NEXT: retq + %n0 = xor i64 %x, %y + %n1 = and i64 %n0, 6148914691236517205 + %r = xor i64 %n1, %y + ret i64 %r +} + +; ============================================================================ ; +; Constant Commutativity tests. 
+; ============================================================================ ; + +define i32 @in_constmask_commutativity_0_1(i32 %x, i32 %y) { +; CHECK-NOBMI-LABEL: in_constmask_commutativity_0_1: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: andl $1431655765, %edi # imm = 0x55555555 +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in_constmask_commutativity_0_1: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: andl $1431655765, %edi # imm = 0x55555555 +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: retq + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 1431655765 + %r = xor i32 %y, %n1 ; swapped + ret i32 %r +} + +define i32 @in_constmask_commutativity_1_0(i32 %x, i32 %y) { +; CHECK-NOBMI-LABEL: in_constmask_commutativity_1_0: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: xorl %edi, %esi +; CHECK-NOBMI-NEXT: andl $1431655765, %esi # imm = 0x55555555 +; CHECK-NOBMI-NEXT: xorl %edi, %esi +; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in_constmask_commutativity_1_0: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: xorl %edi, %esi +; CHECK-BMI-NEXT: andl $1431655765, %esi # imm = 0x55555555 +; CHECK-BMI-NEXT: xorl %edi, %esi +; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: retq + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 1431655765 + %r = xor i32 %n1, %x ; %x instead of %y + ret i32 %r +} + +define i32 @in_constmask_commutativity_1_1(i32 %x, i32 %y) { +; CHECK-NOBMI-LABEL: in_constmask_commutativity_1_1: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: xorl %edi, %esi +; CHECK-NOBMI-NEXT: andl $1431655765, %esi # imm = 0x55555555 +; CHECK-NOBMI-NEXT: xorl %edi, %esi +; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in_constmask_commutativity_1_1: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: xorl %edi, %esi +; CHECK-BMI-NEXT: 
andl $1431655765, %esi # imm = 0x55555555 +; CHECK-BMI-NEXT: xorl %edi, %esi +; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: retq + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 1431655765 + %r = xor i32 %x, %n1 ; swapped, %x instead of %y + ret i32 %r +} + +; ============================================================================ ; +; Y is an 'and' too. +; ============================================================================ ; + +define i32 @in_complex_y0_constmask(i32 %x, i32 %y_hi, i32 %y_low) { +; CHECK-NOBMI-LABEL: in_complex_y0_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: andl %edx, %esi +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: andl $1431655765, %edi # imm = 0x55555555 +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in_complex_y0_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: andl %edx, %esi +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: andl $1431655765, %edi # imm = 0x55555555 +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: retq + %y = and i32 %y_hi, %y_low + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 1431655765 + %r = xor i32 %n1, %y + ret i32 %r +} + +define i32 @in_complex_y1_constmask(i32 %x, i32 %y_hi, i32 %y_low) { +; CHECK-NOBMI-LABEL: in_complex_y1_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: andl %edx, %esi +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: andl $1431655765, %edi # imm = 0x55555555 +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in_complex_y1_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: andl %edx, %esi +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: andl $1431655765, %edi # imm = 0x55555555 +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: retq + %y = and i32 %y_hi, %y_low + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 
1431655765 + %r = xor i32 %y, %n1 + ret i32 %r +} + +; ============================================================================ ; +; Negative tests. Should not be folded. +; ============================================================================ ; + +; Multi-use tests. + +declare void @use32(i32) nounwind + +define i32 @in_multiuse_A_constmask(i32 %x, i32 %y, i32 %z) nounwind { +; CHECK-NOBMI-LABEL: in_multiuse_A_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: pushq %rbp +; CHECK-NOBMI-NEXT: pushq %rbx +; CHECK-NOBMI-NEXT: pushq %rax +; CHECK-NOBMI-NEXT: movl %esi, %ebx +; CHECK-NOBMI-NEXT: movl %edi, %ebp +; CHECK-NOBMI-NEXT: xorl %esi, %ebp +; CHECK-NOBMI-NEXT: andl $1431655765, %ebp # imm = 0x55555555 +; CHECK-NOBMI-NEXT: movl %ebp, %edi +; CHECK-NOBMI-NEXT: callq use32 +; CHECK-NOBMI-NEXT: xorl %ebx, %ebp +; CHECK-NOBMI-NEXT: movl %ebp, %eax +; CHECK-NOBMI-NEXT: addq $8, %rsp +; CHECK-NOBMI-NEXT: popq %rbx +; CHECK-NOBMI-NEXT: popq %rbp +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in_multiuse_A_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: pushq %rbp +; CHECK-BMI-NEXT: pushq %rbx +; CHECK-BMI-NEXT: pushq %rax +; CHECK-BMI-NEXT: movl %esi, %ebx +; CHECK-BMI-NEXT: movl %edi, %ebp +; CHECK-BMI-NEXT: xorl %esi, %ebp +; CHECK-BMI-NEXT: andl $1431655765, %ebp # imm = 0x55555555 +; CHECK-BMI-NEXT: movl %ebp, %edi +; CHECK-BMI-NEXT: callq use32 +; CHECK-BMI-NEXT: xorl %ebx, %ebp +; CHECK-BMI-NEXT: movl %ebp, %eax +; CHECK-BMI-NEXT: addq $8, %rsp +; CHECK-BMI-NEXT: popq %rbx +; CHECK-BMI-NEXT: popq %rbp +; CHECK-BMI-NEXT: retq + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 1431655765 + call void @use32(i32 %n1) + %r = xor i32 %n1, %y + ret i32 %r +} + +define i32 @in_multiuse_B_constmask(i32 %x, i32 %y, i32 %z) nounwind { +; CHECK-NOBMI-LABEL: in_multiuse_B_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: pushq %rbp +; CHECK-NOBMI-NEXT: pushq %rbx +; CHECK-NOBMI-NEXT: pushq %rax +; CHECK-NOBMI-NEXT: movl %esi, %ebx +; CHECK-NOBMI-NEXT: 
xorl %esi, %edi +; CHECK-NOBMI-NEXT: movl %edi, %ebp +; CHECK-NOBMI-NEXT: andl $1431655765, %ebp # imm = 0x55555555 +; CHECK-NOBMI-NEXT: callq use32 +; CHECK-NOBMI-NEXT: xorl %ebx, %ebp +; CHECK-NOBMI-NEXT: movl %ebp, %eax +; CHECK-NOBMI-NEXT: addq $8, %rsp +; CHECK-NOBMI-NEXT: popq %rbx +; CHECK-NOBMI-NEXT: popq %rbp +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in_multiuse_B_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: pushq %rbp +; CHECK-BMI-NEXT: pushq %rbx +; CHECK-BMI-NEXT: pushq %rax +; CHECK-BMI-NEXT: movl %esi, %ebx +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: movl %edi, %ebp +; CHECK-BMI-NEXT: andl $1431655765, %ebp # imm = 0x55555555 +; CHECK-BMI-NEXT: callq use32 +; CHECK-BMI-NEXT: xorl %ebx, %ebp +; CHECK-BMI-NEXT: movl %ebp, %eax +; CHECK-BMI-NEXT: addq $8, %rsp +; CHECK-BMI-NEXT: popq %rbx +; CHECK-BMI-NEXT: popq %rbp +; CHECK-BMI-NEXT: retq + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 1431655765 + call void @use32(i32 %n0) + %r = xor i32 %n1, %y + ret i32 %r +} + +; Various bad variants + +define i32 @n0_badconstmask(i32 %x, i32 %y) { +; CHECK-NOBMI-LABEL: n0_badconstmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: andl $1431655765, %edi # imm = 0x55555555 +; CHECK-NOBMI-NEXT: andl $-1431655765, %esi # imm = 0xAAAAAAAB +; CHECK-NOBMI-NEXT: orl %edi, %esi +; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: n0_badconstmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: andl $1431655765, %edi # imm = 0x55555555 +; CHECK-BMI-NEXT: andl $-1431655765, %esi # imm = 0xAAAAAAAB +; CHECK-BMI-NEXT: orl %edi, %esi +; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: retq + %mx = and i32 %x, 1431655765 + %my = and i32 %y, -1431655765 ; instead of -1431655766 + %r = or i32 %mx, %my + ret i32 %r +} + +define i32 @n1_thirdvar_constmask(i32 %x, i32 %y, i32 %z) { +; CHECK-NOBMI-LABEL: n1_thirdvar_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: andl $1431655765, %edi # 
imm = 0x55555555 +; CHECK-NOBMI-NEXT: xorl %edx, %edi +; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: n1_thirdvar_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: andl $1431655765, %edi # imm = 0x55555555 +; CHECK-BMI-NEXT: xorl %edx, %edi +; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: retq + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 1431655765 + %r = xor i32 %n1, %z ; instead of %y + ret i32 %r +} Index: llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll +++ llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll @@ -0,0 +1,458 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi < %s | FileCheck %s --check-prefix=CHECK-NOBMI +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi < %s | FileCheck %s --check-prefix=CHECK-BMI + +; https://bugs.llvm.org/show_bug.cgi?id=37104 + +; X: [bit 3210] +; Y: [bit 7654] + +define i8 @out8_constmask(i8 %x, i8 %y) { +; CHECK-NOBMI-LABEL: out8_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: andb $15, %dil +; CHECK-NOBMI-NEXT: andb $-16, %sil +; CHECK-NOBMI-NEXT: orb %dil, %sil +; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: out8_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: andb $15, %dil +; CHECK-BMI-NEXT: andb $-16, %sil +; CHECK-BMI-NEXT: orb %dil, %sil +; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: retq + %mx = and i8 %x, 15 + %my = and i8 %y, -16 + %r = or i8 %mx, %my + ret i8 %r +} + +define i16 @out16_constmask(i16 %x, i16 %y) { +; CHECK-NOBMI-LABEL: out16_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: # kill: def $esi killed $esi def $rsi +; 
CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NOBMI-NEXT: andl $3855, %edi # imm = 0xF0F +; CHECK-NOBMI-NEXT: andl $-3856, %esi # imm = 0xF0F0 +; CHECK-NOBMI-NEXT: leal (%rsi,%rdi), %eax +; CHECK-NOBMI-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: out16_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-BMI-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-BMI-NEXT: andl $3855, %edi # imm = 0xF0F +; CHECK-BMI-NEXT: andl $-3856, %esi # imm = 0xF0F0 +; CHECK-BMI-NEXT: leal (%rsi,%rdi), %eax +; CHECK-BMI-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-BMI-NEXT: retq + %mx = and i16 %x, 3855 + %my = and i16 %y, -3856 + %r = or i16 %mx, %my + ret i16 %r +} + +define i32 @out32_constmask(i32 %x, i32 %y) { +; CHECK-NOBMI-LABEL: out32_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NOBMI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F +; CHECK-NOBMI-NEXT: andl $-252645136, %esi # imm = 0xF0F0F0F0 +; CHECK-NOBMI-NEXT: leal (%rsi,%rdi), %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: out32_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-BMI-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-BMI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F +; CHECK-BMI-NEXT: andl $-252645136, %esi # imm = 0xF0F0F0F0 +; CHECK-BMI-NEXT: leal (%rsi,%rdi), %eax +; CHECK-BMI-NEXT: retq + %mx = and i32 %x, 252645135 + %my = and i32 %y, -252645136 + %r = or i32 %mx, %my + ret i32 %r +} + +define i64 @out64_constmask(i64 %x, i64 %y) { +; CHECK-NOBMI-LABEL: out64_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F +; CHECK-NOBMI-NEXT: andq %rdi, %rcx +; CHECK-NOBMI-NEXT: movabsq $-1085102592571150096, %rax # imm = 0xF0F0F0F0F0F0F0F0 +; 
CHECK-NOBMI-NEXT: andq %rsi, %rax +; CHECK-NOBMI-NEXT: orq %rcx, %rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: out64_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F +; CHECK-BMI-NEXT: andq %rdi, %rcx +; CHECK-BMI-NEXT: movabsq $-1085102592571150096, %rax # imm = 0xF0F0F0F0F0F0F0F0 +; CHECK-BMI-NEXT: andq %rsi, %rax +; CHECK-BMI-NEXT: orq %rcx, %rax +; CHECK-BMI-NEXT: retq + %mx = and i64 %x, 1085102592571150095 + %my = and i64 %y, -1085102592571150096 + %r = or i64 %mx, %my + ret i64 %r +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Same masked merges as above, written as ((x ^ y) & C) ^ y. Should be the same as the 'out' variants. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +define i8 @in8_constmask(i8 %x, i8 %y) { +; CHECK-NOBMI-LABEL: in8_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: andb $15, %dil +; CHECK-NOBMI-NEXT: xorb %dil, %sil +; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in8_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: andb $15, %dil +; CHECK-BMI-NEXT: xorb %dil, %sil +; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: retq + %n0 = xor i8 %x, %y + %n1 = and i8 %n0, 15 + %r = xor i8 %n1, %y + ret i8 %r +} + +define i16 @in16_constmask(i16 %x, i16 %y) { +; CHECK-NOBMI-LABEL: in16_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: andl $3855, %edi # imm = 0xF0F +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in16_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: andl $3855, %edi # imm = 0xF0F +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: retq + %n0 = xor i16 %x, %y + %n1 = and i16 %n0, 3855 + %r = xor i16 %n1, %y + ret i16 %r +} + 
+define i32 @in32_constmask(i32 %x, i32 %y) { +; CHECK-NOBMI-LABEL: in32_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in32_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: retq + %n0 = xor i32 %x, %y ; bits in which %x and %y differ + %n1 = and i32 %n0, 252645135 ; keep only the low nibble of each byte (mask 0x0F0F0F0F) + %r = xor i32 %n1, %y ; %x in the masked bits, %y elsewhere + ret i32 %r +} + +define i64 @in64_constmask(i64 %x, i64 %y) { +; CHECK-NOBMI-LABEL: in64_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: xorq %rsi, %rdi +; CHECK-NOBMI-NEXT: movabsq $1085102592571150095, %rax # imm = 0xF0F0F0F0F0F0F0F +; CHECK-NOBMI-NEXT: andq %rdi, %rax +; CHECK-NOBMI-NEXT: xorq %rsi, %rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in64_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: xorq %rsi, %rdi +; CHECK-BMI-NEXT: movabsq $1085102592571150095, %rax # imm = 0xF0F0F0F0F0F0F0F +; CHECK-BMI-NEXT: andq %rdi, %rax +; CHECK-BMI-NEXT: xorq %rsi, %rax +; CHECK-BMI-NEXT: retq + %n0 = xor i64 %x, %y ; bits in which %x and %y differ + %n1 = and i64 %n0, 1085102592571150095 ; keep only the low nibble of each byte (mask 0x0F0F0F0F0F0F0F0F) + %r = xor i64 %n1, %y ; %x in the masked bits, %y elsewhere + ret i64 %r +} + +; ============================================================================ ; +; Constant Commutativity tests. 
+; ============================================================================ ; + +define i32 @in_constmask_commutativity_0_1(i32 %x, i32 %y) { +; CHECK-NOBMI-LABEL: in_constmask_commutativity_0_1: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in_constmask_commutativity_0_1: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: retq + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 252645135 + %r = xor i32 %y, %n1 ; swapped + ret i32 %r +} + +define i32 @in_constmask_commutativity_1_0(i32 %x, i32 %y) { +; CHECK-NOBMI-LABEL: in_constmask_commutativity_1_0: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: xorl %edi, %esi +; CHECK-NOBMI-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F +; CHECK-NOBMI-NEXT: xorl %edi, %esi +; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in_constmask_commutativity_1_0: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: xorl %edi, %esi +; CHECK-BMI-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F +; CHECK-BMI-NEXT: xorl %edi, %esi +; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: retq + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 252645135 + %r = xor i32 %n1, %x ; %x instead of %y + ret i32 %r +} + +define i32 @in_constmask_commutativity_1_1(i32 %x, i32 %y) { +; CHECK-NOBMI-LABEL: in_constmask_commutativity_1_1: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: xorl %edi, %esi +; CHECK-NOBMI-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F +; CHECK-NOBMI-NEXT: xorl %edi, %esi +; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in_constmask_commutativity_1_1: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: xorl %edi, %esi +; CHECK-BMI-NEXT: andl 
$252645135, %esi # imm = 0xF0F0F0F +; CHECK-BMI-NEXT: xorl %edi, %esi +; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: retq + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 252645135 + %r = xor i32 %x, %n1 ; swapped, %x instead of %y + ret i32 %r +} + +; ============================================================================ ; +; Y is an 'and' too. +; ============================================================================ ; + +define i32 @in_complex_y0_constmask(i32 %x, i32 %y_hi, i32 %y_low) { +; CHECK-NOBMI-LABEL: in_complex_y0_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: andl %edx, %esi +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in_complex_y0_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: andl %edx, %esi +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: retq + %y = and i32 %y_hi, %y_low + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 252645135 + %r = xor i32 %n1, %y + ret i32 %r +} + +define i32 @in_complex_y1_constmask(i32 %x, i32 %y_hi, i32 %y_low) { +; CHECK-NOBMI-LABEL: in_complex_y1_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: andl %edx, %esi +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in_complex_y1_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: andl %edx, %esi +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: retq + %y = and i32 %y_hi, %y_low + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 252645135 + %r = xor 
i32 %y, %n1 + ret i32 %r +} + +; ============================================================================ ; +; Negative tests. Should not be folded. +; ============================================================================ ; + +; Multi-use tests. + +declare void @use32(i32) nounwind + +define i32 @in_multiuse_A_constmask(i32 %x, i32 %y, i32 %z) nounwind { +; CHECK-NOBMI-LABEL: in_multiuse_A_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: pushq %rbp +; CHECK-NOBMI-NEXT: pushq %rbx +; CHECK-NOBMI-NEXT: pushq %rax +; CHECK-NOBMI-NEXT: movl %esi, %ebx +; CHECK-NOBMI-NEXT: movl %edi, %ebp +; CHECK-NOBMI-NEXT: xorl %esi, %ebp +; CHECK-NOBMI-NEXT: andl $252645135, %ebp # imm = 0xF0F0F0F +; CHECK-NOBMI-NEXT: movl %ebp, %edi +; CHECK-NOBMI-NEXT: callq use32 +; CHECK-NOBMI-NEXT: xorl %ebx, %ebp +; CHECK-NOBMI-NEXT: movl %ebp, %eax +; CHECK-NOBMI-NEXT: addq $8, %rsp +; CHECK-NOBMI-NEXT: popq %rbx +; CHECK-NOBMI-NEXT: popq %rbp +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in_multiuse_A_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: pushq %rbp +; CHECK-BMI-NEXT: pushq %rbx +; CHECK-BMI-NEXT: pushq %rax +; CHECK-BMI-NEXT: movl %esi, %ebx +; CHECK-BMI-NEXT: movl %edi, %ebp +; CHECK-BMI-NEXT: xorl %esi, %ebp +; CHECK-BMI-NEXT: andl $252645135, %ebp # imm = 0xF0F0F0F +; CHECK-BMI-NEXT: movl %ebp, %edi +; CHECK-BMI-NEXT: callq use32 +; CHECK-BMI-NEXT: xorl %ebx, %ebp +; CHECK-BMI-NEXT: movl %ebp, %eax +; CHECK-BMI-NEXT: addq $8, %rsp +; CHECK-BMI-NEXT: popq %rbx +; CHECK-BMI-NEXT: popq %rbp +; CHECK-BMI-NEXT: retq + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 252645135 + call void @use32(i32 %n1) + %r = xor i32 %n1, %y + ret i32 %r +} + +define i32 @in_multiuse_B_constmask(i32 %x, i32 %y, i32 %z) nounwind { +; CHECK-NOBMI-LABEL: in_multiuse_B_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: pushq %rbp +; CHECK-NOBMI-NEXT: pushq %rbx +; CHECK-NOBMI-NEXT: pushq %rax +; CHECK-NOBMI-NEXT: movl %esi, %ebx +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; 
CHECK-NOBMI-NEXT: movl %edi, %ebp +; CHECK-NOBMI-NEXT: andl $252645135, %ebp # imm = 0xF0F0F0F +; CHECK-NOBMI-NEXT: callq use32 +; CHECK-NOBMI-NEXT: xorl %ebx, %ebp +; CHECK-NOBMI-NEXT: movl %ebp, %eax +; CHECK-NOBMI-NEXT: addq $8, %rsp +; CHECK-NOBMI-NEXT: popq %rbx +; CHECK-NOBMI-NEXT: popq %rbp +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: in_multiuse_B_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: pushq %rbp +; CHECK-BMI-NEXT: pushq %rbx +; CHECK-BMI-NEXT: pushq %rax +; CHECK-BMI-NEXT: movl %esi, %ebx +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: movl %edi, %ebp +; CHECK-BMI-NEXT: andl $252645135, %ebp # imm = 0xF0F0F0F +; CHECK-BMI-NEXT: callq use32 +; CHECK-BMI-NEXT: xorl %ebx, %ebp +; CHECK-BMI-NEXT: movl %ebp, %eax +; CHECK-BMI-NEXT: addq $8, %rsp +; CHECK-BMI-NEXT: popq %rbx +; CHECK-BMI-NEXT: popq %rbp +; CHECK-BMI-NEXT: retq + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 252645135 + call void @use32(i32 %n0) + %r = xor i32 %n1, %y + ret i32 %r +} + +; Various bad variants + +define i32 @n0_badconstmask(i32 %x, i32 %y) { +; CHECK-NOBMI-LABEL: n0_badconstmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F +; CHECK-NOBMI-NEXT: andl $-252645135, %esi # imm = 0xF0F0F0F1 +; CHECK-NOBMI-NEXT: orl %edi, %esi +; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: n0_badconstmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F +; CHECK-BMI-NEXT: andl $-252645135, %esi # imm = 0xF0F0F0F1 +; CHECK-BMI-NEXT: orl %edi, %esi +; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: retq + %mx = and i32 %x, 252645135 + %my = and i32 %y, -252645135 ; instead of -252645136 + %r = or i32 %mx, %my + ret i32 %r +} + +define i32 @n1_thirdvar_constmask(i32 %x, i32 %y, i32 %z) { +; CHECK-NOBMI-LABEL: n1_thirdvar_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: xorl %esi, %edi +; CHECK-NOBMI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F +; 
CHECK-NOBMI-NEXT: xorl %edx, %edi +; CHECK-NOBMI-NEXT: movl %edi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: n1_thirdvar_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: xorl %esi, %edi +; CHECK-BMI-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F +; CHECK-BMI-NEXT: xorl %edx, %edi +; CHECK-BMI-NEXT: movl %edi, %eax +; CHECK-BMI-NEXT: retq + %n0 = xor i32 %x, %y + %n1 = and i32 %n0, 252645135 + %r = xor i32 %n1, %z ; instead of %y + ret i32 %r +} Index: llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-lowhigh.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-lowhigh.ll +++ llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-lowhigh.ll @@ -0,0 +1,426 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi < %s | FileCheck %s --check-prefix=CHECK-NOBMI +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi < %s | FileCheck %s --check-prefix=CHECK-BMI + +; https://bugs.llvm.org/show_bug.cgi?id=37104 + +; X: [byte1][byte0] +; Y: [byte3][byte2] + +define i8 @out8_constmask(i8 %x, i8 %y) { +; CHECK-NOBMI-LABEL: out8_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: andb $15, %dil +; CHECK-NOBMI-NEXT: andb $-16, %sil +; CHECK-NOBMI-NEXT: orb %dil, %sil +; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: out8_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: andb $15, %dil +; CHECK-BMI-NEXT: andb $-16, %sil +; CHECK-BMI-NEXT: orb %dil, %sil +; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: retq + %mx = and i8 %x, 15 + %my = and i8 %y, -16 + %r = or i8 %mx, %my + ret i8 %r +} + +define i16 @out16_constmask(i16 %x, i16 %y) { +; CHECK-NOBMI-LABEL: out16_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movzbl %dil, %eax +; CHECK-NOBMI-NEXT: andl $-256, %esi +; CHECK-NOBMI-NEXT: orl %esi, %eax +; 
CHECK-NOBMI-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: out16_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: movzbl %dil, %eax +; CHECK-BMI-NEXT: andl $-256, %esi +; CHECK-BMI-NEXT: orl %esi, %eax +; CHECK-BMI-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-BMI-NEXT: retq + %mx = and i16 %x, 255 + %my = and i16 %y, -256 + %r = or i16 %mx, %my + ret i16 %r +} + +define i32 @out32_constmask(i32 %x, i32 %y) { +; CHECK-NOBMI-LABEL: out32_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movzwl %di, %eax +; CHECK-NOBMI-NEXT: andl $-65536, %esi # imm = 0xFFFF0000 +; CHECK-NOBMI-NEXT: orl %esi, %eax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: out32_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: movzwl %di, %eax +; CHECK-BMI-NEXT: andl $-65536, %esi # imm = 0xFFFF0000 +; CHECK-BMI-NEXT: orl %esi, %eax +; CHECK-BMI-NEXT: retq + %mx = and i32 %x, 65535 + %my = and i32 %y, -65536 + %r = or i32 %mx, %my + ret i32 %r +} + +define i64 @out64_constmask(i64 %x, i64 %y) { +; CHECK-NOBMI-LABEL: out64_constmask: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl %edi, %ecx +; CHECK-NOBMI-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000 +; CHECK-NOBMI-NEXT: andq %rsi, %rax +; CHECK-NOBMI-NEXT: orq %rcx, %rax +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI-LABEL: out64_constmask: +; CHECK-BMI: # %bb.0: +; CHECK-BMI-NEXT: movl %edi, %ecx +; CHECK-BMI-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000 +; CHECK-BMI-NEXT: andq %rsi, %rax +; CHECK-BMI-NEXT: orq %rcx, %rax +; CHECK-BMI-NEXT: retq + %mx = and i64 %x, 4294967295 + %my = and i64 %y, -4294967296 + %r = or i64 %mx, %my + ret i64 %r +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Same masked merges as above, written as ((x ^ y) & C) ^ y. Should be the same as the 'out' variants. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+define i8 @in8_constmask(i8 %x, i8 %y) {
+; CHECK-NOBMI-LABEL: in8_constmask:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: andb $15, %dil
+; CHECK-NOBMI-NEXT: xorb %dil, %sil
+; CHECK-NOBMI-NEXT: movl %esi, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in8_constmask:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: andb $15, %dil
+; CHECK-BMI-NEXT: xorb %dil, %sil
+; CHECK-BMI-NEXT: movl %esi, %eax
+; CHECK-BMI-NEXT: retq
+  %n0 = xor i8 %x, %y ; bits in which %x and %y differ
+  %n1 = and i8 %n0, 15 ; keep the differing bits under the mask 0x0F
+  %r = xor i8 %n1, %y ; %y with its masked bits replaced by those of %x
+  ret i8 %r
+}
+
+define i16 @in16_constmask(i16 %x, i16 %y) {
+; CHECK-NOBMI-LABEL: in16_constmask:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: movzbl %dil, %eax
+; CHECK-NOBMI-NEXT: xorl %esi, %eax
+; CHECK-NOBMI-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in16_constmask:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: movzbl %dil, %eax
+; CHECK-BMI-NEXT: xorl %esi, %eax
+; CHECK-BMI-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-BMI-NEXT: retq
+  %n0 = xor i16 %x, %y
+  %n1 = and i16 %n0, 255 ; mask 0x00FF
+  %r = xor i16 %n1, %y
+  ret i16 %r
+}
+
+define i32 @in32_constmask(i32 %x, i32 %y) {
+; CHECK-NOBMI-LABEL: in32_constmask:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: movzwl %di, %eax
+; CHECK-NOBMI-NEXT: xorl %esi, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in32_constmask:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: movzwl %di, %eax
+; CHECK-BMI-NEXT: xorl %esi, %eax
+; CHECK-BMI-NEXT: retq
+  %n0 = xor i32 %x, %y
+  %n1 = and i32 %n0, 65535 ; mask 0x0000FFFF
+  %r = xor i32 %n1, %y
+  ret i32 %r
+}
+
+define i64 @in64_constmask(i64 %x, i64 %y) {
+; CHECK-NOBMI-LABEL: in64_constmask:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: movl %esi, %eax
+; CHECK-NOBMI-NEXT: xorl %edi, %eax
+; CHECK-NOBMI-NEXT: xorq %rsi, %rax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in64_constmask:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: movl %esi, %eax
+; CHECK-BMI-NEXT: xorl %edi, %eax
+; CHECK-BMI-NEXT: xorq %rsi, %rax
+; CHECK-BMI-NEXT: retq
+  %n0 = xor i64 %x, %y
+  %n1 = and i64 %n0, 4294967295 ; mask 0x00000000FFFFFFFF
+  %r = xor i64 %n1, %y
+  ret i64 %r
+}
+
+; ============================================================================ ;
+; Constant Commutativity tests. Suffix _A_B: A = final xor uses %x instead of %y, B = final xor operands swapped.
+; ============================================================================ ;
+
+define i32 @in_constmask_commutativity_0_1(i32 %x, i32 %y) {
+; CHECK-NOBMI-LABEL: in_constmask_commutativity_0_1:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: movzwl %di, %eax
+; CHECK-NOBMI-NEXT: xorl %esi, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in_constmask_commutativity_0_1:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: movzwl %di, %eax
+; CHECK-BMI-NEXT: xorl %esi, %eax
+; CHECK-BMI-NEXT: retq
+  %n0 = xor i32 %x, %y
+  %n1 = and i32 %n0, 65535
+  %r = xor i32 %y, %n1 ; swapped
+  ret i32 %r
+}
+
+define i32 @in_constmask_commutativity_1_0(i32 %x, i32 %y) {
+; CHECK-NOBMI-LABEL: in_constmask_commutativity_1_0:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: xorl %edi, %esi
+; CHECK-NOBMI-NEXT: movzwl %si, %eax
+; CHECK-NOBMI-NEXT: xorl %edi, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in_constmask_commutativity_1_0:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: xorl %edi, %esi
+; CHECK-BMI-NEXT: movzwl %si, %eax
+; CHECK-BMI-NEXT: xorl %edi, %eax
+; CHECK-BMI-NEXT: retq
+  %n0 = xor i32 %x, %y
+  %n1 = and i32 %n0, 65535
+  %r = xor i32 %n1, %x ; %x instead of %y
+  ret i32 %r
+}
+
+define i32 @in_constmask_commutativity_1_1(i32 %x, i32 %y) {
+; CHECK-NOBMI-LABEL: in_constmask_commutativity_1_1:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: xorl %edi, %esi
+; CHECK-NOBMI-NEXT: movzwl %si, %eax
+; CHECK-NOBMI-NEXT: xorl %edi, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in_constmask_commutativity_1_1:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: xorl %edi, %esi
+; CHECK-BMI-NEXT: movzwl %si, %eax
+; CHECK-BMI-NEXT: xorl %edi, %eax
+; CHECK-BMI-NEXT: retq
+  %n0 = xor i32 %x, %y
+  %n1 = and i32 %n0, 65535
+  %r = xor i32 %x, %n1 ; swapped, %x instead of %y
+  ret i32 %r
+}
+
+; ============================================================================ ;
+; Y is an 'and' too.
+; ============================================================================ ;
+
+define i32 @in_complex_y0_constmask(i32 %x, i32 %y_hi, i32 %y_low) {
+; CHECK-NOBMI-LABEL: in_complex_y0_constmask:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: andl %edx, %esi
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: movzwl %di, %eax
+; CHECK-NOBMI-NEXT: xorl %esi, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in_complex_y0_constmask:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: andl %edx, %esi
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: movzwl %di, %eax
+; CHECK-BMI-NEXT: xorl %esi, %eax
+; CHECK-BMI-NEXT: retq
+  %y = and i32 %y_hi, %y_low ; %y is itself the result of an 'and'
+  %n0 = xor i32 %x, %y
+  %n1 = and i32 %n0, 65535
+  %r = xor i32 %n1, %y
+  ret i32 %r
+}
+
+define i32 @in_complex_y1_constmask(i32 %x, i32 %y_hi, i32 %y_low) {
+; CHECK-NOBMI-LABEL: in_complex_y1_constmask:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: andl %edx, %esi
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: movzwl %di, %eax
+; CHECK-NOBMI-NEXT: xorl %esi, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in_complex_y1_constmask:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: andl %edx, %esi
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: movzwl %di, %eax
+; CHECK-BMI-NEXT: xorl %esi, %eax
+; CHECK-BMI-NEXT: retq
+  %y = and i32 %y_hi, %y_low ; %y is itself the result of an 'and'
+  %n0 = xor i32 %x, %y
+  %n1 = and i32 %n0, 65535
+  %r = xor i32 %y, %n1 ; swapped
+  ret i32 %r
+}
+
+; 
============================================================================ ;
+; Negative tests. Should not be folded.
+; ============================================================================ ;
+
+; Multi-use tests: an intermediate value (%n1 or %n0) also feeds a call to @use32.
+
+declare void @use32(i32) nounwind
+
+define i32 @in_multiuse_A_constmask(i32 %x, i32 %y, i32 %z) nounwind {
+; CHECK-NOBMI-LABEL: in_multiuse_A_constmask:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: pushq %rbp
+; CHECK-NOBMI-NEXT: pushq %rbx
+; CHECK-NOBMI-NEXT: pushq %rax
+; CHECK-NOBMI-NEXT: movl %esi, %ebx
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: movzwl %di, %ebp
+; CHECK-NOBMI-NEXT: movl %ebp, %edi
+; CHECK-NOBMI-NEXT: callq use32
+; CHECK-NOBMI-NEXT: xorl %ebx, %ebp
+; CHECK-NOBMI-NEXT: movl %ebp, %eax
+; CHECK-NOBMI-NEXT: addq $8, %rsp
+; CHECK-NOBMI-NEXT: popq %rbx
+; CHECK-NOBMI-NEXT: popq %rbp
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in_multiuse_A_constmask:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: pushq %rbp
+; CHECK-BMI-NEXT: pushq %rbx
+; CHECK-BMI-NEXT: pushq %rax
+; CHECK-BMI-NEXT: movl %esi, %ebx
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: movzwl %di, %ebp
+; CHECK-BMI-NEXT: movl %ebp, %edi
+; CHECK-BMI-NEXT: callq use32
+; CHECK-BMI-NEXT: xorl %ebx, %ebp
+; CHECK-BMI-NEXT: movl %ebp, %eax
+; CHECK-BMI-NEXT: addq $8, %rsp
+; CHECK-BMI-NEXT: popq %rbx
+; CHECK-BMI-NEXT: popq %rbp
+; CHECK-BMI-NEXT: retq
+  %n0 = xor i32 %x, %y
+  %n1 = and i32 %n0, 65535
+  call void @use32(i32 %n1) ; extra use of %n1 (the 'and')
+  %r = xor i32 %n1, %y
+  ret i32 %r
+}
+
+define i32 @in_multiuse_B_constmask(i32 %x, i32 %y, i32 %z) nounwind {
+; CHECK-NOBMI-LABEL: in_multiuse_B_constmask:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: pushq %rbp
+; CHECK-NOBMI-NEXT: pushq %rbx
+; CHECK-NOBMI-NEXT: pushq %rax
+; CHECK-NOBMI-NEXT: movl %esi, %ebx
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: movzwl %di, %ebp
+; CHECK-NOBMI-NEXT: callq use32
+; CHECK-NOBMI-NEXT: xorl %ebx, %ebp
+; CHECK-NOBMI-NEXT: movl %ebp, %eax
+; CHECK-NOBMI-NEXT: addq $8, %rsp
+; CHECK-NOBMI-NEXT: popq %rbx
+; CHECK-NOBMI-NEXT: popq %rbp
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in_multiuse_B_constmask:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: pushq %rbp
+; CHECK-BMI-NEXT: pushq %rbx
+; CHECK-BMI-NEXT: pushq %rax
+; CHECK-BMI-NEXT: movl %esi, %ebx
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: movzwl %di, %ebp
+; CHECK-BMI-NEXT: callq use32
+; CHECK-BMI-NEXT: xorl %ebx, %ebp
+; CHECK-BMI-NEXT: movl %ebp, %eax
+; CHECK-BMI-NEXT: addq $8, %rsp
+; CHECK-BMI-NEXT: popq %rbx
+; CHECK-BMI-NEXT: popq %rbp
+; CHECK-BMI-NEXT: retq
+  %n0 = xor i32 %x, %y
+  %n1 = and i32 %n0, 65535
+  call void @use32(i32 %n0) ; extra use of %n0 (the inner 'xor')
+  %r = xor i32 %n1, %y
+  ret i32 %r
+}
+
+; Various bad variants
+
+define i32 @n0_badconstmask(i32 %x, i32 %y) {
+; CHECK-NOBMI-LABEL: n0_badconstmask:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: movzwl %di, %eax
+; CHECK-NOBMI-NEXT: andl $-65535, %esi # imm = 0xFFFF0001
+; CHECK-NOBMI-NEXT: orl %esi, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: n0_badconstmask:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: movzwl %di, %eax
+; CHECK-BMI-NEXT: andl $-65535, %esi # imm = 0xFFFF0001
+; CHECK-BMI-NEXT: orl %esi, %eax
+; CHECK-BMI-NEXT: retq
+  %mx = and i32 %x, 65535
+  %my = and i32 %y, -65535 ; instead of -65536
+  %r = or i32 %mx, %my
+  ret i32 %r
+}
+
+define i32 @n1_thirdvar_constmask(i32 %x, i32 %y, i32 %z) {
+; CHECK-NOBMI-LABEL: n1_thirdvar_constmask:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: movzwl %di, %eax
+; CHECK-NOBMI-NEXT: xorl %edx, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: n1_thirdvar_constmask:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: movzwl %di, %eax
+; CHECK-BMI-NEXT: xorl %edx, %eax
+; CHECK-BMI-NEXT: retq
+  %n0 = xor i32 %x, %y
+  %n1 = and i32 %n0, 65535
+  %r = xor i32 %n1, %z ; instead of %y
+  ret i32 %r
+}
Index: llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll
+++ llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll
@@ -0,0 +1,708 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi < %s | FileCheck %s --check-prefix=CHECK-NOBMI
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi < %s | FileCheck %s --check-prefix=CHECK-BMI
+; https://bugs.llvm.org/show_bug.cgi?id=37104
+
+define i8 @out8(i8 %x, i8 %y, i8 %mask) {
+; CHECK-NOBMI-LABEL: out8:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: andl %edx, %edi
+; CHECK-NOBMI-NEXT: notb %dl
+; CHECK-NOBMI-NEXT: andb %sil, %dl
+; CHECK-NOBMI-NEXT: orb %dil, %dl
+; CHECK-NOBMI-NEXT: movl %edx, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: out8:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: andl %edx, %edi
+; CHECK-BMI-NEXT: notb %dl
+; CHECK-BMI-NEXT: andb %sil, %dl
+; CHECK-BMI-NEXT: orb %dil, %dl
+; CHECK-BMI-NEXT: movl %edx, %eax
+; CHECK-BMI-NEXT: retq
+  %mx = and i8 %x, %mask ; bits of %x where %mask is set
+  %notmask = xor i8 %mask, -1 ; ~%mask
+  %my = and i8 %y, %notmask ; bits of %y where %mask is clear
+  %r = or i8 %mx, %my ; merge the two disjoint selections
+  ret i8 %r
+}
+
+define i16 @out16(i16 %x, i16 %y, i16 %mask) {
+; CHECK-NOBMI-LABEL: out16:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: andl %edx, %edi
+; CHECK-NOBMI-NEXT: notl %edx
+; CHECK-NOBMI-NEXT: andl %esi, %edx
+; CHECK-NOBMI-NEXT: orl %edi, %edx
+; CHECK-NOBMI-NEXT: movl %edx, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: out16:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: andl %edx, %edi
+; CHECK-BMI-NEXT: andnl %esi, %edx, %eax
+; CHECK-BMI-NEXT: orl %edi, %eax
+; CHECK-BMI-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-BMI-NEXT: retq
+  %mx = and i16 %x, %mask
+  %notmask = xor i16 %mask, -1 ; ~%mask
+  %my = and i16 %y, %notmask
+  %r = or i16 %mx, %my
+  ret i16 %r
+}
+
+define i32 @out32(i32 %x, i32 %y, i32 %mask) {
+; CHECK-NOBMI-LABEL: out32:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: andl %edx, %edi
+; CHECK-NOBMI-NEXT: notl %edx
+; CHECK-NOBMI-NEXT: andl %esi, %edx
+; CHECK-NOBMI-NEXT: orl %edi, %edx
+; CHECK-NOBMI-NEXT: movl %edx, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: out32:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: andl %edx, %edi
+; CHECK-BMI-NEXT: andnl %esi, %edx, %eax
+; CHECK-BMI-NEXT: orl %edi, %eax
+; CHECK-BMI-NEXT: retq
+  %mx = and i32 %x, %mask
+  %notmask = xor i32 %mask, -1 ; ~%mask
+  %my = and i32 %y, %notmask
+  %r = or i32 %mx, %my
+  ret i32 %r
+}
+
+define i64 @out64(i64 %x, i64 %y, i64 %mask) {
+; CHECK-NOBMI-LABEL: out64:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: andq %rdx, %rdi
+; CHECK-NOBMI-NEXT: notq %rdx
+; CHECK-NOBMI-NEXT: andq %rsi, %rdx
+; CHECK-NOBMI-NEXT: orq %rdi, %rdx
+; CHECK-NOBMI-NEXT: movq %rdx, %rax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: out64:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: andq %rdx, %rdi
+; CHECK-BMI-NEXT: andnq %rsi, %rdx, %rax
+; CHECK-BMI-NEXT: orq %rdi, %rax
+; CHECK-BMI-NEXT: retq
+  %mx = and i64 %x, %mask
+  %notmask = xor i64 %mask, -1 ; ~%mask
+  %my = and i64 %y, %notmask
+  %r = or i64 %mx, %my
+  ret i64 %r
+}
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Should be the same as the previous one: ((x ^ y) & m) ^ y is (x & m) | (y & ~m).
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+define i8 @in8(i8 %x, i8 %y, i8 %mask) {
+; CHECK-NOBMI-LABEL: in8:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: andl %edx, %edi
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: movl %edi, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in8:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: andl %edx, %edi
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: movl %edi, %eax
+; CHECK-BMI-NEXT: retq
+  %n0 = xor i8 %x, %y ; bits in which %x and %y differ
+  %n1 = and i8 %n0, %mask ; keep the differing bits selected by %mask
+  %r = xor i8 %n1, %y ; %x where %mask is set, %y elsewhere
+  ret i8 %r
+}
+
+define i16 @in16(i16 %x, i16 %y, i16 %mask) {
+; CHECK-NOBMI-LABEL: in16:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: andl %edx, %edi
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: movl %edi, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in16:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: andl %edx, %edi
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: movl %edi, %eax
+; CHECK-BMI-NEXT: retq
+  %n0 = xor i16 %x, %y
+  %n1 = and i16 %n0, %mask
+  %r = xor i16 %n1, %y ; %x where %mask is set, %y elsewhere
+  ret i16 %r
+}
+
+define i32 @in32(i32 %x, i32 %y, i32 %mask) {
+; CHECK-NOBMI-LABEL: in32:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: andl %edx, %edi
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: movl %edi, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in32:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: andl %edx, %edi
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: movl %edi, %eax
+; CHECK-BMI-NEXT: retq
+  %n0 = xor i32 %x, %y
+  %n1 = and i32 %n0, %mask
+  %r = xor i32 %n1, %y ; %x where %mask is set, %y elsewhere
+  ret i32 %r
+}
+
+define i64 @in64(i64 %x, i64 %y, i64 %mask) {
+; CHECK-NOBMI-LABEL: in64:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: xorq %rsi, %rdi
+; CHECK-NOBMI-NEXT: andq %rdx, %rdi
+; CHECK-NOBMI-NEXT: xorq %rsi, %rdi
+; CHECK-NOBMI-NEXT: movq %rdi, %rax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in64:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: xorq %rsi, %rdi
+; CHECK-BMI-NEXT: andq %rdx, %rdi
+; CHECK-BMI-NEXT: xorq %rsi, %rdi
+; CHECK-BMI-NEXT: movq %rdi, %rax
+; CHECK-BMI-NEXT: retq
+  %n0 = xor i64 %x, %y
+  %n1 = and i64 %n0, %mask
+  %r = xor i64 %n1, %y ; %x where %mask is set, %y elsewhere
+  ret i64 %r
+}
+; ============================================================================ ;
+; Commutativity tests. Suffix _A_B_C: A = final xor uses %x instead of %y, B = final xor operands swapped, C = 'and' operands swapped.
+; ============================================================================ ;
+define i32 @in_commutativity_0_0_1(i32 %x, i32 %y, i32 %mask) {
+; CHECK-NOBMI-LABEL: in_commutativity_0_0_1:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: andl %edx, %edi
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: movl %edi, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in_commutativity_0_0_1:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: andl %edx, %edi
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: movl %edi, %eax
+; CHECK-BMI-NEXT: retq
+  %n0 = xor i32 %x, %y
+  %n1 = and i32 %mask, %n0 ; swapped
+  %r = xor i32 %n1, %y
+  ret i32 %r
+}
+define i32 @in_commutativity_0_1_0(i32 %x, i32 %y, i32 %mask) {
+; CHECK-NOBMI-LABEL: in_commutativity_0_1_0:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: andl %edx, %edi
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: movl %edi, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in_commutativity_0_1_0:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: andl %edx, %edi
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: movl %edi, %eax
+; CHECK-BMI-NEXT: retq
+  %n0 = xor i32 %x, %y
+  %n1 = and i32 %n0, %mask
+  %r = xor i32 %y, %n1 ; swapped
+  ret i32 %r
+}
+define i32 @in_commutativity_0_1_1(i32 %x, i32 %y, i32 %mask) {
+; CHECK-NOBMI-LABEL: in_commutativity_0_1_1:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: andl %edx, %edi
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: movl %edi, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in_commutativity_0_1_1:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: andl %edx, %edi
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: movl %edi, %eax
+; CHECK-BMI-NEXT: retq
+  %n0 = xor i32 %x, %y
+  %n1 = and i32 %mask, %n0 ; swapped
+  %r = xor i32 %y, %n1 ; swapped
+  ret i32 %r
+}
+define i32 @in_commutativity_1_0_0(i32 %x, i32 %y, i32 %mask) {
+; CHECK-NOBMI-LABEL: in_commutativity_1_0_0:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: xorl %edi, %esi
+; CHECK-NOBMI-NEXT: andl %edx, %esi
+; CHECK-NOBMI-NEXT: xorl %edi, %esi
+; CHECK-NOBMI-NEXT: movl %esi, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in_commutativity_1_0_0:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: xorl %edi, %esi
+; CHECK-BMI-NEXT: andl %edx, %esi
+; CHECK-BMI-NEXT: xorl %edi, %esi
+; CHECK-BMI-NEXT: movl %esi, %eax
+; CHECK-BMI-NEXT: retq
+  %n0 = xor i32 %x, %y
+  %n1 = and i32 %n0, %mask
+  %r = xor i32 %n1, %x ; %x instead of %y
+  ret i32 %r
+}
+define i32 @in_commutativity_1_0_1(i32 %x, i32 %y, i32 %mask) {
+; CHECK-NOBMI-LABEL: in_commutativity_1_0_1:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: xorl %edi, %esi
+; CHECK-NOBMI-NEXT: andl %edx, %esi
+; CHECK-NOBMI-NEXT: xorl %edi, %esi
+; CHECK-NOBMI-NEXT: movl %esi, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in_commutativity_1_0_1:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: xorl %edi, %esi
+; CHECK-BMI-NEXT: andl %edx, %esi
+; CHECK-BMI-NEXT: xorl %edi, %esi
+; CHECK-BMI-NEXT: movl %esi, %eax
+; CHECK-BMI-NEXT: retq
+  %n0 = xor i32 %x, %y
+  %n1 = and i32 %mask, %n0 ; swapped
+  %r = xor i32 %n1, %x ; %x instead of %y
+  ret i32 %r
+}
+define i32 @in_commutativity_1_1_0(i32 %x, i32 %y, i32 %mask) {
+; CHECK-NOBMI-LABEL: in_commutativity_1_1_0:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: xorl %edi, %esi
+; CHECK-NOBMI-NEXT: andl %edx, %esi
+; CHECK-NOBMI-NEXT: xorl %edi, %esi
+; CHECK-NOBMI-NEXT: movl %esi, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in_commutativity_1_1_0:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: xorl %edi, %esi
+; CHECK-BMI-NEXT: andl %edx, %esi
+; CHECK-BMI-NEXT: xorl %edi, %esi
+; CHECK-BMI-NEXT: movl %esi, %eax
+; CHECK-BMI-NEXT: retq
+  %n0 = xor i32 %x, %y
+  %n1 = and i32 %n0, %mask
+  %r = xor i32 %x, %n1 ; swapped, %x instead of %y
+  ret i32 %r
+}
+define i32 @in_commutativity_1_1_1(i32 %x, i32 %y, i32 %mask) {
+; CHECK-NOBMI-LABEL: in_commutativity_1_1_1:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: xorl %edi, %esi
+; CHECK-NOBMI-NEXT: andl %edx, %esi
+; CHECK-NOBMI-NEXT: xorl %edi, %esi
+; CHECK-NOBMI-NEXT: movl %esi, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in_commutativity_1_1_1:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: xorl %edi, %esi
+; CHECK-BMI-NEXT: andl %edx, %esi
+; CHECK-BMI-NEXT: xorl %edi, %esi
+; CHECK-BMI-NEXT: movl %esi, %eax
+; CHECK-BMI-NEXT: retq
+  %n0 = xor i32 %x, %y
+  %n1 = and i32 %mask, %n0 ; swapped
+  %r = xor i32 %x, %n1 ; swapped, %x instead of %y
+  ret i32 %r
+}
+; ============================================================================ ;
+; Y is an 'and' too.
+; ============================================================================ ;
+define i32 @in_complex_y0(i32 %x, i32 %y_hi, i32 %y_low, i32 %mask) {
+; CHECK-NOBMI-LABEL: in_complex_y0:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: andl %edx, %esi
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: andl %ecx, %edi
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: movl %edi, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in_complex_y0:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: andl %edx, %esi
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: andl %ecx, %edi
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: movl %edi, %eax
+; CHECK-BMI-NEXT: retq
+  %y = and i32 %y_hi, %y_low ; %y is itself the result of an 'and'
+  %n0 = xor i32 %x, %y
+  %n1 = and i32 %n0, %mask
+  %r = xor i32 %n1, %y
+  ret i32 %r
+}
+define i32 @in_complex_y1(i32 %x, i32 %y_hi, i32 %y_low, i32 %mask) {
+; CHECK-NOBMI-LABEL: in_complex_y1:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: andl %edx, %esi
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: andl %ecx, %edi
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: movl %edi, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in_complex_y1:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: andl %edx, %esi
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: andl %ecx, %edi
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: movl %edi, %eax
+; CHECK-BMI-NEXT: retq
+  %y = and i32 %y_hi, %y_low ; %y is itself the result of an 'and'
+  %n0 = xor i32 %x, %y
+  %n1 = and i32 %n0, %mask
+  %r = xor i32 %y, %n1 ; swapped
+  ret i32 %r
+}
+; ============================================================================ ;
+; M is an 'xor' too.
+; ============================================================================ ;
+define i32 @in_complex_m0(i32 %x, i32 %y, i32 %m_a, i32 %m_b) {
+; CHECK-NOBMI-LABEL: in_complex_m0:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: xorl %ecx, %edx
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: andl %edx, %edi
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: movl %edi, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in_complex_m0:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: xorl %ecx, %edx
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: andl %edx, %edi
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: movl %edi, %eax
+; CHECK-BMI-NEXT: retq
+  %mask = xor i32 %m_a, %m_b ; the mask is itself the result of an 'xor'
+  %n0 = xor i32 %x, %y
+  %n1 = and i32 %n0, %mask
+  %r = xor i32 %n1, %y
+  ret i32 %r
+}
+define i32 @in_complex_m1(i32 %x, i32 %y, i32 %m_a, i32 %m_b) {
+; CHECK-NOBMI-LABEL: in_complex_m1:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: xorl %ecx, %edx
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: andl %edx, %edi
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: movl %edi, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in_complex_m1:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: xorl %ecx, %edx
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: andl %edx, %edi
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: movl %edi, %eax
+; CHECK-BMI-NEXT: retq
+  %mask = xor i32 %m_a, %m_b ; the mask is itself the result of an 'xor'
+  %n0 = xor i32 %x, %y
+  %n1 = and i32 %mask, %n0 ; swapped
+  %r = xor i32 %n1, %y
+  ret i32 %r
+}
+; ============================================================================ ;
+; Both Y and M are complex.
+; ============================================================================ ;
+define i32 @in_complex_y0_m0(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) {
+; CHECK-NOBMI-LABEL: in_complex_y0_m0:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: andl %edx, %esi
+; CHECK-NOBMI-NEXT: xorl %r8d, %ecx
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: andl %ecx, %edi
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: movl %edi, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in_complex_y0_m0:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: andl %edx, %esi
+; CHECK-BMI-NEXT: xorl %r8d, %ecx
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: andl %ecx, %edi
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: movl %edi, %eax
+; CHECK-BMI-NEXT: retq
+  %y = and i32 %y_hi, %y_low ; %y is itself the result of an 'and'
+  %mask = xor i32 %m_a, %m_b ; the mask is itself the result of an 'xor'
+  %n0 = xor i32 %x, %y
+  %n1 = and i32 %n0, %mask
+  %r = xor i32 %n1, %y
+  ret i32 %r
+}
+define i32 @in_complex_y1_m0(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) {
+; CHECK-NOBMI-LABEL: in_complex_y1_m0:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: andl %edx, %esi
+; CHECK-NOBMI-NEXT: xorl %r8d, %ecx
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: andl %ecx, %edi
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: movl %edi, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in_complex_y1_m0:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: andl %edx, %esi
+; CHECK-BMI-NEXT: xorl %r8d, %ecx
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: andl %ecx, %edi
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: movl %edi, %eax
+; CHECK-BMI-NEXT: retq
+  %y = and i32 %y_hi, %y_low
+  %mask = xor i32 %m_a, %m_b
+  %n0 = xor i32 %x, %y
+  %n1 = and i32 %n0, %mask
+  %r = xor i32 %y, %n1 ; swapped
+  ret i32 %r
+}
+define i32 @in_complex_y0_m1(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) {
+; CHECK-NOBMI-LABEL: in_complex_y0_m1:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: andl %edx, %esi
+; CHECK-NOBMI-NEXT: xorl %r8d, %ecx
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: andl %ecx, %edi
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: movl %edi, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in_complex_y0_m1:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: andl %edx, %esi
+; CHECK-BMI-NEXT: xorl %r8d, %ecx
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: andl %ecx, %edi
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: movl %edi, %eax
+; CHECK-BMI-NEXT: retq
+  %y = and i32 %y_hi, %y_low
+  %mask = xor i32 %m_a, %m_b
+  %n0 = xor i32 %x, %y
+  %n1 = and i32 %mask, %n0 ; swapped
+  %r = xor i32 %n1, %y
+  ret i32 %r
+}
+define i32 @in_complex_y1_m1(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) {
+; CHECK-NOBMI-LABEL: in_complex_y1_m1:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: andl %edx, %esi
+; CHECK-NOBMI-NEXT: xorl %r8d, %ecx
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: andl %ecx, %edi
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: movl %edi, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in_complex_y1_m1:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: andl %edx, %esi
+; CHECK-BMI-NEXT: xorl %r8d, %ecx
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: andl %ecx, %edi
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: movl %edi, %eax
+; CHECK-BMI-NEXT: retq
+  %y = and i32 %y_hi, %y_low
+  %mask = xor i32 %m_a, %m_b
+  %n0 = xor i32 %x, %y
+  %n1 = and i32 %mask, %n0 ; swapped
+  %r = xor i32 %y, %n1 ; swapped
+  ret i32 %r
+}
+; ============================================================================ ;
+; Negative tests. Should not be folded.
+; ============================================================================ ;
+; Multi-use tests: an intermediate value (%n1 or %n0) also feeds a call to @use32.
+declare void @use32(i32) nounwind ; external function that gives a value a second use
+define i32 @in_multiuse_A(i32 %x, i32 %y, i32 %z, i32 %mask) nounwind {
+; CHECK-NOBMI-LABEL: in_multiuse_A:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: pushq %rbp
+; CHECK-NOBMI-NEXT: pushq %rbx
+; CHECK-NOBMI-NEXT: pushq %rax
+; CHECK-NOBMI-NEXT: movl %esi, %ebx
+; CHECK-NOBMI-NEXT: movl %edi, %ebp
+; CHECK-NOBMI-NEXT: xorl %esi, %ebp
+; CHECK-NOBMI-NEXT: andl %ecx, %ebp
+; CHECK-NOBMI-NEXT: movl %ebp, %edi
+; CHECK-NOBMI-NEXT: callq use32
+; CHECK-NOBMI-NEXT: xorl %ebx, %ebp
+; CHECK-NOBMI-NEXT: movl %ebp, %eax
+; CHECK-NOBMI-NEXT: addq $8, %rsp
+; CHECK-NOBMI-NEXT: popq %rbx
+; CHECK-NOBMI-NEXT: popq %rbp
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in_multiuse_A:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: pushq %rbp
+; CHECK-BMI-NEXT: pushq %rbx
+; CHECK-BMI-NEXT: pushq %rax
+; CHECK-BMI-NEXT: movl %esi, %ebx
+; CHECK-BMI-NEXT: movl %edi, %ebp
+; CHECK-BMI-NEXT: xorl %esi, %ebp
+; CHECK-BMI-NEXT: andl %ecx, %ebp
+; CHECK-BMI-NEXT: movl %ebp, %edi
+; CHECK-BMI-NEXT: callq use32
+; CHECK-BMI-NEXT: xorl %ebx, %ebp
+; CHECK-BMI-NEXT: movl %ebp, %eax
+; CHECK-BMI-NEXT: addq $8, %rsp
+; CHECK-BMI-NEXT: popq %rbx
+; CHECK-BMI-NEXT: popq %rbp
+; CHECK-BMI-NEXT: retq
+  %n0 = xor i32 %x, %y
+  %n1 = and i32 %n0, %mask
+  call void @use32(i32 %n1) ; extra use of %n1 (the 'and')
+  %r = xor i32 %n1, %y
+  ret i32 %r
+}
+define i32 @in_multiuse_B(i32 %x, i32 %y, i32 %z, i32 %mask) nounwind {
+; CHECK-NOBMI-LABEL: in_multiuse_B:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: pushq %rbp
+; CHECK-NOBMI-NEXT: pushq %rbx
+; CHECK-NOBMI-NEXT: pushq %rax
+; CHECK-NOBMI-NEXT: movl %ecx, %ebx
+; CHECK-NOBMI-NEXT: movl %esi, %ebp
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: andl %edi, %ebx
+; CHECK-NOBMI-NEXT: callq use32
+; CHECK-NOBMI-NEXT: xorl %ebp, %ebx
+; CHECK-NOBMI-NEXT: movl %ebx, %eax
+; CHECK-NOBMI-NEXT: addq $8, %rsp
+; CHECK-NOBMI-NEXT: popq %rbx
+; CHECK-NOBMI-NEXT: popq %rbp
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: in_multiuse_B:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: pushq %rbp
+; CHECK-BMI-NEXT: pushq %rbx
+; CHECK-BMI-NEXT: pushq %rax
+; CHECK-BMI-NEXT: movl %ecx, %ebx
+; CHECK-BMI-NEXT: movl %esi, %ebp
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: andl %edi, %ebx
+; CHECK-BMI-NEXT: callq use32
+; CHECK-BMI-NEXT: xorl %ebp, %ebx
+; CHECK-BMI-NEXT: movl %ebx, %eax
+; CHECK-BMI-NEXT: addq $8, %rsp
+; CHECK-BMI-NEXT: popq %rbx
+; CHECK-BMI-NEXT: popq %rbp
+; CHECK-BMI-NEXT: retq
+  %n0 = xor i32 %x, %y
+  %n1 = and i32 %n0, %mask
+  call void @use32(i32 %n0) ; extra use of %n0 (the inner 'xor')
+  %r = xor i32 %n1, %y
+  ret i32 %r
+}
+; Various bad variants
+define i32 @n0_badmask(i32 %x, i32 %y, i32 %mask, i32 %mask2) {
+; CHECK-NOBMI-LABEL: n0_badmask:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: andl %edx, %edi
+; CHECK-NOBMI-NEXT: notl %ecx
+; CHECK-NOBMI-NEXT: andl %esi, %ecx
+; CHECK-NOBMI-NEXT: orl %edi, %ecx
+; CHECK-NOBMI-NEXT: movl %ecx, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: n0_badmask:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: andl %edx, %edi
+; CHECK-BMI-NEXT: andnl %esi, %ecx, %eax
+; CHECK-BMI-NEXT: orl %edi, %eax
+; CHECK-BMI-NEXT: retq
+  %mx = and i32 %x, %mask
+  %notmask = xor i32 %mask2, -1 ; %mask2 instead of %mask
+  %my = and i32 %y, %notmask
+  %r = or i32 %mx, %my
+  ret i32 %r
+}
+define i32 @n0_badxor(i32 %x, i32 %y, i32 %mask) {
+; CHECK-NOBMI-LABEL: n0_badxor:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: andl %edx, %edi
+; CHECK-NOBMI-NEXT: xorl $1, %edx
+; CHECK-NOBMI-NEXT: andl %esi, %edx
+; CHECK-NOBMI-NEXT: orl %edi, %edx
+; CHECK-NOBMI-NEXT: movl %edx, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: n0_badxor:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: andl %edx, %edi
+; CHECK-BMI-NEXT: xorl $1, %edx
+; CHECK-BMI-NEXT: andl %esi, %edx
+; CHECK-BMI-NEXT: orl %edi, %edx
+; CHECK-BMI-NEXT: movl %edx, %eax
+; CHECK-BMI-NEXT: retq
+  %mx = and i32 %x, %mask
+  %notmask = xor i32 %mask, 1 ; instead of -1
+  %my = and i32 %y, %notmask
+  %r = or i32 %mx, %my
+  ret i32 %r
+}
+define i32 @n1_thirdvar(i32 %x, i32 %y, i32 %z, i32 %mask) {
+; CHECK-NOBMI-LABEL: n1_thirdvar:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: xorl %esi, %edi
+; CHECK-NOBMI-NEXT: andl %ecx, %edi
+; CHECK-NOBMI-NEXT: xorl %edx, %edi
+; CHECK-NOBMI-NEXT: movl %edi, %eax
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI-LABEL: n1_thirdvar:
+; CHECK-BMI: # %bb.0:
+; CHECK-BMI-NEXT: xorl %esi, %edi
+; CHECK-BMI-NEXT: andl %ecx, %edi
+; CHECK-BMI-NEXT: xorl %edx, %edi
+; CHECK-BMI-NEXT: movl %edi, %eax
+; CHECK-BMI-NEXT: retq
+  %n0 = xor i32 %x, %y
+  %n1 = and i32 %n0, %mask
+  %r = xor i32 %n1, %z ; instead of %y
+  ret i32 %r
+}