diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll @@ -0,0 +1,111 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +define i1 @test_srem_odd(i29 %X) nounwind { +; CHECK-LABEL: test_srem_odd: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w9, #33099 +; CHECK-NEXT: mov w10, #64874 +; CHECK-NEXT: sbfx w8, w0, #0, #29 +; CHECK-NEXT: movk w9, #48986, lsl #16 +; CHECK-NEXT: movk w10, #330, lsl #16 +; CHECK-NEXT: madd w8, w8, w9, w10 +; CHECK-NEXT: mov w9, #64213 +; CHECK-NEXT: movk w9, #661, lsl #16 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: ret + %srem = srem i29 %X, 99 + %cmp = icmp eq i29 %srem, 0 + ret i1 %cmp +} + +define i1 @test_srem_even(i4 %X) nounwind { +; CHECK-LABEL: test_srem_even: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w9, #43691 +; CHECK-NEXT: sbfx w8, w0, #0, #4 +; CHECK-NEXT: movk w9, #10922, lsl #16 +; CHECK-NEXT: smull x9, w8, w9 +; CHECK-NEXT: lsr x10, x9, #63 +; CHECK-NEXT: lsr x9, x9, #32 +; CHECK-NEXT: add w9, w9, w10 +; CHECK-NEXT: mov w10, #6 +; CHECK-NEXT: msub w8, w9, w10, w8 +; CHECK-NEXT: cmp w8, #1 // =1 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %srem = srem i4 %X, 6 + %cmp = icmp eq i4 %srem, 1 + ret i1 %cmp +} + +define i1 @test_srem_pow2_setne(i6 %X) nounwind { +; CHECK-LABEL: test_srem_pow2_setne: +; CHECK: // %bb.0: +; CHECK-NEXT: sbfx w8, w0, #0, #6 +; CHECK-NEXT: ubfx w8, w8, #9, #2 +; CHECK-NEXT: add w8, w0, w8 +; CHECK-NEXT: and w8, w8, #0x3c +; CHECK-NEXT: sub w8, w0, w8 +; CHECK-NEXT: tst w8, #0x3f +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %srem = srem i6 %X, 4 + %cmp = icmp ne i6 %srem, 0 + ret i1 %cmp +} + +define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind { +; CHECK-LABEL: test_srem_vec: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x10, #7281 +; CHECK-NEXT: movk x10, #29127, lsl #16 +; CHECK-NEXT: movk x10, #50972, lsl #32 +; CHECK-NEXT: sbfx x9, x2, #0, #33 +; CHECK-NEXT: movk x10, #7281, lsl #48 +; CHECK-NEXT: mov x11, #8589934591 +; CHECK-NEXT: mov x12, #7282 +; CHECK-NEXT: movk x12, #29127, lsl #16 +; CHECK-NEXT: dup v0.2d, x11 +; CHECK-NEXT: adrp x11, .LCPI3_0 +; CHECK-NEXT: smulh x10, x9, x10 +; CHECK-NEXT: movk x12, #50972, lsl #32 +; CHECK-NEXT: ldr q1, [x11, :lo12:.LCPI3_0] +; CHECK-NEXT: adrp x11, .LCPI3_1 +; CHECK-NEXT: sub x10, x10, x9 +; CHECK-NEXT: sbfx x8, x1, #0, #33 +; CHECK-NEXT: movk x12, #7281, lsl #48 +; CHECK-NEXT: ldr q2, [x11, :lo12:.LCPI3_1] +; CHECK-NEXT: asr x11, x10, #3 +; CHECK-NEXT: add x10, x11, x10, lsr #63 +; CHECK-NEXT: smulh x11, x8, x12 +; CHECK-NEXT: add x11, x11, x11, lsr #63 +; CHECK-NEXT: add x11, x11, x11, lsl #3 +; CHECK-NEXT: sub x8, x8, x11 +; CHECK-NEXT: sbfx x11, x0, #0, #33 +; CHECK-NEXT: smulh x12, x11, x12 +; CHECK-NEXT: add x12, x12, x12, lsr #63 +; CHECK-NEXT: add x12, x12, x12, lsl #3 +; CHECK-NEXT: sub x11, x11, x12 +; CHECK-NEXT: add x10, x10, x10, lsl #3 +; CHECK-NEXT: fmov d3, x11 +; CHECK-NEXT: add x9, x9, x10 +; CHECK-NEXT: mov v3.d[1], x8 +; CHECK-NEXT: fmov d4, x9 +; CHECK-NEXT: and v4.16b, v4.16b, v0.16b +; CHECK-NEXT: and v0.16b, v3.16b, v0.16b +; CHECK-NEXT: cmeq v0.2d, v0.2d, v1.2d +; CHECK-NEXT: cmeq v1.2d, v4.2d, v2.2d +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: mov w1, v0.s[1] +; CHECK-NEXT: 
fmov w0, s0 +; CHECK-NEXT: fmov w2, s1 +; CHECK-NEXT: ret + %srem = srem <3 x i33> %X, + %cmp = icmp ne <3 x i33> %srem, + ret <3 x i1> %cmp +} diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll @@ -0,0 +1,117 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +define i1 @test_urem_odd(i13 %X) nounwind { +; CHECK-LABEL: test_urem_odd: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w9, #52429 +; CHECK-NEXT: and w8, w0, #0x1fff +; CHECK-NEXT: movk w9, #52428, lsl #16 +; CHECK-NEXT: mul w8, w8, w9 +; CHECK-NEXT: mov w9, #13108 +; CHECK-NEXT: movk w9, #13107, lsl #16 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: ret + %urem = urem i13 %X, 5 + %cmp = icmp eq i13 %urem, 0 + ret i1 %cmp +} + +define i1 @test_urem_even(i27 %X) nounwind { +; CHECK-LABEL: test_urem_even: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w9, #28087 +; CHECK-NEXT: and w8, w0, #0x7ffffff +; CHECK-NEXT: movk w9, #46811, lsl #16 +; CHECK-NEXT: mul w8, w8, w9 +; CHECK-NEXT: mov w9, #9363 +; CHECK-NEXT: ror w8, w8, #1 +; CHECK-NEXT: movk w9, #4681, lsl #16 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: ret + %urem = urem i27 %X, 14 + %cmp = icmp eq i27 %urem, 0 + ret i1 %cmp +} + +define i1 @test_urem_odd_setne(i4 %X) nounwind { +; CHECK-LABEL: test_urem_odd_setne: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w9, #52429 +; CHECK-NEXT: and w8, w0, #0xf +; CHECK-NEXT: movk w9, #52428, lsl #16 +; CHECK-NEXT: mul w8, w8, w9 +; CHECK-NEXT: mov w9, #858993459 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: ret + %urem = urem i4 %X, 5 + %cmp = icmp ne i4 %urem, 0 + ret i1 %cmp +} + +define i1 @test_urem_negative_odd(i9 %X) nounwind { +; CHECK-LABEL: test_urem_negative_odd: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w9, #57651 +; CHECK-NEXT: and w8, w0, #0x1ff +; CHECK-NEXT: movk w9, #43302, lsl #16 +; CHECK-NEXT: mul w8, w8, w9 +; CHECK-NEXT: mov w9, #17191 +; CHECK-NEXT: movk w9, #129, lsl #16 +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: ret + %urem = urem i9 %X, -5 + %cmp = icmp ne i9 %urem, 0 + ret i1 %cmp +} + +define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind { +; CHECK-LABEL: test_urem_vec: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, #43691 +; CHECK-NEXT: and w8, w0, #0x7ff +; CHECK-NEXT: movk w12, #43690, lsl #16 +; CHECK-NEXT: umull x12, w8, w12 +; CHECK-NEXT: mov w11, #25663 +; CHECK-NEXT: mov w13, #6 +; CHECK-NEXT: lsr x12, x12, #34 +; CHECK-NEXT: and w10, w2, #0x7ff +; CHECK-NEXT: movk w11, #160, lsl #16 +; CHECK-NEXT: msub w8, w12, w13, w8 +; CHECK-NEXT: mov w12, #18725 +; CHECK-NEXT: and w9, w1, #0x7ff +; CHECK-NEXT: movk w12, #9362, lsl #16 +; CHECK-NEXT: umull x11, w10, w11 +; CHECK-NEXT: adrp x13, .LCPI4_0 +; CHECK-NEXT: umull x12, w9, w12 +; CHECK-NEXT: lsr x11, x11, #32 +; CHECK-NEXT: ldr d0, [x13, :lo12:.LCPI4_0] +; CHECK-NEXT: lsr x12, x12, #32 +; CHECK-NEXT: sub w13, w10, w11 +; CHECK-NEXT: add w11, w11, w13, lsr #1 +; CHECK-NEXT: sub w13, w9, w12 +; CHECK-NEXT: add w12, w12, w13, lsr #1 +; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: mov w8, #2043 +; CHECK-NEXT: lsr w11, w11, #10 +; CHECK-NEXT: lsr w12, w12, #2 +; CHECK-NEXT: msub w8, w11, w8, w10 +; CHECK-NEXT: sub w10, w12, w12, lsl #3 +; CHECK-NEXT: add w9, w9, w10 +; CHECK-NEXT: mov v1.h[1], w9 +; CHECK-NEXT: mov v1.h[2], w8 +; 
CHECK-NEXT: bic v1.4h, #248, lsl #8 +; CHECK-NEXT: cmeq v0.4h, v1.4h, v0.4h +; CHECK-NEXT: mvn v0.8b, v0.8b +; CHECK-NEXT: umov w0, v0.h[0] +; CHECK-NEXT: umov w1, v0.h[1] +; CHECK-NEXT: umov w2, v0.h[2] +; CHECK-NEXT: ret + %urem = urem <3 x i11> %X, + %cmp = icmp ne <3 x i11> %urem, + ret <3 x i1> %cmp +} diff --git a/llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll @@ -0,0 +1,105 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-mesa3d < %s | FileCheck %s + +define i1 @test_srem_odd(i29 %X) nounwind { +; CHECK-LABEL: test_srem_odd: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_bfe_i32 v0, v0, 0, 29 +; CHECK-NEXT: s_mov_b32 s5, 0xa57eb503 +; CHECK-NEXT: s_movk_i32 s4, 0x63 +; CHECK-NEXT: v_mul_hi_i32 v1, v0, s5 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v0 +; CHECK-NEXT: v_lshrrev_b32_e32 v2, 31, v1 +; CHECK-NEXT: v_ashrrev_i32_e32 v1, 6, v1 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_mul_lo_u32 v1, v1, s4 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; CHECK-NEXT: s_setpc_b64 s[30:31] + %srem = srem i29 %X, 99 + %cmp = icmp eq i29 %srem, 0 + ret i1 %cmp +} + +define i1 @test_srem_even(i4 %X) nounwind { +; CHECK-LABEL: test_srem_even: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_bfe_i32 v0, v0, 0, 4 +; CHECK-NEXT: s_mov_b32 s4, 0x2aaaaaab +; CHECK-NEXT: v_mul_hi_i32 v1, v0, s4 +; CHECK-NEXT: v_lshrrev_b32_e32 v2, 31, v1 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_mul_lo_u32 v1, v1, 6 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; CHECK-NEXT: s_setpc_b64 s[30:31] + %srem = srem i4 %X, 6 + %cmp = icmp eq i4 %srem, 1 + ret i1 %cmp +} + +define i1 @test_srem_pow2_setne(i6 %X) nounwind { +; CHECK-LABEL: test_srem_pow2_setne: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_bfe_i32 v1, v0, 0, 6 +; CHECK-NEXT: v_bfe_u32 v1, v1, 9, 2 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v0, v1 +; CHECK-NEXT: v_and_b32_e32 v1, 60, v1 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; CHECK-NEXT: v_and_b32_e32 v0, 63, v0 +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; CHECK-NEXT: s_setpc_b64 s[30:31] + %srem = srem i6 %X, 4 + %cmp = icmp ne i6 %srem, 0 + ret i1 %cmp +} + +define <3 x i1> @test_srem_vec(<3 x i31> %X) nounwind { +; CHECK-LABEL: test_srem_vec: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_bfe_i32 v3, v2, 0, 31 +; CHECK-NEXT: v_bfe_i32 v4, v1, 0, 31 +; CHECK-NEXT: v_bfe_i32 v5, v0, 0, 31 +; CHECK-NEXT: s_mov_b32 s6, 0x38e38e39 +; CHECK-NEXT: s_mov_b32 s7, 0xc71c71c7 +; CHECK-NEXT: s_brev_b32 s4, -2 +; CHECK-NEXT: s_mov_b32 s5, 0x7ffffffd +; CHECK-NEXT: v_mul_hi_i32 v5, v5, s6 +; CHECK-NEXT: v_mul_hi_i32 v4, v4, s6 +; CHECK-NEXT: v_mul_hi_i32 v3, v3, s7 +; CHECK-NEXT: v_lshrrev_b32_e32 v6, 31, v5 +; CHECK-NEXT: v_lshrrev_b32_e32 v5, 1, v5 +; CHECK-NEXT: v_lshrrev_b32_e32 v7, 31, v4 +; CHECK-NEXT: v_lshrrev_b32_e32 v4, 1, v4 +; CHECK-NEXT: v_lshrrev_b32_e32 v8, 31, v3 +; CHECK-NEXT: v_lshrrev_b32_e32 v3, 1, v3 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; 
CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v8 +; CHECK-NEXT: v_mul_lo_u32 v5, v5, 9 +; CHECK-NEXT: v_mul_lo_u32 v4, v4, 9 +; CHECK-NEXT: v_mul_lo_u32 v3, v3, -9 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v1, v4 +; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 +; CHECK-NEXT: v_and_b32_e32 v2, s4, v2 +; CHECK-NEXT: v_and_b32_e32 v1, s4, v1 +; CHECK-NEXT: v_and_b32_e32 v0, s4, v0 +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 3, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 3, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: s_setpc_b64 s[30:31] + %srem = srem <3 x i31> %X, + %cmp = icmp ne <3 x i31> %srem, + ret <3 x i1> %cmp +} diff --git a/llvm/test/CodeGen/AMDGPU/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/AMDGPU/urem-seteq-illegal-types.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/urem-seteq-illegal-types.ll @@ -0,0 +1,109 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-mesa3d < %s | FileCheck %s + +define i1 @test_urem_odd(i13 %X) nounwind { +; CHECK-LABEL: test_urem_odd: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_and_b32_e32 v0, 0x1fff, v0 +; CHECK-NEXT: s_mov_b32 s4, 0xcccccccd +; CHECK-NEXT: v_mul_hi_u32 v1, v0, s4 +; CHECK-NEXT: v_lshrrev_b32_e32 v1, 2, v1 +; CHECK-NEXT: v_mul_lo_u32 v1, v1, 5 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; CHECK-NEXT: s_setpc_b64 s[30:31] + %urem = urem i13 %X, 5 + %cmp = icmp eq i13 %urem, 0 + ret i1 %cmp +} + +define i1 @test_urem_even(i27 %X) nounwind { +; CHECK-LABEL: test_urem_even: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_and_b32_e32 v1, 0x7ffffff, v0 +; CHECK-NEXT: v_bfe_u32 v0, v0, 1, 26 +; CHECK-NEXT: s_mov_b32 s4, 0x92492493 +; CHECK-NEXT: v_mul_hi_u32 v0, v0, s4 +; CHECK-NEXT: v_lshrrev_b32_e32 v0, 2, v0 +; CHECK-NEXT: v_mul_lo_u32 v0, v0, 14 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v1, v0 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; CHECK-NEXT: s_setpc_b64 s[30:31] + %urem = urem i27 %X, 14 + %cmp = icmp eq i27 %urem, 0 + ret i1 %cmp +} + +define i1 @test_urem_odd_setne(i4 %X) nounwind { +; CHECK-LABEL: test_urem_odd_setne: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_and_b32_e32 v0, 15, v0 +; CHECK-NEXT: s_mov_b32 s4, 0xcccccccd +; CHECK-NEXT: v_mul_hi_u32 v1, v0, s4 +; CHECK-NEXT: v_lshrrev_b32_e32 v1, 2, v1 +; CHECK-NEXT: v_mul_lo_u32 v1, v1, 5 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; CHECK-NEXT: s_setpc_b64 s[30:31] + %urem = urem i4 %X, 5 + %cmp = icmp ne i4 %urem, 0 + ret i1 %cmp +} + +define i1 @test_urem_negative_odd(i9 %X) nounwind { +; CHECK-LABEL: test_urem_negative_odd: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_and_b32_e32 v0, 0x1ff, v0 +; CHECK-NEXT: s_mov_b32 s4, 0x2050c9f9 +; CHECK-NEXT: s_movk_i32 s5, 0x1fb +; CHECK-NEXT: v_mul_hi_u32 v1, v0, s4 +; CHECK-NEXT: v_lshrrev_b32_e32 v1, 6, v1 +; CHECK-NEXT: v_mul_lo_u32 v1, v1, s5 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; CHECK-NEXT: v_cmp_ne_u32_e32 
vcc, 0, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; CHECK-NEXT: s_setpc_b64 s[30:31] + %urem = urem i9 %X, -5 + %cmp = icmp ne i9 %urem, 0 + ret i1 %cmp +} + +define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind { +; CHECK-LABEL: test_urem_vec: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_movk_i32 s4, 0x7ff +; CHECK-NEXT: s_mov_b32 s5, 0x8311eb33 +; CHECK-NEXT: s_mov_b32 s6, 0x20140c +; CHECK-NEXT: s_mov_b32 s7, 0xb6db6db7 +; CHECK-NEXT: s_mov_b32 s11, 0x49249249 +; CHECK-NEXT: s_mov_b32 s8, 0x24924924 +; CHECK-NEXT: s_mov_b32 s9, 0xaaaaaaab +; CHECK-NEXT: s_mov_b32 s10, 0x2aaaaaaa +; CHECK-NEXT: v_and_b32_e32 v0, s4, v0 +; CHECK-NEXT: v_and_b32_e32 v1, s4, v1 +; CHECK-NEXT: v_and_b32_e32 v2, s4, v2 +; CHECK-NEXT: v_mul_lo_u32 v2, v2, s5 +; CHECK-NEXT: v_mul_lo_u32 v1, v1, s7 +; CHECK-NEXT: v_mul_lo_u32 v0, v0, s9 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, 0xf9dc299a, v2 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, s11, v1 +; CHECK-NEXT: v_alignbit_b32 v0, v0, v0, 1 +; CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s10, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s8, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s6, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; CHECK-NEXT: s_setpc_b64 s[30:31] + %urem = urem <3 x i11> %X, + %cmp = icmp ne <3 x i11> %urem, + ret <3 x i1> %cmp +} diff --git a/llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/srem-seteq-illegal-types.ll @@ -0,0 +1,677 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=armv5-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=ARM5 +; RUN: llc -mtriple=armv6-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=ARM6 +; RUN: llc -mtriple=armv7-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=ARM7 +; RUN: llc -mtriple=armv8-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=ARM8 +; RUN: llc -mtriple=armv7-unknown-linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefixes=NEON7 +; RUN: llc -mtriple=armv8-unknown-linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefixes=NEON8 + +define i1 @test_srem_odd(i29 %X) nounwind { +; ARM5-LABEL: test_srem_odd: +; ARM5: @ %bb.0: +; ARM5-NEXT: ldr r2, .LCPI0_1 +; ARM5-NEXT: lsl r0, r0, #3 +; ARM5-NEXT: asr r0, r0, #3 +; ARM5-NEXT: ldr r1, .LCPI0_0 +; ARM5-NEXT: mla r3, r0, r2, r1 +; ARM5-NEXT: ldr r1, .LCPI0_2 +; ARM5-NEXT: mov r0, #0 +; ARM5-NEXT: cmp r3, r1 +; ARM5-NEXT: movlo r0, #1 +; ARM5-NEXT: bx lr +; ARM5-NEXT: .p2align 2 +; ARM5-NEXT: @ %bb.1: +; ARM5-NEXT: .LCPI0_0: +; ARM5-NEXT: .long 21691754 @ 0x14afd6a +; ARM5-NEXT: .LCPI0_1: +; ARM5-NEXT: .long 3210379595 @ 0xbf5a814b +; ARM5-NEXT: .LCPI0_2: +; ARM5-NEXT: .long 43383509 @ 0x295fad5 +; +; ARM6-LABEL: test_srem_odd: +; ARM6: @ %bb.0: +; ARM6-NEXT: ldr r2, .LCPI0_1 +; ARM6-NEXT: lsl r0, r0, #3 +; ARM6-NEXT: asr r0, r0, #3 +; ARM6-NEXT: ldr r1, .LCPI0_0 +; ARM6-NEXT: mla r1, r0, r2, r1 +; ARM6-NEXT: ldr r2, .LCPI0_2 +; ARM6-NEXT: mov r0, #0 +; ARM6-NEXT: cmp r1, r2 +; ARM6-NEXT: movlo r0, #1 +; ARM6-NEXT: bx lr +; ARM6-NEXT: .p2align 2 +; ARM6-NEXT: @ %bb.1: +; ARM6-NEXT: .LCPI0_0: +; ARM6-NEXT: .long 21691754 @ 0x14afd6a +; ARM6-NEXT: .LCPI0_1: +; ARM6-NEXT: .long 3210379595 @ 0xbf5a814b +; ARM6-NEXT: .LCPI0_2: +; ARM6-NEXT: .long 43383509 @ 0x295fad5 +; +; ARM7-LABEL: test_srem_odd: +; ARM7: @ %bb.0: +; ARM7-NEXT: movw r1, #64874 
+; ARM7-NEXT: movw r2, #33099 +; ARM7-NEXT: sbfx r0, r0, #0, #29 +; ARM7-NEXT: movt r1, #330 +; ARM7-NEXT: movt r2, #48986 +; ARM7-NEXT: mla r1, r0, r2, r1 +; ARM7-NEXT: movw r2, #64213 +; ARM7-NEXT: movt r2, #661 +; ARM7-NEXT: mov r0, #0 +; ARM7-NEXT: cmp r1, r2 +; ARM7-NEXT: movwlo r0, #1 +; ARM7-NEXT: bx lr +; +; ARM8-LABEL: test_srem_odd: +; ARM8: @ %bb.0: +; ARM8-NEXT: movw r1, #64874 +; ARM8-NEXT: movw r2, #33099 +; ARM8-NEXT: sbfx r0, r0, #0, #29 +; ARM8-NEXT: movt r1, #330 +; ARM8-NEXT: movt r2, #48986 +; ARM8-NEXT: mla r1, r0, r2, r1 +; ARM8-NEXT: movw r2, #64213 +; ARM8-NEXT: movt r2, #661 +; ARM8-NEXT: mov r0, #0 +; ARM8-NEXT: cmp r1, r2 +; ARM8-NEXT: movwlo r0, #1 +; ARM8-NEXT: bx lr +; +; NEON7-LABEL: test_srem_odd: +; NEON7: @ %bb.0: +; NEON7-NEXT: movw r1, #64874 +; NEON7-NEXT: movw r2, #33099 +; NEON7-NEXT: sbfx r0, r0, #0, #29 +; NEON7-NEXT: movt r1, #330 +; NEON7-NEXT: movt r2, #48986 +; NEON7-NEXT: mla r1, r0, r2, r1 +; NEON7-NEXT: movw r2, #64213 +; NEON7-NEXT: movt r2, #661 +; NEON7-NEXT: mov r0, #0 +; NEON7-NEXT: cmp r1, r2 +; NEON7-NEXT: movwlo r0, #1 +; NEON7-NEXT: bx lr +; +; NEON8-LABEL: test_srem_odd: +; NEON8: @ %bb.0: +; NEON8-NEXT: movw r1, #64874 +; NEON8-NEXT: movw r2, #33099 +; NEON8-NEXT: sbfx r0, r0, #0, #29 +; NEON8-NEXT: movt r1, #330 +; NEON8-NEXT: movt r2, #48986 +; NEON8-NEXT: mla r1, r0, r2, r1 +; NEON8-NEXT: movw r2, #64213 +; NEON8-NEXT: movt r2, #661 +; NEON8-NEXT: mov r0, #0 +; NEON8-NEXT: cmp r1, r2 +; NEON8-NEXT: movwlo r0, #1 +; NEON8-NEXT: bx lr + %srem = srem i29 %X, 99 + %cmp = icmp eq i29 %srem, 0 + ret i1 %cmp +} + +define i1 @test_srem_even(i4 %X) nounwind { +; ARM5-LABEL: test_srem_even: +; ARM5: @ %bb.0: +; ARM5-NEXT: ldr r2, .LCPI1_0 +; ARM5-NEXT: lsl r0, r0, #28 +; ARM5-NEXT: asr r12, r0, #28 +; ARM5-NEXT: smull r3, r1, r12, r2 +; ARM5-NEXT: add r1, r1, r1, lsr #31 +; ARM5-NEXT: add r1, r1, r1, lsl #1 +; ARM5-NEXT: mvn r1, r1, lsl #1 +; ARM5-NEXT: add r0, r1, r0, asr #28 +; ARM5-NEXT: clz r0, r0 +; ARM5-NEXT: lsr r0, r0, #5 +; ARM5-NEXT: bx lr +; ARM5-NEXT: .p2align 2 +; ARM5-NEXT: @ %bb.1: +; ARM5-NEXT: .LCPI1_0: +; ARM5-NEXT: .long 715827883 @ 0x2aaaaaab +; +; ARM6-LABEL: test_srem_even: +; ARM6: @ %bb.0: +; ARM6-NEXT: ldr r2, .LCPI1_0 +; ARM6-NEXT: lsl r0, r0, #28 +; ARM6-NEXT: asr r1, r0, #28 +; ARM6-NEXT: smmul r1, r1, r2 +; ARM6-NEXT: add r1, r1, r1, lsr #31 +; ARM6-NEXT: add r1, r1, r1, lsl #1 +; ARM6-NEXT: mvn r1, r1, lsl #1 +; ARM6-NEXT: add r0, r1, r0, asr #28 +; ARM6-NEXT: clz r0, r0 +; ARM6-NEXT: lsr r0, r0, #5 +; ARM6-NEXT: bx lr +; ARM6-NEXT: .p2align 2 +; ARM6-NEXT: @ %bb.1: +; ARM6-NEXT: .LCPI1_0: +; ARM6-NEXT: .long 715827883 @ 0x2aaaaaab +; +; ARM7-LABEL: test_srem_even: +; ARM7: @ %bb.0: +; ARM7-NEXT: movw r2, #43691 +; ARM7-NEXT: sbfx r1, r0, #0, #4 +; ARM7-NEXT: movt r2, #10922 +; ARM7-NEXT: lsl r0, r0, #28 +; ARM7-NEXT: smmul r1, r1, r2 +; ARM7-NEXT: add r1, r1, r1, lsr #31 +; ARM7-NEXT: add r1, r1, r1, lsl #1 +; ARM7-NEXT: mvn r1, r1, lsl #1 +; ARM7-NEXT: add r0, r1, r0, asr #28 +; ARM7-NEXT: clz r0, r0 +; ARM7-NEXT: lsr r0, r0, #5 +; ARM7-NEXT: bx lr +; +; ARM8-LABEL: test_srem_even: +; ARM8: @ %bb.0: +; ARM8-NEXT: movw r2, #43691 +; ARM8-NEXT: sbfx r1, r0, #0, #4 +; ARM8-NEXT: movt r2, #10922 +; ARM8-NEXT: lsl r0, r0, #28 +; ARM8-NEXT: smmul r1, r1, r2 +; ARM8-NEXT: add r1, r1, r1, lsr #31 +; ARM8-NEXT: add r1, r1, r1, lsl #1 +; ARM8-NEXT: mvn r1, r1, lsl #1 +; ARM8-NEXT: add r0, r1, r0, asr #28 +; ARM8-NEXT: clz r0, r0 +; ARM8-NEXT: lsr r0, r0, #5 +; ARM8-NEXT: bx lr +; +; NEON7-LABEL: test_srem_even: +; 
NEON7: @ %bb.0: +; NEON7-NEXT: movw r2, #43691 +; NEON7-NEXT: sbfx r1, r0, #0, #4 +; NEON7-NEXT: movt r2, #10922 +; NEON7-NEXT: lsl r0, r0, #28 +; NEON7-NEXT: smmul r1, r1, r2 +; NEON7-NEXT: add r1, r1, r1, lsr #31 +; NEON7-NEXT: add r1, r1, r1, lsl #1 +; NEON7-NEXT: mvn r1, r1, lsl #1 +; NEON7-NEXT: add r0, r1, r0, asr #28 +; NEON7-NEXT: clz r0, r0 +; NEON7-NEXT: lsr r0, r0, #5 +; NEON7-NEXT: bx lr +; +; NEON8-LABEL: test_srem_even: +; NEON8: @ %bb.0: +; NEON8-NEXT: movw r2, #43691 +; NEON8-NEXT: sbfx r1, r0, #0, #4 +; NEON8-NEXT: movt r2, #10922 +; NEON8-NEXT: lsl r0, r0, #28 +; NEON8-NEXT: smmul r1, r1, r2 +; NEON8-NEXT: add r1, r1, r1, lsr #31 +; NEON8-NEXT: add r1, r1, r1, lsl #1 +; NEON8-NEXT: mvn r1, r1, lsl #1 +; NEON8-NEXT: add r0, r1, r0, asr #28 +; NEON8-NEXT: clz r0, r0 +; NEON8-NEXT: lsr r0, r0, #5 +; NEON8-NEXT: bx lr + %srem = srem i4 %X, 6 + %cmp = icmp eq i4 %srem, 1 + ret i1 %cmp +} + +define i1 @test_srem_pow2_setne(i6 %X) nounwind { +; ARM5-LABEL: test_srem_pow2_setne: +; ARM5: @ %bb.0: +; ARM5-NEXT: lsl r1, r0, #26 +; ARM5-NEXT: mov r2, #3 +; ARM5-NEXT: asr r1, r1, #26 +; ARM5-NEXT: and r1, r2, r1, lsr #9 +; ARM5-NEXT: add r1, r0, r1 +; ARM5-NEXT: and r1, r1, #60 +; ARM5-NEXT: sub r0, r0, r1 +; ARM5-NEXT: ands r0, r0, #63 +; ARM5-NEXT: movne r0, #1 +; ARM5-NEXT: bx lr +; +; ARM6-LABEL: test_srem_pow2_setne: +; ARM6: @ %bb.0: +; ARM6-NEXT: lsl r1, r0, #26 +; ARM6-NEXT: mov r2, #3 +; ARM6-NEXT: asr r1, r1, #26 +; ARM6-NEXT: and r1, r2, r1, lsr #9 +; ARM6-NEXT: add r1, r0, r1 +; ARM6-NEXT: and r1, r1, #60 +; ARM6-NEXT: sub r0, r0, r1 +; ARM6-NEXT: ands r0, r0, #63 +; ARM6-NEXT: movne r0, #1 +; ARM6-NEXT: bx lr +; +; ARM7-LABEL: test_srem_pow2_setne: +; ARM7: @ %bb.0: +; ARM7-NEXT: sbfx r1, r0, #0, #6 +; ARM7-NEXT: ubfx r1, r1, #9, #2 +; ARM7-NEXT: add r1, r0, r1 +; ARM7-NEXT: and r1, r1, #60 +; ARM7-NEXT: sub r0, r0, r1 +; ARM7-NEXT: ands r0, r0, #63 +; ARM7-NEXT: movwne r0, #1 +; ARM7-NEXT: bx lr +; +; ARM8-LABEL: test_srem_pow2_setne: +; ARM8: @ %bb.0: +; ARM8-NEXT: sbfx r1, r0, #0, #6 +; ARM8-NEXT: ubfx r1, r1, #9, #2 +; ARM8-NEXT: add r1, r0, r1 +; ARM8-NEXT: and r1, r1, #60 +; ARM8-NEXT: sub r0, r0, r1 +; ARM8-NEXT: ands r0, r0, #63 +; ARM8-NEXT: movwne r0, #1 +; ARM8-NEXT: bx lr +; +; NEON7-LABEL: test_srem_pow2_setne: +; NEON7: @ %bb.0: +; NEON7-NEXT: sbfx r1, r0, #0, #6 +; NEON7-NEXT: ubfx r1, r1, #9, #2 +; NEON7-NEXT: add r1, r0, r1 +; NEON7-NEXT: and r1, r1, #60 +; NEON7-NEXT: sub r0, r0, r1 +; NEON7-NEXT: ands r0, r0, #63 +; NEON7-NEXT: movwne r0, #1 +; NEON7-NEXT: bx lr +; +; NEON8-LABEL: test_srem_pow2_setne: +; NEON8: @ %bb.0: +; NEON8-NEXT: sbfx r1, r0, #0, #6 +; NEON8-NEXT: ubfx r1, r1, #9, #2 +; NEON8-NEXT: add r1, r0, r1 +; NEON8-NEXT: and r1, r1, #60 +; NEON8-NEXT: sub r0, r0, r1 +; NEON8-NEXT: ands r0, r0, #63 +; NEON8-NEXT: movwne r0, #1 +; NEON8-NEXT: bx lr + %srem = srem i6 %X, 4 + %cmp = icmp ne i6 %srem, 0 + ret i1 %cmp +} + +define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind { +; ARM5-LABEL: test_srem_vec: +; ARM5: @ %bb.0: +; ARM5-NEXT: push {r4, r5, r6, lr} +; ARM5-NEXT: and r1, r1, #1 +; ARM5-NEXT: mov r5, r3 +; ARM5-NEXT: rsb r1, r1, #0 +; ARM5-NEXT: mov r6, r2 +; ARM5-NEXT: mov r2, #9 +; ARM5-NEXT: mov r3, #0 +; ARM5-NEXT: bl __moddi3 +; ARM5-NEXT: eor r0, r0, #3 +; ARM5-NEXT: mov r2, #9 +; ARM5-NEXT: orrs r4, r0, r1 +; ARM5-NEXT: and r0, r5, #1 +; ARM5-NEXT: rsb r1, r0, #0 +; ARM5-NEXT: mov r0, r6 +; ARM5-NEXT: mov r3, #0 +; ARM5-NEXT: movne r4, #1 +; ARM5-NEXT: bl __moddi3 +; ARM5-NEXT: mov r2, #1 +; ARM5-NEXT: bic r1, r2, r1 +; 
ARM5-NEXT: mvn r2, #2 +; ARM5-NEXT: eor r0, r0, r2 +; ARM5-NEXT: orrs r5, r0, r1 +; ARM5-NEXT: ldr r0, [sp, #20] +; ARM5-NEXT: mvn r2, #8 +; ARM5-NEXT: mvn r3, #0 +; ARM5-NEXT: and r0, r0, #1 +; ARM5-NEXT: movne r5, #1 +; ARM5-NEXT: rsb r1, r0, #0 +; ARM5-NEXT: ldr r0, [sp, #16] +; ARM5-NEXT: bl __moddi3 +; ARM5-NEXT: eor r0, r0, #3 +; ARM5-NEXT: orrs r2, r0, r1 +; ARM5-NEXT: mov r0, r4 +; ARM5-NEXT: movne r2, #1 +; ARM5-NEXT: mov r1, r5 +; ARM5-NEXT: pop {r4, r5, r6, pc} +; +; ARM6-LABEL: test_srem_vec: +; ARM6: @ %bb.0: +; ARM6-NEXT: push {r4, r5, r6, lr} +; ARM6-NEXT: and r1, r1, #1 +; ARM6-NEXT: mov r5, r3 +; ARM6-NEXT: rsb r1, r1, #0 +; ARM6-NEXT: mov r6, r2 +; ARM6-NEXT: mov r2, #9 +; ARM6-NEXT: mov r3, #0 +; ARM6-NEXT: bl __moddi3 +; ARM6-NEXT: eor r0, r0, #3 +; ARM6-NEXT: mov r2, #9 +; ARM6-NEXT: orrs r4, r0, r1 +; ARM6-NEXT: and r0, r5, #1 +; ARM6-NEXT: rsb r1, r0, #0 +; ARM6-NEXT: mov r0, r6 +; ARM6-NEXT: mov r3, #0 +; ARM6-NEXT: movne r4, #1 +; ARM6-NEXT: bl __moddi3 +; ARM6-NEXT: mov r2, #1 +; ARM6-NEXT: bic r1, r2, r1 +; ARM6-NEXT: mvn r2, #2 +; ARM6-NEXT: eor r0, r0, r2 +; ARM6-NEXT: orrs r5, r0, r1 +; ARM6-NEXT: ldr r0, [sp, #20] +; ARM6-NEXT: mvn r2, #8 +; ARM6-NEXT: mvn r3, #0 +; ARM6-NEXT: and r0, r0, #1 +; ARM6-NEXT: movne r5, #1 +; ARM6-NEXT: rsb r1, r0, #0 +; ARM6-NEXT: ldr r0, [sp, #16] +; ARM6-NEXT: bl __moddi3 +; ARM6-NEXT: eor r0, r0, #3 +; ARM6-NEXT: orrs r2, r0, r1 +; ARM6-NEXT: mov r0, r4 +; ARM6-NEXT: movne r2, #1 +; ARM6-NEXT: mov r1, r5 +; ARM6-NEXT: pop {r4, r5, r6, pc} +; +; ARM7-LABEL: test_srem_vec: +; ARM7: @ %bb.0: +; ARM7-NEXT: push {r4, r5, r6, r7, r11, lr} +; ARM7-NEXT: vpush {d8, d9} +; ARM7-NEXT: mov r5, r0 +; ARM7-NEXT: and r0, r3, #1 +; ARM7-NEXT: mov r4, r1 +; ARM7-NEXT: rsb r1, r0, #0 +; ARM7-NEXT: mov r0, r2 +; ARM7-NEXT: mov r2, #9 +; ARM7-NEXT: mov r3, #0 +; ARM7-NEXT: bl __moddi3 +; ARM7-NEXT: mov r6, r0 +; ARM7-NEXT: and r0, r4, #1 +; ARM7-NEXT: mov r7, r1 +; ARM7-NEXT: rsb r1, r0, #0 +; ARM7-NEXT: mov r0, r5 +; ARM7-NEXT: mov r2, #9 +; ARM7-NEXT: mov r3, #0 +; ARM7-NEXT: bl __moddi3 +; ARM7-NEXT: vmov.32 d8[0], r0 +; ARM7-NEXT: ldr r0, [sp, #44] +; ARM7-NEXT: ldr r2, [sp, #40] +; ARM7-NEXT: mov r4, r1 +; ARM7-NEXT: and r0, r0, #1 +; ARM7-NEXT: mvn r3, #0 +; ARM7-NEXT: rsb r1, r0, #0 +; ARM7-NEXT: vmov.32 d9[0], r6 +; ARM7-NEXT: mov r0, r2 +; ARM7-NEXT: mvn r2, #8 +; ARM7-NEXT: bl __moddi3 +; ARM7-NEXT: vmov.32 d16[0], r0 +; ARM7-NEXT: adr r0, .LCPI3_0 +; ARM7-NEXT: vmov.32 d9[1], r7 +; ARM7-NEXT: vld1.64 {d18, d19}, [r0:128] +; ARM7-NEXT: adr r0, .LCPI3_1 +; ARM7-NEXT: vmov.32 d16[1], r1 +; ARM7-NEXT: vmov.32 d8[1], r4 +; ARM7-NEXT: vand q8, q8, q9 +; ARM7-NEXT: vld1.64 {d20, d21}, [r0:128] +; ARM7-NEXT: adr r0, .LCPI3_2 +; ARM7-NEXT: vand q11, q4, q9 +; ARM7-NEXT: vld1.64 {d18, d19}, [r0:128] +; ARM7-NEXT: vceq.i32 q10, q11, q10 +; ARM7-NEXT: vceq.i32 q8, q8, q9 +; ARM7-NEXT: vrev64.32 q9, q10 +; ARM7-NEXT: vrev64.32 q11, q8 +; ARM7-NEXT: vand q9, q10, q9 +; ARM7-NEXT: vand q8, q8, q11 +; ARM7-NEXT: vmvn q9, q9 +; ARM7-NEXT: vmvn q8, q8 +; ARM7-NEXT: vmovn.i64 d18, q9 +; ARM7-NEXT: vmovn.i64 d16, q8 +; ARM7-NEXT: vmov.32 r0, d18[0] +; ARM7-NEXT: vmov.32 r1, d18[1] +; ARM7-NEXT: vmov.32 r2, d16[0] +; ARM7-NEXT: vpop {d8, d9} +; ARM7-NEXT: pop {r4, r5, r6, r7, r11, pc} +; ARM7-NEXT: .p2align 4 +; ARM7-NEXT: @ %bb.1: +; ARM7-NEXT: .LCPI3_0: +; ARM7-NEXT: .long 4294967295 @ 0xffffffff +; ARM7-NEXT: .long 1 @ 0x1 +; ARM7-NEXT: .long 4294967295 @ 0xffffffff +; ARM7-NEXT: .long 1 @ 0x1 +; ARM7-NEXT: .LCPI3_1: +; ARM7-NEXT: .long 3 @ 0x3 +; 
ARM7-NEXT: .long 0 @ 0x0 +; ARM7-NEXT: .long 4294967293 @ 0xfffffffd +; ARM7-NEXT: .long 1 @ 0x1 +; ARM7-NEXT: .LCPI3_2: +; ARM7-NEXT: .long 3 @ 0x3 +; ARM7-NEXT: .long 0 @ 0x0 +; ARM7-NEXT: .zero 4 +; ARM7-NEXT: .long 0 @ 0x0 +; +; ARM8-LABEL: test_srem_vec: +; ARM8: @ %bb.0: +; ARM8-NEXT: push {r4, r5, r6, r7, r11, lr} +; ARM8-NEXT: vpush {d8, d9} +; ARM8-NEXT: mov r5, r0 +; ARM8-NEXT: and r0, r3, #1 +; ARM8-NEXT: mov r4, r1 +; ARM8-NEXT: rsb r1, r0, #0 +; ARM8-NEXT: mov r0, r2 +; ARM8-NEXT: mov r2, #9 +; ARM8-NEXT: mov r3, #0 +; ARM8-NEXT: bl __moddi3 +; ARM8-NEXT: mov r6, r0 +; ARM8-NEXT: and r0, r4, #1 +; ARM8-NEXT: mov r7, r1 +; ARM8-NEXT: rsb r1, r0, #0 +; ARM8-NEXT: mov r0, r5 +; ARM8-NEXT: mov r2, #9 +; ARM8-NEXT: mov r3, #0 +; ARM8-NEXT: bl __moddi3 +; ARM8-NEXT: vmov.32 d8[0], r0 +; ARM8-NEXT: ldr r0, [sp, #44] +; ARM8-NEXT: ldr r2, [sp, #40] +; ARM8-NEXT: mov r4, r1 +; ARM8-NEXT: and r0, r0, #1 +; ARM8-NEXT: mvn r3, #0 +; ARM8-NEXT: rsb r1, r0, #0 +; ARM8-NEXT: vmov.32 d9[0], r6 +; ARM8-NEXT: mov r0, r2 +; ARM8-NEXT: mvn r2, #8 +; ARM8-NEXT: bl __moddi3 +; ARM8-NEXT: vmov.32 d16[0], r0 +; ARM8-NEXT: adr r0, .LCPI3_0 +; ARM8-NEXT: vmov.32 d9[1], r7 +; ARM8-NEXT: vld1.64 {d18, d19}, [r0:128] +; ARM8-NEXT: adr r0, .LCPI3_1 +; ARM8-NEXT: vmov.32 d16[1], r1 +; ARM8-NEXT: vmov.32 d8[1], r4 +; ARM8-NEXT: vand q8, q8, q9 +; ARM8-NEXT: vld1.64 {d20, d21}, [r0:128] +; ARM8-NEXT: adr r0, .LCPI3_2 +; ARM8-NEXT: vand q11, q4, q9 +; ARM8-NEXT: vld1.64 {d18, d19}, [r0:128] +; ARM8-NEXT: vceq.i32 q10, q11, q10 +; ARM8-NEXT: vceq.i32 q8, q8, q9 +; ARM8-NEXT: vrev64.32 q9, q10 +; ARM8-NEXT: vrev64.32 q11, q8 +; ARM8-NEXT: vand q9, q10, q9 +; ARM8-NEXT: vand q8, q8, q11 +; ARM8-NEXT: vmvn q9, q9 +; ARM8-NEXT: vmvn q8, q8 +; ARM8-NEXT: vmovn.i64 d18, q9 +; ARM8-NEXT: vmovn.i64 d16, q8 +; ARM8-NEXT: vmov.32 r0, d18[0] +; ARM8-NEXT: vmov.32 r1, d18[1] +; ARM8-NEXT: vmov.32 r2, d16[0] +; ARM8-NEXT: vpop {d8, d9} +; ARM8-NEXT: pop {r4, r5, r6, r7, r11, pc} +; ARM8-NEXT: .p2align 4 +; ARM8-NEXT: @ %bb.1: +; ARM8-NEXT: .LCPI3_0: +; ARM8-NEXT: .long 4294967295 @ 0xffffffff +; ARM8-NEXT: .long 1 @ 0x1 +; ARM8-NEXT: .long 4294967295 @ 0xffffffff +; ARM8-NEXT: .long 1 @ 0x1 +; ARM8-NEXT: .LCPI3_1: +; ARM8-NEXT: .long 3 @ 0x3 +; ARM8-NEXT: .long 0 @ 0x0 +; ARM8-NEXT: .long 4294967293 @ 0xfffffffd +; ARM8-NEXT: .long 1 @ 0x1 +; ARM8-NEXT: .LCPI3_2: +; ARM8-NEXT: .long 3 @ 0x3 +; ARM8-NEXT: .long 0 @ 0x0 +; ARM8-NEXT: .zero 4 +; ARM8-NEXT: .long 0 @ 0x0 +; +; NEON7-LABEL: test_srem_vec: +; NEON7: @ %bb.0: +; NEON7-NEXT: push {r4, r5, r6, r7, r11, lr} +; NEON7-NEXT: vpush {d8, d9} +; NEON7-NEXT: mov r5, r0 +; NEON7-NEXT: and r0, r3, #1 +; NEON7-NEXT: mov r4, r1 +; NEON7-NEXT: rsb r1, r0, #0 +; NEON7-NEXT: mov r0, r2 +; NEON7-NEXT: mov r2, #9 +; NEON7-NEXT: mov r3, #0 +; NEON7-NEXT: bl __moddi3 +; NEON7-NEXT: mov r6, r0 +; NEON7-NEXT: and r0, r4, #1 +; NEON7-NEXT: mov r7, r1 +; NEON7-NEXT: rsb r1, r0, #0 +; NEON7-NEXT: mov r0, r5 +; NEON7-NEXT: mov r2, #9 +; NEON7-NEXT: mov r3, #0 +; NEON7-NEXT: bl __moddi3 +; NEON7-NEXT: vmov.32 d8[0], r0 +; NEON7-NEXT: ldr r0, [sp, #44] +; NEON7-NEXT: ldr r2, [sp, #40] +; NEON7-NEXT: mov r4, r1 +; NEON7-NEXT: and r0, r0, #1 +; NEON7-NEXT: mvn r3, #0 +; NEON7-NEXT: rsb r1, r0, #0 +; NEON7-NEXT: vmov.32 d9[0], r6 +; NEON7-NEXT: mov r0, r2 +; NEON7-NEXT: mvn r2, #8 +; NEON7-NEXT: bl __moddi3 +; NEON7-NEXT: vmov.32 d16[0], r0 +; NEON7-NEXT: adr r0, .LCPI3_0 +; NEON7-NEXT: vmov.32 d9[1], r7 +; NEON7-NEXT: vld1.64 {d18, d19}, [r0:128] +; NEON7-NEXT: adr r0, .LCPI3_1 +; NEON7-NEXT: 
vmov.32 d16[1], r1 +; NEON7-NEXT: vmov.32 d8[1], r4 +; NEON7-NEXT: vand q8, q8, q9 +; NEON7-NEXT: vld1.64 {d20, d21}, [r0:128] +; NEON7-NEXT: adr r0, .LCPI3_2 +; NEON7-NEXT: vand q11, q4, q9 +; NEON7-NEXT: vld1.64 {d18, d19}, [r0:128] +; NEON7-NEXT: vceq.i32 q10, q11, q10 +; NEON7-NEXT: vceq.i32 q8, q8, q9 +; NEON7-NEXT: vrev64.32 q9, q10 +; NEON7-NEXT: vrev64.32 q11, q8 +; NEON7-NEXT: vand q9, q10, q9 +; NEON7-NEXT: vand q8, q8, q11 +; NEON7-NEXT: vmvn q9, q9 +; NEON7-NEXT: vmvn q8, q8 +; NEON7-NEXT: vmovn.i64 d18, q9 +; NEON7-NEXT: vmovn.i64 d16, q8 +; NEON7-NEXT: vmov.32 r0, d18[0] +; NEON7-NEXT: vmov.32 r1, d18[1] +; NEON7-NEXT: vmov.32 r2, d16[0] +; NEON7-NEXT: vpop {d8, d9} +; NEON7-NEXT: pop {r4, r5, r6, r7, r11, pc} +; NEON7-NEXT: .p2align 4 +; NEON7-NEXT: @ %bb.1: +; NEON7-NEXT: .LCPI3_0: +; NEON7-NEXT: .long 4294967295 @ 0xffffffff +; NEON7-NEXT: .long 1 @ 0x1 +; NEON7-NEXT: .long 4294967295 @ 0xffffffff +; NEON7-NEXT: .long 1 @ 0x1 +; NEON7-NEXT: .LCPI3_1: +; NEON7-NEXT: .long 3 @ 0x3 +; NEON7-NEXT: .long 0 @ 0x0 +; NEON7-NEXT: .long 4294967293 @ 0xfffffffd +; NEON7-NEXT: .long 1 @ 0x1 +; NEON7-NEXT: .LCPI3_2: +; NEON7-NEXT: .long 3 @ 0x3 +; NEON7-NEXT: .long 0 @ 0x0 +; NEON7-NEXT: .zero 4 +; NEON7-NEXT: .long 0 @ 0x0 +; +; NEON8-LABEL: test_srem_vec: +; NEON8: @ %bb.0: +; NEON8-NEXT: push {r4, r5, r6, r7, r11, lr} +; NEON8-NEXT: vpush {d8, d9} +; NEON8-NEXT: mov r5, r0 +; NEON8-NEXT: and r0, r3, #1 +; NEON8-NEXT: mov r4, r1 +; NEON8-NEXT: rsb r1, r0, #0 +; NEON8-NEXT: mov r0, r2 +; NEON8-NEXT: mov r2, #9 +; NEON8-NEXT: mov r3, #0 +; NEON8-NEXT: bl __moddi3 +; NEON8-NEXT: mov r6, r0 +; NEON8-NEXT: and r0, r4, #1 +; NEON8-NEXT: mov r7, r1 +; NEON8-NEXT: rsb r1, r0, #0 +; NEON8-NEXT: mov r0, r5 +; NEON8-NEXT: mov r2, #9 +; NEON8-NEXT: mov r3, #0 +; NEON8-NEXT: bl __moddi3 +; NEON8-NEXT: vmov.32 d8[0], r0 +; NEON8-NEXT: ldr r0, [sp, #44] +; NEON8-NEXT: ldr r2, [sp, #40] +; NEON8-NEXT: mov r4, r1 +; NEON8-NEXT: and r0, r0, #1 +; NEON8-NEXT: mvn r3, #0 +; NEON8-NEXT: rsb r1, r0, #0 +; NEON8-NEXT: vmov.32 d9[0], r6 +; NEON8-NEXT: mov r0, r2 +; NEON8-NEXT: mvn r2, #8 +; NEON8-NEXT: bl __moddi3 +; NEON8-NEXT: vmov.32 d16[0], r0 +; NEON8-NEXT: adr r0, .LCPI3_0 +; NEON8-NEXT: vmov.32 d9[1], r7 +; NEON8-NEXT: vld1.64 {d18, d19}, [r0:128] +; NEON8-NEXT: adr r0, .LCPI3_1 +; NEON8-NEXT: vmov.32 d16[1], r1 +; NEON8-NEXT: vmov.32 d8[1], r4 +; NEON8-NEXT: vand q8, q8, q9 +; NEON8-NEXT: vld1.64 {d20, d21}, [r0:128] +; NEON8-NEXT: adr r0, .LCPI3_2 +; NEON8-NEXT: vand q11, q4, q9 +; NEON8-NEXT: vld1.64 {d18, d19}, [r0:128] +; NEON8-NEXT: vceq.i32 q10, q11, q10 +; NEON8-NEXT: vceq.i32 q8, q8, q9 +; NEON8-NEXT: vrev64.32 q9, q10 +; NEON8-NEXT: vrev64.32 q11, q8 +; NEON8-NEXT: vand q9, q10, q9 +; NEON8-NEXT: vand q8, q8, q11 +; NEON8-NEXT: vmvn q9, q9 +; NEON8-NEXT: vmvn q8, q8 +; NEON8-NEXT: vmovn.i64 d18, q9 +; NEON8-NEXT: vmovn.i64 d16, q8 +; NEON8-NEXT: vmov.32 r0, d18[0] +; NEON8-NEXT: vmov.32 r1, d18[1] +; NEON8-NEXT: vmov.32 r2, d16[0] +; NEON8-NEXT: vpop {d8, d9} +; NEON8-NEXT: pop {r4, r5, r6, r7, r11, pc} +; NEON8-NEXT: .p2align 4 +; NEON8-NEXT: @ %bb.1: +; NEON8-NEXT: .LCPI3_0: +; NEON8-NEXT: .long 4294967295 @ 0xffffffff +; NEON8-NEXT: .long 1 @ 0x1 +; NEON8-NEXT: .long 4294967295 @ 0xffffffff +; NEON8-NEXT: .long 1 @ 0x1 +; NEON8-NEXT: .LCPI3_1: +; NEON8-NEXT: .long 3 @ 0x3 +; NEON8-NEXT: .long 0 @ 0x0 +; NEON8-NEXT: .long 4294967293 @ 0xfffffffd +; NEON8-NEXT: .long 1 @ 0x1 +; NEON8-NEXT: .LCPI3_2: +; NEON8-NEXT: .long 3 @ 0x3 +; NEON8-NEXT: .long 0 @ 0x0 +; NEON8-NEXT: .zero 4 +; 
NEON8-NEXT: .long 0 @ 0x0 + %srem = srem <3 x i33> %X, + %cmp = icmp ne <3 x i33> %srem, + ret <3 x i1> %cmp +} diff --git a/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll @@ -0,0 +1,767 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=armv5-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=ARM5 +; RUN: llc -mtriple=armv6-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=ARM6 +; RUN: llc -mtriple=armv7-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=ARM7 +; RUN: llc -mtriple=armv8-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=ARM8 +; RUN: llc -mtriple=armv7-unknown-linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefixes=NEON7 +; RUN: llc -mtriple=armv8-unknown-linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefixes=NEON8 + +define i1 @test_urem_odd(i13 %X) nounwind { +; ARM5-LABEL: test_urem_odd: +; ARM5: @ %bb.0: +; ARM5-NEXT: mov r1, #255 +; ARM5-NEXT: orr r1, r1, #7936 +; ARM5-NEXT: and r0, r0, r1 +; ARM5-NEXT: ldr r1, .LCPI0_0 +; ARM5-NEXT: mul r2, r0, r1 +; ARM5-NEXT: ldr r1, .LCPI0_1 +; ARM5-NEXT: mov r0, #0 +; ARM5-NEXT: cmp r2, r1 +; ARM5-NEXT: movlo r0, #1 +; ARM5-NEXT: bx lr +; ARM5-NEXT: .p2align 2 +; ARM5-NEXT: @ %bb.1: +; ARM5-NEXT: .LCPI0_0: +; ARM5-NEXT: .long 3435973837 @ 0xcccccccd +; ARM5-NEXT: .LCPI0_1: +; ARM5-NEXT: .long 858993460 @ 0x33333334 +; +; ARM6-LABEL: test_urem_odd: +; ARM6: @ %bb.0: +; ARM6-NEXT: mov r1, #255 +; ARM6-NEXT: ldr r2, .LCPI0_1 +; ARM6-NEXT: orr r1, r1, #7936 +; ARM6-NEXT: and r0, r0, r1 +; ARM6-NEXT: ldr r1, .LCPI0_0 +; ARM6-NEXT: mul r1, r0, r1 +; ARM6-NEXT: mov r0, #0 +; ARM6-NEXT: cmp r1, r2 +; ARM6-NEXT: movlo r0, #1 +; ARM6-NEXT: bx lr +; ARM6-NEXT: .p2align 2 +; ARM6-NEXT: @ %bb.1: +; ARM6-NEXT: .LCPI0_0: +; ARM6-NEXT: .long 3435973837 @ 0xcccccccd +; ARM6-NEXT: .LCPI0_1: +; ARM6-NEXT: .long 858993460 @ 0x33333334 +; +; ARM7-LABEL: test_urem_odd: +; ARM7: @ %bb.0: +; ARM7-NEXT: movw r1, #52429 +; ARM7-NEXT: bfc r0, #13, #19 +; ARM7-NEXT: movt r1, #52428 +; ARM7-NEXT: movw r2, #13108 +; ARM7-NEXT: mul r1, r0, r1 +; ARM7-NEXT: movt r2, #13107 +; ARM7-NEXT: mov r0, #0 +; ARM7-NEXT: cmp r1, r2 +; ARM7-NEXT: movwlo r0, #1 +; ARM7-NEXT: bx lr +; +; ARM8-LABEL: test_urem_odd: +; ARM8: @ %bb.0: +; ARM8-NEXT: movw r1, #52429 +; ARM8-NEXT: bfc r0, #13, #19 +; ARM8-NEXT: movt r1, #52428 +; ARM8-NEXT: movw r2, #13108 +; ARM8-NEXT: mul r1, r0, r1 +; ARM8-NEXT: movt r2, #13107 +; ARM8-NEXT: mov r0, #0 +; ARM8-NEXT: cmp r1, r2 +; ARM8-NEXT: movwlo r0, #1 +; ARM8-NEXT: bx lr +; +; NEON7-LABEL: test_urem_odd: +; NEON7: @ %bb.0: +; NEON7-NEXT: movw r1, #52429 +; NEON7-NEXT: bfc r0, #13, #19 +; NEON7-NEXT: movt r1, #52428 +; NEON7-NEXT: movw r2, #13108 +; NEON7-NEXT: mul r1, r0, r1 +; NEON7-NEXT: movt r2, #13107 +; NEON7-NEXT: mov r0, #0 +; NEON7-NEXT: cmp r1, r2 +; NEON7-NEXT: movwlo r0, #1 +; NEON7-NEXT: bx lr +; +; NEON8-LABEL: test_urem_odd: +; NEON8: @ %bb.0: +; NEON8-NEXT: movw r1, #52429 +; NEON8-NEXT: bfc r0, #13, #19 +; NEON8-NEXT: movt r1, #52428 +; NEON8-NEXT: movw r2, #13108 +; NEON8-NEXT: mul r1, r0, r1 +; NEON8-NEXT: movt r2, #13107 +; NEON8-NEXT: mov r0, #0 +; NEON8-NEXT: cmp r1, r2 +; NEON8-NEXT: movwlo r0, #1 +; NEON8-NEXT: bx lr + %urem = urem i13 %X, 5 + %cmp = icmp eq i13 %urem, 0 + ret i1 %cmp +} + +define i1 @test_urem_even(i27 %X) nounwind { +; ARM5-LABEL: test_urem_even: +; ARM5: @ %bb.0: +; ARM5-NEXT: 
ldr r1, .LCPI1_0 +; ARM5-NEXT: bic r0, r0, #-134217728 +; ARM5-NEXT: mul r2, r0, r1 +; ARM5-NEXT: mov r0, #0 +; ARM5-NEXT: ror r1, r2, #1 +; ARM5-NEXT: ldr r2, .LCPI1_1 +; ARM5-NEXT: cmp r1, r2 +; ARM5-NEXT: movlo r0, #1 +; ARM5-NEXT: bx lr +; ARM5-NEXT: .p2align 2 +; ARM5-NEXT: @ %bb.1: +; ARM5-NEXT: .LCPI1_0: +; ARM5-NEXT: .long 3067833783 @ 0xb6db6db7 +; ARM5-NEXT: .LCPI1_1: +; ARM5-NEXT: .long 306783379 @ 0x12492493 +; +; ARM6-LABEL: test_urem_even: +; ARM6: @ %bb.0: +; ARM6-NEXT: ldr r1, .LCPI1_0 +; ARM6-NEXT: bic r0, r0, #-134217728 +; ARM6-NEXT: ldr r2, .LCPI1_1 +; ARM6-NEXT: mul r0, r0, r1 +; ARM6-NEXT: ror r1, r0, #1 +; ARM6-NEXT: mov r0, #0 +; ARM6-NEXT: cmp r1, r2 +; ARM6-NEXT: movlo r0, #1 +; ARM6-NEXT: bx lr +; ARM6-NEXT: .p2align 2 +; ARM6-NEXT: @ %bb.1: +; ARM6-NEXT: .LCPI1_0: +; ARM6-NEXT: .long 3067833783 @ 0xb6db6db7 +; ARM6-NEXT: .LCPI1_1: +; ARM6-NEXT: .long 306783379 @ 0x12492493 +; +; ARM7-LABEL: test_urem_even: +; ARM7: @ %bb.0: +; ARM7-NEXT: movw r1, #28087 +; ARM7-NEXT: bic r0, r0, #-134217728 +; ARM7-NEXT: movt r1, #46811 +; ARM7-NEXT: movw r2, #9363 +; ARM7-NEXT: mul r0, r0, r1 +; ARM7-NEXT: movt r2, #4681 +; ARM7-NEXT: ror r1, r0, #1 +; ARM7-NEXT: mov r0, #0 +; ARM7-NEXT: cmp r1, r2 +; ARM7-NEXT: movwlo r0, #1 +; ARM7-NEXT: bx lr +; +; ARM8-LABEL: test_urem_even: +; ARM8: @ %bb.0: +; ARM8-NEXT: movw r1, #28087 +; ARM8-NEXT: bic r0, r0, #-134217728 +; ARM8-NEXT: movt r1, #46811 +; ARM8-NEXT: movw r2, #9363 +; ARM8-NEXT: mul r0, r0, r1 +; ARM8-NEXT: movt r2, #4681 +; ARM8-NEXT: ror r1, r0, #1 +; ARM8-NEXT: mov r0, #0 +; ARM8-NEXT: cmp r1, r2 +; ARM8-NEXT: movwlo r0, #1 +; ARM8-NEXT: bx lr +; +; NEON7-LABEL: test_urem_even: +; NEON7: @ %bb.0: +; NEON7-NEXT: movw r1, #28087 +; NEON7-NEXT: bic r0, r0, #-134217728 +; NEON7-NEXT: movt r1, #46811 +; NEON7-NEXT: movw r2, #9363 +; NEON7-NEXT: mul r0, r0, r1 +; NEON7-NEXT: movt r2, #4681 +; NEON7-NEXT: ror r1, r0, #1 +; NEON7-NEXT: mov r0, #0 +; NEON7-NEXT: cmp r1, r2 +; NEON7-NEXT: movwlo r0, #1 +; NEON7-NEXT: bx lr +; +; NEON8-LABEL: test_urem_even: +; NEON8: @ %bb.0: +; NEON8-NEXT: movw r1, #28087 +; NEON8-NEXT: bic r0, r0, #-134217728 +; NEON8-NEXT: movt r1, #46811 +; NEON8-NEXT: movw r2, #9363 +; NEON8-NEXT: mul r0, r0, r1 +; NEON8-NEXT: movt r2, #4681 +; NEON8-NEXT: ror r1, r0, #1 +; NEON8-NEXT: mov r0, #0 +; NEON8-NEXT: cmp r1, r2 +; NEON8-NEXT: movwlo r0, #1 +; NEON8-NEXT: bx lr + %urem = urem i27 %X, 14 + %cmp = icmp eq i27 %urem, 0 + ret i1 %cmp +} + +define i1 @test_urem_odd_setne(i4 %X) nounwind { +; ARM5-LABEL: test_urem_odd_setne: +; ARM5: @ %bb.0: +; ARM5-NEXT: ldr r1, .LCPI2_0 +; ARM5-NEXT: and r0, r0, #15 +; ARM5-NEXT: mul r2, r0, r1 +; ARM5-NEXT: ldr r1, .LCPI2_1 +; ARM5-NEXT: mov r0, #0 +; ARM5-NEXT: cmp r2, r1 +; ARM5-NEXT: movhi r0, #1 +; ARM5-NEXT: bx lr +; ARM5-NEXT: .p2align 2 +; ARM5-NEXT: @ %bb.1: +; ARM5-NEXT: .LCPI2_0: +; ARM5-NEXT: .long 3435973837 @ 0xcccccccd +; ARM5-NEXT: .LCPI2_1: +; ARM5-NEXT: .long 858993459 @ 0x33333333 +; +; ARM6-LABEL: test_urem_odd_setne: +; ARM6: @ %bb.0: +; ARM6-NEXT: ldr r1, .LCPI2_0 +; ARM6-NEXT: and r0, r0, #15 +; ARM6-NEXT: ldr r2, .LCPI2_1 +; ARM6-NEXT: mul r1, r0, r1 +; ARM6-NEXT: mov r0, #0 +; ARM6-NEXT: cmp r1, r2 +; ARM6-NEXT: movhi r0, #1 +; ARM6-NEXT: bx lr +; ARM6-NEXT: .p2align 2 +; ARM6-NEXT: @ %bb.1: +; ARM6-NEXT: .LCPI2_0: +; ARM6-NEXT: .long 3435973837 @ 0xcccccccd +; ARM6-NEXT: .LCPI2_1: +; ARM6-NEXT: .long 858993459 @ 0x33333333 +; +; ARM7-LABEL: test_urem_odd_setne: +; ARM7: @ %bb.0: +; ARM7-NEXT: movw r1, #52429 +; ARM7-NEXT: and r0, r0, #15 +; 
ARM7-NEXT: movt r1, #52428 +; ARM7-NEXT: movw r2, #13107 +; ARM7-NEXT: mul r1, r0, r1 +; ARM7-NEXT: movt r2, #13107 +; ARM7-NEXT: mov r0, #0 +; ARM7-NEXT: cmp r1, r2 +; ARM7-NEXT: movwhi r0, #1 +; ARM7-NEXT: bx lr +; +; ARM8-LABEL: test_urem_odd_setne: +; ARM8: @ %bb.0: +; ARM8-NEXT: movw r1, #52429 +; ARM8-NEXT: and r0, r0, #15 +; ARM8-NEXT: movt r1, #52428 +; ARM8-NEXT: movw r2, #13107 +; ARM8-NEXT: mul r1, r0, r1 +; ARM8-NEXT: movt r2, #13107 +; ARM8-NEXT: mov r0, #0 +; ARM8-NEXT: cmp r1, r2 +; ARM8-NEXT: movwhi r0, #1 +; ARM8-NEXT: bx lr +; +; NEON7-LABEL: test_urem_odd_setne: +; NEON7: @ %bb.0: +; NEON7-NEXT: movw r1, #52429 +; NEON7-NEXT: and r0, r0, #15 +; NEON7-NEXT: movt r1, #52428 +; NEON7-NEXT: movw r2, #13107 +; NEON7-NEXT: mul r1, r0, r1 +; NEON7-NEXT: movt r2, #13107 +; NEON7-NEXT: mov r0, #0 +; NEON7-NEXT: cmp r1, r2 +; NEON7-NEXT: movwhi r0, #1 +; NEON7-NEXT: bx lr +; +; NEON8-LABEL: test_urem_odd_setne: +; NEON8: @ %bb.0: +; NEON8-NEXT: movw r1, #52429 +; NEON8-NEXT: and r0, r0, #15 +; NEON8-NEXT: movt r1, #52428 +; NEON8-NEXT: movw r2, #13107 +; NEON8-NEXT: mul r1, r0, r1 +; NEON8-NEXT: movt r2, #13107 +; NEON8-NEXT: mov r0, #0 +; NEON8-NEXT: cmp r1, r2 +; NEON8-NEXT: movwhi r0, #1 +; NEON8-NEXT: bx lr + %urem = urem i4 %X, 5 + %cmp = icmp ne i4 %urem, 0 + ret i1 %cmp +} + +define i1 @test_urem_negative_odd(i9 %X) nounwind { +; ARM5-LABEL: test_urem_negative_odd: +; ARM5: @ %bb.0: +; ARM5-NEXT: mov r1, #255 +; ARM5-NEXT: orr r1, r1, #256 +; ARM5-NEXT: and r0, r0, r1 +; ARM5-NEXT: ldr r1, .LCPI3_0 +; ARM5-NEXT: mul r2, r0, r1 +; ARM5-NEXT: ldr r1, .LCPI3_1 +; ARM5-NEXT: mov r0, #0 +; ARM5-NEXT: cmp r2, r1 +; ARM5-NEXT: movhi r0, #1 +; ARM5-NEXT: bx lr +; ARM5-NEXT: .p2align 2 +; ARM5-NEXT: @ %bb.1: +; ARM5-NEXT: .LCPI3_0: +; ARM5-NEXT: .long 2837897523 @ 0xa926e133 +; ARM5-NEXT: .LCPI3_1: +; ARM5-NEXT: .long 8471335 @ 0x814327 +; +; ARM6-LABEL: test_urem_negative_odd: +; ARM6: @ %bb.0: +; ARM6-NEXT: mov r1, #255 +; ARM6-NEXT: ldr r2, .LCPI3_1 +; ARM6-NEXT: orr r1, r1, #256 +; ARM6-NEXT: and r0, r0, r1 +; ARM6-NEXT: ldr r1, .LCPI3_0 +; ARM6-NEXT: mul r1, r0, r1 +; ARM6-NEXT: mov r0, #0 +; ARM6-NEXT: cmp r1, r2 +; ARM6-NEXT: movhi r0, #1 +; ARM6-NEXT: bx lr +; ARM6-NEXT: .p2align 2 +; ARM6-NEXT: @ %bb.1: +; ARM6-NEXT: .LCPI3_0: +; ARM6-NEXT: .long 2837897523 @ 0xa926e133 +; ARM6-NEXT: .LCPI3_1: +; ARM6-NEXT: .long 8471335 @ 0x814327 +; +; ARM7-LABEL: test_urem_negative_odd: +; ARM7: @ %bb.0: +; ARM7-NEXT: movw r1, #57651 +; ARM7-NEXT: bfc r0, #9, #23 +; ARM7-NEXT: movt r1, #43302 +; ARM7-NEXT: movw r2, #17191 +; ARM7-NEXT: mul r1, r0, r1 +; ARM7-NEXT: movt r2, #129 +; ARM7-NEXT: mov r0, #0 +; ARM7-NEXT: cmp r1, r2 +; ARM7-NEXT: movwhi r0, #1 +; ARM7-NEXT: bx lr +; +; ARM8-LABEL: test_urem_negative_odd: +; ARM8: @ %bb.0: +; ARM8-NEXT: movw r1, #57651 +; ARM8-NEXT: bfc r0, #9, #23 +; ARM8-NEXT: movt r1, #43302 +; ARM8-NEXT: movw r2, #17191 +; ARM8-NEXT: mul r1, r0, r1 +; ARM8-NEXT: movt r2, #129 +; ARM8-NEXT: mov r0, #0 +; ARM8-NEXT: cmp r1, r2 +; ARM8-NEXT: movwhi r0, #1 +; ARM8-NEXT: bx lr +; +; NEON7-LABEL: test_urem_negative_odd: +; NEON7: @ %bb.0: +; NEON7-NEXT: movw r1, #57651 +; NEON7-NEXT: bfc r0, #9, #23 +; NEON7-NEXT: movt r1, #43302 +; NEON7-NEXT: movw r2, #17191 +; NEON7-NEXT: mul r1, r0, r1 +; NEON7-NEXT: movt r2, #129 +; NEON7-NEXT: mov r0, #0 +; NEON7-NEXT: cmp r1, r2 +; NEON7-NEXT: movwhi r0, #1 +; NEON7-NEXT: bx lr +; +; NEON8-LABEL: test_urem_negative_odd: +; NEON8: @ %bb.0: +; NEON8-NEXT: movw r1, #57651 +; NEON8-NEXT: bfc r0, #9, #23 +; NEON8-NEXT: movt r1, 
#43302 +; NEON8-NEXT: movw r2, #17191 +; NEON8-NEXT: mul r1, r0, r1 +; NEON8-NEXT: movt r2, #129 +; NEON8-NEXT: mov r0, #0 +; NEON8-NEXT: cmp r1, r2 +; NEON8-NEXT: movwhi r0, #1 +; NEON8-NEXT: bx lr + %urem = urem i9 %X, -5 + %cmp = icmp ne i9 %urem, 0 + ret i1 %cmp +} + +define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind { +; ARM5-LABEL: test_urem_vec: +; ARM5: @ %bb.0: +; ARM5-NEXT: push {r4, r5, r11, lr} +; ARM5-NEXT: mov r12, #255 +; ARM5-NEXT: ldr r3, .LCPI4_1 +; ARM5-NEXT: orr r12, r12, #1792 +; ARM5-NEXT: ldr lr, .LCPI4_0 +; ARM5-NEXT: and r1, r1, r12 +; ARM5-NEXT: and r2, r2, r12 +; ARM5-NEXT: and r0, r0, r12 +; ARM5-NEXT: mla r4, r1, r3, lr +; ARM5-NEXT: ldr r1, .LCPI4_2 +; ARM5-NEXT: ldr lr, .LCPI4_3 +; ARM5-NEXT: mov r3, #0 +; ARM5-NEXT: cmp r4, r1 +; ARM5-NEXT: ldr r4, .LCPI4_4 +; ARM5-NEXT: mov r1, #0 +; ARM5-NEXT: movhi r1, #1 +; ARM5-NEXT: mla r5, r2, r4, lr +; ARM5-NEXT: ldr r2, .LCPI4_5 +; ARM5-NEXT: cmp r5, r2 +; ARM5-NEXT: ldr r5, .LCPI4_6 +; ARM5-NEXT: mov r2, #0 +; ARM5-NEXT: movhi r2, #1 +; ARM5-NEXT: mul r4, r0, r5 +; ARM5-NEXT: ldr r5, .LCPI4_7 +; ARM5-NEXT: ror r0, r4, #1 +; ARM5-NEXT: cmp r0, r5 +; ARM5-NEXT: movhi r3, #1 +; ARM5-NEXT: mov r0, r3 +; ARM5-NEXT: pop {r4, r5, r11, pc} +; ARM5-NEXT: .p2align 2 +; ARM5-NEXT: @ %bb.1: +; ARM5-NEXT: .LCPI4_0: +; ARM5-NEXT: .long 1227133513 @ 0x49249249 +; ARM5-NEXT: .LCPI4_1: +; ARM5-NEXT: .long 3067833783 @ 0xb6db6db7 +; ARM5-NEXT: .LCPI4_2: +; ARM5-NEXT: .long 613566756 @ 0x24924924 +; ARM5-NEXT: .LCPI4_3: +; ARM5-NEXT: .long 4191955354 @ 0xf9dc299a +; ARM5-NEXT: .LCPI4_4: +; ARM5-NEXT: .long 2198989619 @ 0x8311eb33 +; ARM5-NEXT: .LCPI4_5: +; ARM5-NEXT: .long 2102284 @ 0x20140c +; ARM5-NEXT: .LCPI4_6: +; ARM5-NEXT: .long 2863311531 @ 0xaaaaaaab +; ARM5-NEXT: .LCPI4_7: +; ARM5-NEXT: .long 715827882 @ 0x2aaaaaaa +; +; ARM6-LABEL: test_urem_vec: +; ARM6: @ %bb.0: +; ARM6-NEXT: push {r4, lr} +; ARM6-NEXT: mov r12, #255 +; ARM6-NEXT: ldr r3, .LCPI4_1 +; ARM6-NEXT: orr r12, r12, #1792 +; ARM6-NEXT: ldr lr, .LCPI4_0 +; ARM6-NEXT: and r1, r1, r12 +; ARM6-NEXT: ldr r4, .LCPI4_4 +; ARM6-NEXT: and r2, r2, r12 +; ARM6-NEXT: and r0, r0, r12 +; ARM6-NEXT: mla r1, r1, r3, lr +; ARM6-NEXT: ldr lr, .LCPI4_2 +; ARM6-NEXT: mov r3, #0 +; ARM6-NEXT: cmp r1, lr +; ARM6-NEXT: ldr lr, .LCPI4_3 +; ARM6-NEXT: mla r2, r2, r4, lr +; ARM6-NEXT: ldr r4, .LCPI4_5 +; ARM6-NEXT: mov r1, #0 +; ARM6-NEXT: movhi r1, #1 +; ARM6-NEXT: cmp r2, r4 +; ARM6-NEXT: ldr r4, .LCPI4_6 +; ARM6-NEXT: mov r2, #0 +; ARM6-NEXT: movhi r2, #1 +; ARM6-NEXT: mul r0, r0, r4 +; ARM6-NEXT: ldr r4, .LCPI4_7 +; ARM6-NEXT: ror r0, r0, #1 +; ARM6-NEXT: cmp r0, r4 +; ARM6-NEXT: movhi r3, #1 +; ARM6-NEXT: mov r0, r3 +; ARM6-NEXT: pop {r4, pc} +; ARM6-NEXT: .p2align 2 +; ARM6-NEXT: @ %bb.1: +; ARM6-NEXT: .LCPI4_0: +; ARM6-NEXT: .long 1227133513 @ 0x49249249 +; ARM6-NEXT: .LCPI4_1: +; ARM6-NEXT: .long 3067833783 @ 0xb6db6db7 +; ARM6-NEXT: .LCPI4_2: +; ARM6-NEXT: .long 613566756 @ 0x24924924 +; ARM6-NEXT: .LCPI4_3: +; ARM6-NEXT: .long 4191955354 @ 0xf9dc299a +; ARM6-NEXT: .LCPI4_4: +; ARM6-NEXT: .long 2198989619 @ 0x8311eb33 +; ARM6-NEXT: .LCPI4_5: +; ARM6-NEXT: .long 2102284 @ 0x20140c +; ARM6-NEXT: .LCPI4_6: +; ARM6-NEXT: .long 2863311531 @ 0xaaaaaaab +; ARM6-NEXT: .LCPI4_7: +; ARM6-NEXT: .long 715827882 @ 0x2aaaaaaa +; +; ARM7-LABEL: test_urem_vec: +; ARM7: @ %bb.0: +; ARM7-NEXT: push {r4, lr} +; ARM7-NEXT: movw r3, #18725 +; ARM7-NEXT: bfc r1, #11, #21 +; ARM7-NEXT: movt r3, #9362 +; ARM7-NEXT: bfc r2, #11, #21 +; ARM7-NEXT: umull r3, r12, r1, r3 +; ARM7-NEXT: bfc r0, #11, #21 
+; ARM7-NEXT: movw r3, #25663 +; ARM7-NEXT: movt r3, #160 +; ARM7-NEXT: umull r3, lr, r2, r3 +; ARM7-NEXT: vldr d17, .LCPI4_0 +; ARM7-NEXT: movw r3, #43691 +; ARM7-NEXT: movt r3, #43690 +; ARM7-NEXT: umull r3, r4, r0, r3 +; ARM7-NEXT: sub r3, r1, r12 +; ARM7-NEXT: add r3, r12, r3, lsr #1 +; ARM7-NEXT: lsr r12, r3, #2 +; ARM7-NEXT: sub r3, r2, lr +; ARM7-NEXT: lsr r4, r4, #2 +; ARM7-NEXT: add r4, r4, r4, lsl #1 +; ARM7-NEXT: add r3, lr, r3, lsr #1 +; ARM7-NEXT: sub r0, r0, r4, lsl #1 +; ARM7-NEXT: lsr lr, r3, #10 +; ARM7-NEXT: movw r3, #2043 +; ARM7-NEXT: vmov.16 d16[0], r0 +; ARM7-NEXT: sub r0, r12, r12, lsl #3 +; ARM7-NEXT: mls r2, lr, r3, r2 +; ARM7-NEXT: add r0, r1, r0 +; ARM7-NEXT: vmov.16 d16[1], r0 +; ARM7-NEXT: vmov.16 d16[2], r2 +; ARM7-NEXT: vbic.i16 d16, #0xf800 +; ARM7-NEXT: vceq.i16 d16, d16, d17 +; ARM7-NEXT: vmvn d16, d16 +; ARM7-NEXT: vmov.u16 r0, d16[0] +; ARM7-NEXT: vmov.u16 r1, d16[1] +; ARM7-NEXT: vmov.u16 r2, d16[2] +; ARM7-NEXT: pop {r4, pc} +; ARM7-NEXT: .p2align 3 +; ARM7-NEXT: @ %bb.1: +; ARM7-NEXT: .LCPI4_0: +; ARM7-NEXT: .short 0 @ 0x0 +; ARM7-NEXT: .short 1 @ 0x1 +; ARM7-NEXT: .short 2 @ 0x2 +; ARM7-NEXT: .short 0 @ 0x0 +; +; ARM8-LABEL: test_urem_vec: +; ARM8: @ %bb.0: +; ARM8-NEXT: push {r4, lr} +; ARM8-NEXT: movw r3, #18725 +; ARM8-NEXT: bfc r1, #11, #21 +; ARM8-NEXT: movt r3, #9362 +; ARM8-NEXT: bfc r2, #11, #21 +; ARM8-NEXT: umull r3, r12, r1, r3 +; ARM8-NEXT: bfc r0, #11, #21 +; ARM8-NEXT: movw r3, #25663 +; ARM8-NEXT: movt r3, #160 +; ARM8-NEXT: umull r3, lr, r2, r3 +; ARM8-NEXT: vldr d17, .LCPI4_0 +; ARM8-NEXT: movw r3, #43691 +; ARM8-NEXT: movt r3, #43690 +; ARM8-NEXT: umull r3, r4, r0, r3 +; ARM8-NEXT: sub r3, r1, r12 +; ARM8-NEXT: add r3, r12, r3, lsr #1 +; ARM8-NEXT: lsr r12, r3, #2 +; ARM8-NEXT: sub r3, r2, lr +; ARM8-NEXT: lsr r4, r4, #2 +; ARM8-NEXT: add r4, r4, r4, lsl #1 +; ARM8-NEXT: add r3, lr, r3, lsr #1 +; ARM8-NEXT: sub r0, r0, r4, lsl #1 +; ARM8-NEXT: lsr lr, r3, #10 +; ARM8-NEXT: movw r3, #2043 +; ARM8-NEXT: vmov.16 d16[0], r0 +; ARM8-NEXT: sub r0, r12, r12, lsl #3 +; ARM8-NEXT: mls r2, lr, r3, r2 +; ARM8-NEXT: add r0, r1, r0 +; ARM8-NEXT: vmov.16 d16[1], r0 +; ARM8-NEXT: vmov.16 d16[2], r2 +; ARM8-NEXT: vbic.i16 d16, #0xf800 +; ARM8-NEXT: vceq.i16 d16, d16, d17 +; ARM8-NEXT: vmvn d16, d16 +; ARM8-NEXT: vmov.u16 r0, d16[0] +; ARM8-NEXT: vmov.u16 r1, d16[1] +; ARM8-NEXT: vmov.u16 r2, d16[2] +; ARM8-NEXT: pop {r4, pc} +; ARM8-NEXT: .p2align 3 +; ARM8-NEXT: @ %bb.1: +; ARM8-NEXT: .LCPI4_0: +; ARM8-NEXT: .short 0 @ 0x0 +; ARM8-NEXT: .short 1 @ 0x1 +; ARM8-NEXT: .short 2 @ 0x2 +; ARM8-NEXT: .short 0 @ 0x0 +; +; NEON7-LABEL: test_urem_vec: +; NEON7: @ %bb.0: +; NEON7-NEXT: push {r4, lr} +; NEON7-NEXT: movw r3, #18725 +; NEON7-NEXT: bfc r1, #11, #21 +; NEON7-NEXT: movt r3, #9362 +; NEON7-NEXT: bfc r2, #11, #21 +; NEON7-NEXT: umull r3, r12, r1, r3 +; NEON7-NEXT: bfc r0, #11, #21 +; NEON7-NEXT: movw r3, #25663 +; NEON7-NEXT: movt r3, #160 +; NEON7-NEXT: umull r3, lr, r2, r3 +; NEON7-NEXT: vldr d17, .LCPI4_0 +; NEON7-NEXT: movw r3, #43691 +; NEON7-NEXT: movt r3, #43690 +; NEON7-NEXT: umull r3, r4, r0, r3 +; NEON7-NEXT: sub r3, r1, r12 +; NEON7-NEXT: add r3, r12, r3, lsr #1 +; NEON7-NEXT: lsr r12, r3, #2 +; NEON7-NEXT: sub r3, r2, lr +; NEON7-NEXT: lsr r4, r4, #2 +; NEON7-NEXT: add r4, r4, r4, lsl #1 +; NEON7-NEXT: add r3, lr, r3, lsr #1 +; NEON7-NEXT: sub r0, r0, r4, lsl #1 +; NEON7-NEXT: lsr lr, r3, #10 +; NEON7-NEXT: movw r3, #2043 +; NEON7-NEXT: vmov.16 d16[0], r0 +; NEON7-NEXT: sub r0, r12, r12, lsl #3 +; NEON7-NEXT: mls r2, lr, r3, r2 +; 
NEON7-NEXT: add r0, r1, r0 +; NEON7-NEXT: vmov.16 d16[1], r0 +; NEON7-NEXT: vmov.16 d16[2], r2 +; NEON7-NEXT: vbic.i16 d16, #0xf800 +; NEON7-NEXT: vceq.i16 d16, d16, d17 +; NEON7-NEXT: vmvn d16, d16 +; NEON7-NEXT: vmov.u16 r0, d16[0] +; NEON7-NEXT: vmov.u16 r1, d16[1] +; NEON7-NEXT: vmov.u16 r2, d16[2] +; NEON7-NEXT: pop {r4, pc} +; NEON7-NEXT: .p2align 3 +; NEON7-NEXT: @ %bb.1: +; NEON7-NEXT: .LCPI4_0: +; NEON7-NEXT: .short 0 @ 0x0 +; NEON7-NEXT: .short 1 @ 0x1 +; NEON7-NEXT: .short 2 @ 0x2 +; NEON7-NEXT: .short 0 @ 0x0 +; +; NEON8-LABEL: test_urem_vec: +; NEON8: @ %bb.0: +; NEON8-NEXT: push {r4, lr} +; NEON8-NEXT: movw r3, #18725 +; NEON8-NEXT: bfc r1, #11, #21 +; NEON8-NEXT: movt r3, #9362 +; NEON8-NEXT: bfc r2, #11, #21 +; NEON8-NEXT: umull r3, r12, r1, r3 +; NEON8-NEXT: bfc r0, #11, #21 +; NEON8-NEXT: movw r3, #25663 +; NEON8-NEXT: movt r3, #160 +; NEON8-NEXT: umull r3, lr, r2, r3 +; NEON8-NEXT: vldr d17, .LCPI4_0 +; NEON8-NEXT: movw r3, #43691 +; NEON8-NEXT: movt r3, #43690 +; NEON8-NEXT: umull r3, r4, r0, r3 +; NEON8-NEXT: sub r3, r1, r12 +; NEON8-NEXT: add r3, r12, r3, lsr #1 +; NEON8-NEXT: lsr r12, r3, #2 +; NEON8-NEXT: sub r3, r2, lr +; NEON8-NEXT: lsr r4, r4, #2 +; NEON8-NEXT: add r4, r4, r4, lsl #1 +; NEON8-NEXT: add r3, lr, r3, lsr #1 +; NEON8-NEXT: sub r0, r0, r4, lsl #1 +; NEON8-NEXT: lsr lr, r3, #10 +; NEON8-NEXT: movw r3, #2043 +; NEON8-NEXT: vmov.16 d16[0], r0 +; NEON8-NEXT: sub r0, r12, r12, lsl #3 +; NEON8-NEXT: mls r2, lr, r3, r2 +; NEON8-NEXT: add r0, r1, r0 +; NEON8-NEXT: vmov.16 d16[1], r0 +; NEON8-NEXT: vmov.16 d16[2], r2 +; NEON8-NEXT: vbic.i16 d16, #0xf800 +; NEON8-NEXT: vceq.i16 d16, d16, d17 +; NEON8-NEXT: vmvn d16, d16 +; NEON8-NEXT: vmov.u16 r0, d16[0] +; NEON8-NEXT: vmov.u16 r1, d16[1] +; NEON8-NEXT: vmov.u16 r2, d16[2] +; NEON8-NEXT: pop {r4, pc} +; NEON8-NEXT: .p2align 3 +; NEON8-NEXT: @ %bb.1: +; NEON8-NEXT: .LCPI4_0: +; NEON8-NEXT: .short 0 @ 0x0 +; NEON8-NEXT: .short 1 @ 0x1 +; NEON8-NEXT: .short 2 @ 0x2 +; NEON8-NEXT: .short 0 @ 0x0 + %urem = urem <3 x i11> %X, + %cmp = icmp ne <3 x i11> %urem, + ret <3 x i1> %cmp +} + +define i1 @test_urem_larger(i63 %X) nounwind { +; ARM5-LABEL: test_urem_larger: +; ARM5: @ %bb.0: +; ARM5-NEXT: push {r11, lr} +; ARM5-NEXT: ldr r2, .LCPI5_0 +; ARM5-NEXT: bic r1, r1, #-2147483648 +; ARM5-NEXT: mov r3, #0 +; ARM5-NEXT: bl __umoddi3 +; ARM5-NEXT: orr r0, r0, r1 +; ARM5-NEXT: clz r0, r0 +; ARM5-NEXT: lsr r0, r0, #5 +; ARM5-NEXT: pop {r11, pc} +; ARM5-NEXT: .p2align 2 +; ARM5-NEXT: @ %bb.1: +; ARM5-NEXT: .LCPI5_0: +; ARM5-NEXT: .long 1234567890 @ 0x499602d2 +; +; ARM6-LABEL: test_urem_larger: +; ARM6: @ %bb.0: +; ARM6-NEXT: push {r11, lr} +; ARM6-NEXT: ldr r2, .LCPI5_0 +; ARM6-NEXT: bic r1, r1, #-2147483648 +; ARM6-NEXT: mov r3, #0 +; ARM6-NEXT: bl __umoddi3 +; ARM6-NEXT: orr r0, r0, r1 +; ARM6-NEXT: clz r0, r0 +; ARM6-NEXT: lsr r0, r0, #5 +; ARM6-NEXT: pop {r11, pc} +; ARM6-NEXT: .p2align 2 +; ARM6-NEXT: @ %bb.1: +; ARM6-NEXT: .LCPI5_0: +; ARM6-NEXT: .long 1234567890 @ 0x499602d2 +; +; ARM7-LABEL: test_urem_larger: +; ARM7: @ %bb.0: +; ARM7-NEXT: push {r11, lr} +; ARM7-NEXT: movw r2, #722 +; ARM7-NEXT: bic r1, r1, #-2147483648 +; ARM7-NEXT: movt r2, #18838 +; ARM7-NEXT: mov r3, #0 +; ARM7-NEXT: bl __umoddi3 +; ARM7-NEXT: orr r0, r0, r1 +; ARM7-NEXT: clz r0, r0 +; ARM7-NEXT: lsr r0, r0, #5 +; ARM7-NEXT: pop {r11, pc} +; +; ARM8-LABEL: test_urem_larger: +; ARM8: @ %bb.0: +; ARM8-NEXT: push {r11, lr} +; ARM8-NEXT: movw r2, #722 +; ARM8-NEXT: bic r1, r1, #-2147483648 +; ARM8-NEXT: movt r2, #18838 +; ARM8-NEXT: mov r3, #0 +; 
ARM8-NEXT: bl __umoddi3 +; ARM8-NEXT: orr r0, r0, r1 +; ARM8-NEXT: clz r0, r0 +; ARM8-NEXT: lsr r0, r0, #5 +; ARM8-NEXT: pop {r11, pc} +; +; NEON7-LABEL: test_urem_larger: +; NEON7: @ %bb.0: +; NEON7-NEXT: push {r11, lr} +; NEON7-NEXT: movw r2, #722 +; NEON7-NEXT: bic r1, r1, #-2147483648 +; NEON7-NEXT: movt r2, #18838 +; NEON7-NEXT: mov r3, #0 +; NEON7-NEXT: bl __umoddi3 +; NEON7-NEXT: orr r0, r0, r1 +; NEON7-NEXT: clz r0, r0 +; NEON7-NEXT: lsr r0, r0, #5 +; NEON7-NEXT: pop {r11, pc} +; +; NEON8-LABEL: test_urem_larger: +; NEON8: @ %bb.0: +; NEON8-NEXT: push {r11, lr} +; NEON8-NEXT: movw r2, #722 +; NEON8-NEXT: bic r1, r1, #-2147483648 +; NEON8-NEXT: movt r2, #18838 +; NEON8-NEXT: mov r3, #0 +; NEON8-NEXT: bl __umoddi3 +; NEON8-NEXT: orr r0, r0, r1 +; NEON8-NEXT: clz r0, r0 +; NEON8-NEXT: lsr r0, r0, #5 +; NEON8-NEXT: pop {r11, pc} + %urem = urem i63 %X, 1234567890 + %cmp = icmp eq i63 %urem, 0 + ret i1 %cmp +} diff --git a/llvm/test/CodeGen/Mips/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/Mips/srem-seteq-illegal-types.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Mips/srem-seteq-illegal-types.ll @@ -0,0 +1,124 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=mips-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=MIPSEL +; RUN: llc -mtriple=mips64el-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=MIPS64EL + +define i1 @test_srem_odd(i29 %X) nounwind { +; MIPSEL-LABEL: test_srem_odd: +; MIPSEL: # %bb.0: +; MIPSEL-NEXT: lui $1, 48986 +; MIPSEL-NEXT: ori $1, $1, 33099 +; MIPSEL-NEXT: sll $2, $4, 3 +; MIPSEL-NEXT: sra $2, $2, 3 +; MIPSEL-NEXT: mul $1, $2, $1 +; MIPSEL-NEXT: lui $2, 330 +; MIPSEL-NEXT: ori $2, $2, 64874 +; MIPSEL-NEXT: addu $1, $1, $2 +; MIPSEL-NEXT: lui $2, 661 +; MIPSEL-NEXT: ori $2, $2, 64213 +; MIPSEL-NEXT: jr $ra +; MIPSEL-NEXT: sltu $2, $1, $2 +; +; MIPS64EL-LABEL: test_srem_odd: +; MIPS64EL: # %bb.0: +; MIPS64EL-NEXT: lui $1, 48986 +; MIPS64EL-NEXT: ori $1, $1, 33099 +; MIPS64EL-NEXT: sll $2, $4, 0 +; MIPS64EL-NEXT: sll $2, $2, 3 +; MIPS64EL-NEXT: sra $2, $2, 3 +; MIPS64EL-NEXT: mul $1, $2, $1 +; MIPS64EL-NEXT: lui $2, 330 +; MIPS64EL-NEXT: ori $2, $2, 64874 +; MIPS64EL-NEXT: addu $1, $1, $2 +; MIPS64EL-NEXT: lui $2, 661 +; MIPS64EL-NEXT: ori $2, $2, 64213 +; MIPS64EL-NEXT: jr $ra +; MIPS64EL-NEXT: sltu $2, $1, $2 + %srem = srem i29 %X, 99 + %cmp = icmp eq i29 %srem, 0 + ret i1 %cmp +} + +define i1 @test_srem_even(i4 %X) nounwind { +; MIPSEL-LABEL: test_srem_even: +; MIPSEL: # %bb.0: +; MIPSEL-NEXT: lui $1, 10922 +; MIPSEL-NEXT: ori $1, $1, 43691 +; MIPSEL-NEXT: sll $2, $4, 28 +; MIPSEL-NEXT: sra $2, $2, 28 +; MIPSEL-NEXT: mult $2, $1 +; MIPSEL-NEXT: mfhi $1 +; MIPSEL-NEXT: srl $3, $1, 31 +; MIPSEL-NEXT: addu $1, $1, $3 +; MIPSEL-NEXT: addiu $3, $zero, 1 +; MIPSEL-NEXT: sll $4, $1, 1 +; MIPSEL-NEXT: sll $1, $1, 2 +; MIPSEL-NEXT: addu $1, $1, $4 +; MIPSEL-NEXT: subu $1, $2, $1 +; MIPSEL-NEXT: xor $1, $1, $3 +; MIPSEL-NEXT: jr $ra +; MIPSEL-NEXT: sltiu $2, $1, 1 +; +; MIPS64EL-LABEL: test_srem_even: +; MIPS64EL: # %bb.0: +; MIPS64EL-NEXT: lui $1, 10922 +; MIPS64EL-NEXT: ori $1, $1, 43691 +; MIPS64EL-NEXT: sll $2, $4, 0 +; MIPS64EL-NEXT: sll $2, $2, 28 +; MIPS64EL-NEXT: sra $2, $2, 28 +; MIPS64EL-NEXT: mult $2, $1 +; MIPS64EL-NEXT: mfhi $1 +; MIPS64EL-NEXT: addiu $3, $zero, 1 +; MIPS64EL-NEXT: srl $4, $1, 31 +; MIPS64EL-NEXT: addu $1, $1, $4 +; MIPS64EL-NEXT: sll $4, $1, 1 +; MIPS64EL-NEXT: sll $1, $1, 2 +; MIPS64EL-NEXT: addu $1, $1, $4 +; MIPS64EL-NEXT: subu $1, $2, $1 +; MIPS64EL-NEXT: 
xor $1, $1, $3 +; MIPS64EL-NEXT: jr $ra +; MIPS64EL-NEXT: sltiu $2, $1, 1 + %srem = srem i4 %X, 6 + %cmp = icmp eq i4 %srem, 1 + ret i1 %cmp +} + +define i1 @test_srem_pow2_setne(i6 %X) nounwind { +; MIPSEL-LABEL: test_srem_pow2_setne: +; MIPSEL: # %bb.0: +; MIPSEL-NEXT: sll $1, $4, 26 +; MIPSEL-NEXT: sra $1, $1, 26 +; MIPSEL-NEXT: srl $1, $1, 9 +; MIPSEL-NEXT: andi $1, $1, 3 +; MIPSEL-NEXT: addu $1, $4, $1 +; MIPSEL-NEXT: andi $1, $1, 60 +; MIPSEL-NEXT: subu $1, $4, $1 +; MIPSEL-NEXT: andi $1, $1, 63 +; MIPSEL-NEXT: jr $ra +; MIPSEL-NEXT: sltu $2, $zero, $1 +; +; MIPS64EL-LABEL: test_srem_pow2_setne: +; MIPS64EL: # %bb.0: +; MIPS64EL-NEXT: sll $1, $4, 0 +; MIPS64EL-NEXT: sll $2, $1, 26 +; MIPS64EL-NEXT: sra $2, $2, 26 +; MIPS64EL-NEXT: srl $2, $2, 9 +; MIPS64EL-NEXT: andi $2, $2, 3 +; MIPS64EL-NEXT: addu $2, $1, $2 +; MIPS64EL-NEXT: andi $2, $2, 60 +; MIPS64EL-NEXT: subu $1, $1, $2 +; MIPS64EL-NEXT: andi $1, $1, 63 +; MIPS64EL-NEXT: jr $ra +; MIPS64EL-NEXT: sltu $2, $zero, $1 + %srem = srem i6 %X, 4 + %cmp = icmp ne i6 %srem, 0 + ret i1 %cmp +} + +; Asserts today +; See https://bugs.llvm.org/show_bug.cgi?id=49551 +; and https://bugs.llvm.org/show_bug.cgi?id=49550 +; define <4 x i1> @test_srem_vec(<4 x i31> %X) nounwind { +; %srem = srem <4 x i31> %X, +; %cmp = icmp ne <4 x i31> %srem, +; ret <4 x i1> %cmp +; } diff --git a/llvm/test/CodeGen/Mips/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/Mips/urem-seteq-illegal-types.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Mips/urem-seteq-illegal-types.ll @@ -0,0 +1,179 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=mips-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=MIPSEL +; RUN: llc -mtriple=mips64el-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=MIPS64EL + +define i1 @test_urem_odd(i13 %X) nounwind { +; MIPSEL-LABEL: test_urem_odd: +; MIPSEL: # %bb.0: +; MIPSEL-NEXT: lui $1, 52428 +; MIPSEL-NEXT: ori $1, $1, 52429 +; MIPSEL-NEXT: andi $2, $4, 8191 +; MIPSEL-NEXT: mul $1, $2, $1 +; MIPSEL-NEXT: lui $2, 13107 +; MIPSEL-NEXT: ori $2, $2, 13108 +; MIPSEL-NEXT: jr $ra +; MIPSEL-NEXT: sltu $2, $1, $2 +; +; MIPS64EL-LABEL: test_urem_odd: +; MIPS64EL: # %bb.0: +; MIPS64EL-NEXT: lui $1, 52428 +; MIPS64EL-NEXT: ori $1, $1, 52429 +; MIPS64EL-NEXT: sll $2, $4, 0 +; MIPS64EL-NEXT: andi $2, $2, 8191 +; MIPS64EL-NEXT: mul $1, $2, $1 +; MIPS64EL-NEXT: lui $2, 13107 +; MIPS64EL-NEXT: ori $2, $2, 13108 +; MIPS64EL-NEXT: jr $ra +; MIPS64EL-NEXT: sltu $2, $1, $2 + %urem = urem i13 %X, 5 + %cmp = icmp eq i13 %urem, 0 + ret i1 %cmp +} + +define i1 @test_urem_even(i27 %X) nounwind { +; MIPSEL-LABEL: test_urem_even: +; MIPSEL: # %bb.0: +; MIPSEL-NEXT: lui $1, 2047 +; MIPSEL-NEXT: ori $1, $1, 65535 +; MIPSEL-NEXT: and $1, $4, $1 +; MIPSEL-NEXT: srl $2, $1, 1 +; MIPSEL-NEXT: lui $3, 37449 +; MIPSEL-NEXT: ori $3, $3, 9363 +; MIPSEL-NEXT: multu $2, $3 +; MIPSEL-NEXT: mfhi $2 +; MIPSEL-NEXT: srl $2, $2, 2 +; MIPSEL-NEXT: sll $3, $2, 4 +; MIPSEL-NEXT: sll $2, $2, 1 +; MIPSEL-NEXT: subu $2, $2, $3 +; MIPSEL-NEXT: addu $1, $1, $2 +; MIPSEL-NEXT: jr $ra +; MIPSEL-NEXT: sltiu $2, $1, 1 +; +; MIPS64EL-LABEL: test_urem_even: +; MIPS64EL: # %bb.0: +; MIPS64EL-NEXT: lui $1, 2047 +; MIPS64EL-NEXT: ori $1, $1, 65535 +; MIPS64EL-NEXT: sll $2, $4, 0 +; MIPS64EL-NEXT: and $1, $2, $1 +; MIPS64EL-NEXT: srl $2, $1, 1 +; MIPS64EL-NEXT: lui $3, 37449 +; MIPS64EL-NEXT: ori $3, $3, 9363 +; MIPS64EL-NEXT: multu $2, $3 +; MIPS64EL-NEXT: mfhi $2 +; MIPS64EL-NEXT: srl $2, $2, 2 +; MIPS64EL-NEXT: sll $3, $2, 
4 +; MIPS64EL-NEXT: sll $2, $2, 1 +; MIPS64EL-NEXT: subu $2, $2, $3 +; MIPS64EL-NEXT: addu $1, $1, $2 +; MIPS64EL-NEXT: jr $ra +; MIPS64EL-NEXT: sltiu $2, $1, 1 + %urem = urem i27 %X, 14 + %cmp = icmp eq i27 %urem, 0 + ret i1 %cmp +} + +define i1 @test_urem_odd_setne(i4 %X) nounwind { +; MIPSEL-LABEL: test_urem_odd_setne: +; MIPSEL: # %bb.0: +; MIPSEL-NEXT: lui $1, 52428 +; MIPSEL-NEXT: ori $1, $1, 52429 +; MIPSEL-NEXT: andi $2, $4, 15 +; MIPSEL-NEXT: mul $1, $2, $1 +; MIPSEL-NEXT: lui $2, 13107 +; MIPSEL-NEXT: ori $2, $2, 13107 +; MIPSEL-NEXT: jr $ra +; MIPSEL-NEXT: sltu $2, $2, $1 +; +; MIPS64EL-LABEL: test_urem_odd_setne: +; MIPS64EL: # %bb.0: +; MIPS64EL-NEXT: lui $1, 52428 +; MIPS64EL-NEXT: ori $1, $1, 52429 +; MIPS64EL-NEXT: sll $2, $4, 0 +; MIPS64EL-NEXT: andi $2, $2, 15 +; MIPS64EL-NEXT: mul $1, $2, $1 +; MIPS64EL-NEXT: lui $2, 13107 +; MIPS64EL-NEXT: ori $2, $2, 13107 +; MIPS64EL-NEXT: jr $ra +; MIPS64EL-NEXT: sltu $2, $2, $1 + %urem = urem i4 %X, 5 + %cmp = icmp ne i4 %urem, 0 + ret i1 %cmp +} + +define i1 @test_urem_negative_odd(i9 %X) nounwind { +; MIPSEL-LABEL: test_urem_negative_odd: +; MIPSEL: # %bb.0: +; MIPSEL-NEXT: lui $1, 43302 +; MIPSEL-NEXT: ori $1, $1, 57651 +; MIPSEL-NEXT: andi $2, $4, 511 +; MIPSEL-NEXT: mul $1, $2, $1 +; MIPSEL-NEXT: lui $2, 129 +; MIPSEL-NEXT: ori $2, $2, 17191 +; MIPSEL-NEXT: jr $ra +; MIPSEL-NEXT: sltu $2, $2, $1 +; +; MIPS64EL-LABEL: test_urem_negative_odd: +; MIPS64EL: # %bb.0: +; MIPS64EL-NEXT: lui $1, 43302 +; MIPS64EL-NEXT: ori $1, $1, 57651 +; MIPS64EL-NEXT: sll $2, $4, 0 +; MIPS64EL-NEXT: andi $2, $2, 511 +; MIPS64EL-NEXT: mul $1, $2, $1 +; MIPS64EL-NEXT: lui $2, 129 +; MIPS64EL-NEXT: ori $2, $2, 17191 +; MIPS64EL-NEXT: jr $ra +; MIPS64EL-NEXT: sltu $2, $2, $1 + %urem = urem i9 %X, -5 + %cmp = icmp ne i9 %urem, 0 + ret i1 %cmp +} + +; Asserts today +; define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind { +; %urem = urem <3 x i11> %X, +; %cmp = icmp ne <3 x i11> %urem, +; ret <3 x i1> %cmp +; } + +define i1 @test_urem_oversized(i66 %X) nounwind { +; MIPSEL-LABEL: test_urem_oversized: +; MIPSEL: # %bb.0: +; MIPSEL-NEXT: addiu $sp, $sp, -40 +; MIPSEL-NEXT: sw $ra, 36($sp) # 4-byte Folded Spill +; MIPSEL-NEXT: move $7, $6 +; MIPSEL-NEXT: move $6, $5 +; MIPSEL-NEXT: lui $1, 18838 +; MIPSEL-NEXT: ori $1, $1, 722 +; MIPSEL-NEXT: sw $1, 28($sp) +; MIPSEL-NEXT: sw $zero, 24($sp) +; MIPSEL-NEXT: sw $zero, 20($sp) +; MIPSEL-NEXT: sw $zero, 16($sp) +; MIPSEL-NEXT: andi $5, $4, 3 +; MIPSEL-NEXT: jal __umodti3 +; MIPSEL-NEXT: addiu $4, $zero, 0 +; MIPSEL-NEXT: or $1, $4, $2 +; MIPSEL-NEXT: or $2, $5, $3 +; MIPSEL-NEXT: or $1, $2, $1 +; MIPSEL-NEXT: sltiu $2, $1, 1 +; MIPSEL-NEXT: lw $ra, 36($sp) # 4-byte Folded Reload +; MIPSEL-NEXT: jr $ra +; MIPSEL-NEXT: addiu $sp, $sp, 40 +; +; MIPS64EL-LABEL: test_urem_oversized: +; MIPS64EL: # %bb.0: +; MIPS64EL-NEXT: daddiu $sp, $sp, -16 +; MIPS64EL-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; MIPS64EL-NEXT: andi $5, $5, 3 +; MIPS64EL-NEXT: lui $1, 18838 +; MIPS64EL-NEXT: ori $6, $1, 722 +; MIPS64EL-NEXT: jal __umodti3 +; MIPS64EL-NEXT: daddiu $7, $zero, 0 +; MIPS64EL-NEXT: or $1, $2, $3 +; MIPS64EL-NEXT: sltiu $2, $1, 1 +; MIPS64EL-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; MIPS64EL-NEXT: jr $ra +; MIPS64EL-NEXT: daddiu $sp, $sp, 16 + %urem = urem i66 %X, 1234567890 + %cmp = icmp eq i66 %urem, 0 + ret i1 %cmp +} diff --git a/llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll new file mode 100644 --- /dev/null +++ 
b/llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll @@ -0,0 +1,250 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=powerpc-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=PPC +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=PPC64LE + +define i1 @test_srem_odd(i29 %X) nounwind { +; PPC-LABEL: test_srem_odd: +; PPC: # %bb.0: +; PPC-NEXT: lis 4, -23170 +; PPC-NEXT: slwi 3, 3, 3 +; PPC-NEXT: ori 4, 4, 46339 +; PPC-NEXT: srawi 3, 3, 3 +; PPC-NEXT: mulhw 4, 3, 4 +; PPC-NEXT: add 4, 4, 3 +; PPC-NEXT: srwi 5, 4, 31 +; PPC-NEXT: srawi 4, 4, 6 +; PPC-NEXT: add 4, 4, 5 +; PPC-NEXT: mulli 4, 4, 99 +; PPC-NEXT: sub 3, 3, 4 +; PPC-NEXT: cntlzw 3, 3 +; PPC-NEXT: rlwinm 3, 3, 27, 31, 31 +; PPC-NEXT: blr +; +; PPC64LE-LABEL: test_srem_odd: +; PPC64LE: # %bb.0: +; PPC64LE-NEXT: lis 4, -23170 +; PPC64LE-NEXT: slwi 3, 3, 3 +; PPC64LE-NEXT: srawi 3, 3, 3 +; PPC64LE-NEXT: ori 4, 4, 46339 +; PPC64LE-NEXT: mulhw 4, 3, 4 +; PPC64LE-NEXT: add 4, 4, 3 +; PPC64LE-NEXT: srwi 5, 4, 31 +; PPC64LE-NEXT: srawi 4, 4, 6 +; PPC64LE-NEXT: add 4, 4, 5 +; PPC64LE-NEXT: mulli 4, 4, 99 +; PPC64LE-NEXT: sub 3, 3, 4 +; PPC64LE-NEXT: cntlzw 3, 3 +; PPC64LE-NEXT: rlwinm 3, 3, 27, 31, 31 +; PPC64LE-NEXT: blr + %srem = srem i29 %X, 99 + %cmp = icmp eq i29 %srem, 0 + ret i1 %cmp +} + +define i1 @test_srem_even(i4 %X) nounwind { +; PPC-LABEL: test_srem_even: +; PPC: # %bb.0: +; PPC-NEXT: lis 4, 10922 +; PPC-NEXT: slwi 3, 3, 28 +; PPC-NEXT: ori 4, 4, 43691 +; PPC-NEXT: srawi 3, 3, 28 +; PPC-NEXT: mulhw 4, 3, 4 +; PPC-NEXT: srwi 5, 4, 31 +; PPC-NEXT: add 4, 4, 5 +; PPC-NEXT: li 5, 0 +; PPC-NEXT: mulli 4, 4, 6 +; PPC-NEXT: sub 3, 3, 4 +; PPC-NEXT: cmpwi 3, 1 +; PPC-NEXT: li 3, 1 +; PPC-NEXT: bclr 12, 2, 0 +; PPC-NEXT: # %bb.1: +; PPC-NEXT: ori 3, 5, 0 +; PPC-NEXT: blr +; +; PPC64LE-LABEL: test_srem_even: +; PPC64LE: # %bb.0: +; PPC64LE-NEXT: lis 4, 10922 +; PPC64LE-NEXT: slwi 3, 3, 28 +; PPC64LE-NEXT: ori 4, 4, 43691 +; PPC64LE-NEXT: srawi 3, 3, 28 +; PPC64LE-NEXT: mulhw 4, 3, 4 +; PPC64LE-NEXT: srwi 5, 4, 31 +; PPC64LE-NEXT: add 4, 4, 5 +; PPC64LE-NEXT: mulli 4, 4, 6 +; PPC64LE-NEXT: sub 3, 3, 4 +; PPC64LE-NEXT: li 4, 1 +; PPC64LE-NEXT: cmpwi 3, 1 +; PPC64LE-NEXT: li 3, 0 +; PPC64LE-NEXT: iseleq 3, 4, 3 +; PPC64LE-NEXT: blr + %srem = srem i4 %X, 6 + %cmp = icmp eq i4 %srem, 1 + ret i1 %cmp +} + +define i1 @test_srem_pow2_setne(i6 %X) nounwind { +; PPC-LABEL: test_srem_pow2_setne: +; PPC: # %bb.0: +; PPC-NEXT: slwi 4, 3, 26 +; PPC-NEXT: srawi 4, 4, 26 +; PPC-NEXT: rlwinm 4, 4, 23, 30, 31 +; PPC-NEXT: add 4, 3, 4 +; PPC-NEXT: rlwinm 4, 4, 0, 26, 29 +; PPC-NEXT: sub 3, 3, 4 +; PPC-NEXT: clrlwi 3, 3, 26 +; PPC-NEXT: cntlzw 3, 3 +; PPC-NEXT: not 3, 3 +; PPC-NEXT: rlwinm 3, 3, 27, 31, 31 +; PPC-NEXT: blr +; +; PPC64LE-LABEL: test_srem_pow2_setne: +; PPC64LE: # %bb.0: +; PPC64LE-NEXT: slwi 4, 3, 26 +; PPC64LE-NEXT: srawi 4, 4, 26 +; PPC64LE-NEXT: rlwinm 4, 4, 23, 30, 31 +; PPC64LE-NEXT: add 4, 3, 4 +; PPC64LE-NEXT: rlwinm 4, 4, 0, 26, 29 +; PPC64LE-NEXT: sub 3, 3, 4 +; PPC64LE-NEXT: clrlwi 3, 3, 26 +; PPC64LE-NEXT: cntlzw 3, 3 +; PPC64LE-NEXT: not 3, 3 +; PPC64LE-NEXT: rlwinm 3, 3, 27, 31, 31 +; PPC64LE-NEXT: blr + %srem = srem i6 %X, 4 + %cmp = icmp ne i6 %srem, 0 + ret i1 %cmp +} + +define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind { +; PPC-LABEL: test_srem_vec: +; PPC: # %bb.0: +; PPC-NEXT: mflr 0 +; PPC-NEXT: stw 0, 4(1) +; PPC-NEXT: stwu 1, -48(1) +; PPC-NEXT: stw 29, 36(1) # 4-byte Folded Spill +; PPC-NEXT: mr 29, 6 +; PPC-NEXT: 
clrlwi 5, 5, 31 +; PPC-NEXT: clrlwi 6, 7, 31 +; PPC-NEXT: clrlwi 3, 3, 31 +; PPC-NEXT: stw 27, 28(1) # 4-byte Folded Spill +; PPC-NEXT: neg 27, 6 +; PPC-NEXT: stw 28, 32(1) # 4-byte Folded Spill +; PPC-NEXT: neg 28, 5 +; PPC-NEXT: neg 3, 3 +; PPC-NEXT: li 5, 0 +; PPC-NEXT: li 6, 9 +; PPC-NEXT: stw 25, 20(1) # 4-byte Folded Spill +; PPC-NEXT: stw 26, 24(1) # 4-byte Folded Spill +; PPC-NEXT: stw 30, 40(1) # 4-byte Folded Spill +; PPC-NEXT: mr 30, 8 +; PPC-NEXT: bl __moddi3 +; PPC-NEXT: mr 26, 3 +; PPC-NEXT: mr 25, 4 +; PPC-NEXT: mr 3, 27 +; PPC-NEXT: mr 4, 30 +; PPC-NEXT: li 5, -1 +; PPC-NEXT: li 6, -9 +; PPC-NEXT: bl __moddi3 +; PPC-NEXT: mr 30, 3 +; PPC-NEXT: mr 27, 4 +; PPC-NEXT: mr 3, 28 +; PPC-NEXT: mr 4, 29 +; PPC-NEXT: li 5, 0 +; PPC-NEXT: li 6, 9 +; PPC-NEXT: bl __moddi3 +; PPC-NEXT: not 3, 3 +; PPC-NEXT: xori 4, 4, 65533 +; PPC-NEXT: xori 5, 27, 3 +; PPC-NEXT: xori 6, 25, 3 +; PPC-NEXT: clrlwi 3, 3, 31 +; PPC-NEXT: xoris 4, 4, 65535 +; PPC-NEXT: or 5, 5, 30 +; PPC-NEXT: or 6, 6, 26 +; PPC-NEXT: or 4, 4, 3 +; PPC-NEXT: cntlzw 6, 6 +; PPC-NEXT: cntlzw 5, 5 +; PPC-NEXT: cntlzw 4, 4 +; PPC-NEXT: not 3, 6 +; PPC-NEXT: not 5, 5 +; PPC-NEXT: not 4, 4 +; PPC-NEXT: rlwinm 3, 3, 27, 31, 31 +; PPC-NEXT: rlwinm 5, 5, 27, 31, 31 +; PPC-NEXT: rlwinm 4, 4, 27, 31, 31 +; PPC-NEXT: lwz 30, 40(1) # 4-byte Folded Reload +; PPC-NEXT: lwz 29, 36(1) # 4-byte Folded Reload +; PPC-NEXT: lwz 28, 32(1) # 4-byte Folded Reload +; PPC-NEXT: lwz 27, 28(1) # 4-byte Folded Reload +; PPC-NEXT: lwz 26, 24(1) # 4-byte Folded Reload +; PPC-NEXT: lwz 25, 20(1) # 4-byte Folded Reload +; PPC-NEXT: lwz 0, 52(1) +; PPC-NEXT: addi 1, 1, 48 +; PPC-NEXT: mtlr 0 +; PPC-NEXT: blr +; +; PPC64LE-LABEL: test_srem_vec: +; PPC64LE: # %bb.0: +; PPC64LE-NEXT: lis 6, 1820 +; PPC64LE-NEXT: sldi 3, 3, 31 +; PPC64LE-NEXT: ori 6, 6, 29127 +; PPC64LE-NEXT: sldi 5, 5, 31 +; PPC64LE-NEXT: rldic 6, 6, 34, 3 +; PPC64LE-NEXT: sldi 4, 4, 31 +; PPC64LE-NEXT: oris 6, 6, 29127 +; PPC64LE-NEXT: sradi 3, 3, 31 +; PPC64LE-NEXT: ori 7, 6, 7282 +; PPC64LE-NEXT: sradi 5, 5, 31 +; PPC64LE-NEXT: ori 6, 6, 7281 +; PPC64LE-NEXT: sradi 4, 4, 31 +; PPC64LE-NEXT: mulhd 8, 3, 7 +; PPC64LE-NEXT: mulhd 7, 4, 7 +; PPC64LE-NEXT: mulhd 6, 5, 6 +; PPC64LE-NEXT: rldicl 9, 8, 1, 63 +; PPC64LE-NEXT: rldicl 10, 7, 1, 63 +; PPC64LE-NEXT: sub 6, 6, 5 +; PPC64LE-NEXT: add 8, 8, 9 +; PPC64LE-NEXT: rldicl 9, 6, 1, 63 +; PPC64LE-NEXT: add 7, 7, 10 +; PPC64LE-NEXT: sradi 6, 6, 3 +; PPC64LE-NEXT: sldi 10, 8, 3 +; PPC64LE-NEXT: add 6, 6, 9 +; PPC64LE-NEXT: add 8, 8, 10 +; PPC64LE-NEXT: addis 9, 2, .LCPI3_0@toc@ha +; PPC64LE-NEXT: sldi 10, 7, 3 +; PPC64LE-NEXT: sub 3, 3, 8 +; PPC64LE-NEXT: addi 9, 9, .LCPI3_0@toc@l +; PPC64LE-NEXT: add 7, 7, 10 +; PPC64LE-NEXT: sldi 8, 6, 3 +; PPC64LE-NEXT: lxvd2x 0, 0, 9 +; PPC64LE-NEXT: mtfprd 1, 3 +; PPC64LE-NEXT: sub 4, 4, 7 +; PPC64LE-NEXT: add 6, 6, 8 +; PPC64LE-NEXT: addis 7, 2, .LCPI3_1@toc@ha +; PPC64LE-NEXT: mtfprd 2, 4 +; PPC64LE-NEXT: add 4, 5, 6 +; PPC64LE-NEXT: addi 3, 7, .LCPI3_1@toc@l +; PPC64LE-NEXT: addis 5, 2, .LCPI3_2@toc@ha +; PPC64LE-NEXT: mtfprd 4, 4 +; PPC64LE-NEXT: lxvd2x 3, 0, 3 +; PPC64LE-NEXT: addi 3, 5, .LCPI3_2@toc@l +; PPC64LE-NEXT: xxswapd 34, 0 +; PPC64LE-NEXT: xxmrghd 35, 2, 1 +; PPC64LE-NEXT: lxvd2x 0, 0, 3 +; PPC64LE-NEXT: xxswapd 36, 4 +; PPC64LE-NEXT: xxswapd 37, 3 +; PPC64LE-NEXT: xxland 35, 35, 34 +; PPC64LE-NEXT: xxland 34, 36, 34 +; PPC64LE-NEXT: xxswapd 36, 0 +; PPC64LE-NEXT: vcmpequd 3, 3, 5 +; PPC64LE-NEXT: vcmpequd 2, 2, 4 +; PPC64LE-NEXT: xxlnor 0, 35, 35 +; PPC64LE-NEXT: xxlnor 34, 34, 34 +; 
PPC64LE-NEXT: xxswapd 1, 0 +; PPC64LE-NEXT: mffprwz 4, 0 +; PPC64LE-NEXT: xxswapd 2, 34 +; PPC64LE-NEXT: mffprwz 3, 1 +; PPC64LE-NEXT: mffprwz 5, 2 +; PPC64LE-NEXT: blr + %srem = srem <3 x i33> %X, + %cmp = icmp ne <3 x i33> %srem, + ret <3 x i1> %cmp +} diff --git a/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll @@ -0,0 +1,290 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=powerpc-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=PPC +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=PPC64LE + +define i1 @test_urem_odd(i13 %X) nounwind { +; PPC-LABEL: test_urem_odd: +; PPC: # %bb.0: +; PPC-NEXT: lis 4, -13108 +; PPC-NEXT: clrlwi 3, 3, 19 +; PPC-NEXT: ori 4, 4, 52429 +; PPC-NEXT: mulhwu 4, 3, 4 +; PPC-NEXT: srwi 4, 4, 2 +; PPC-NEXT: mulli 4, 4, 5 +; PPC-NEXT: sub 3, 3, 4 +; PPC-NEXT: cntlzw 3, 3 +; PPC-NEXT: rlwinm 3, 3, 27, 31, 31 +; PPC-NEXT: blr +; +; PPC64LE-LABEL: test_urem_odd: +; PPC64LE: # %bb.0: +; PPC64LE-NEXT: lis 4, -13108 +; PPC64LE-NEXT: clrlwi 3, 3, 19 +; PPC64LE-NEXT: ori 4, 4, 52429 +; PPC64LE-NEXT: mulhwu 4, 3, 4 +; PPC64LE-NEXT: rlwinm 5, 4, 0, 0, 29 +; PPC64LE-NEXT: srwi 4, 4, 2 +; PPC64LE-NEXT: add 4, 4, 5 +; PPC64LE-NEXT: sub 3, 3, 4 +; PPC64LE-NEXT: cntlzw 3, 3 +; PPC64LE-NEXT: rlwinm 3, 3, 27, 31, 31 +; PPC64LE-NEXT: blr + %urem = urem i13 %X, 5 + %cmp = icmp eq i13 %urem, 0 + ret i1 %cmp +} + +define i1 @test_urem_even(i27 %X) nounwind { +; PPC-LABEL: test_urem_even: +; PPC: # %bb.0: +; PPC-NEXT: lis 4, -28087 +; PPC-NEXT: rlwinm 5, 3, 31, 6, 31 +; PPC-NEXT: ori 4, 4, 9363 +; PPC-NEXT: mulhwu 4, 5, 4 +; PPC-NEXT: srwi 4, 4, 2 +; PPC-NEXT: clrlwi 3, 3, 5 +; PPC-NEXT: mulli 4, 4, 14 +; PPC-NEXT: sub 3, 3, 4 +; PPC-NEXT: cntlzw 3, 3 +; PPC-NEXT: rlwinm 3, 3, 27, 31, 31 +; PPC-NEXT: blr +; +; PPC64LE-LABEL: test_urem_even: +; PPC64LE: # %bb.0: +; PPC64LE-NEXT: lis 4, -28087 +; PPC64LE-NEXT: rlwinm 5, 3, 31, 6, 31 +; PPC64LE-NEXT: clrlwi 3, 3, 5 +; PPC64LE-NEXT: ori 4, 4, 9363 +; PPC64LE-NEXT: mulhwu 4, 5, 4 +; PPC64LE-NEXT: srwi 4, 4, 2 +; PPC64LE-NEXT: mulli 4, 4, 14 +; PPC64LE-NEXT: sub 3, 3, 4 +; PPC64LE-NEXT: cntlzw 3, 3 +; PPC64LE-NEXT: rlwinm 3, 3, 27, 31, 31 +; PPC64LE-NEXT: blr + %urem = urem i27 %X, 14 + %cmp = icmp eq i27 %urem, 0 + ret i1 %cmp +} + +define i1 @test_urem_odd_setne(i4 %X) nounwind { +; PPC-LABEL: test_urem_odd_setne: +; PPC: # %bb.0: +; PPC-NEXT: lis 4, -13108 +; PPC-NEXT: clrlwi 3, 3, 28 +; PPC-NEXT: ori 4, 4, 52429 +; PPC-NEXT: mulhwu 4, 3, 4 +; PPC-NEXT: srwi 4, 4, 2 +; PPC-NEXT: mulli 4, 4, 5 +; PPC-NEXT: sub 3, 3, 4 +; PPC-NEXT: cntlzw 3, 3 +; PPC-NEXT: not 3, 3 +; PPC-NEXT: rlwinm 3, 3, 27, 31, 31 +; PPC-NEXT: blr +; +; PPC64LE-LABEL: test_urem_odd_setne: +; PPC64LE: # %bb.0: +; PPC64LE-NEXT: lis 4, -13108 +; PPC64LE-NEXT: clrlwi 3, 3, 28 +; PPC64LE-NEXT: ori 4, 4, 52429 +; PPC64LE-NEXT: mulhwu 4, 3, 4 +; PPC64LE-NEXT: rlwinm 5, 4, 0, 0, 29 +; PPC64LE-NEXT: srwi 4, 4, 2 +; PPC64LE-NEXT: add 4, 4, 5 +; PPC64LE-NEXT: sub 3, 3, 4 +; PPC64LE-NEXT: cntlzw 3, 3 +; PPC64LE-NEXT: not 3, 3 +; PPC64LE-NEXT: rlwinm 3, 3, 27, 31, 31 +; PPC64LE-NEXT: blr + %urem = urem i4 %X, 5 + %cmp = icmp ne i4 %urem, 0 + ret i1 %cmp +} + +define i1 @test_urem_negative_odd(i9 %X) nounwind { +; PPC-LABEL: test_urem_negative_odd: +; PPC: # %bb.0: +; PPC-NEXT: lis 4, 8272 +; PPC-NEXT: clrlwi 3, 3, 23 +; PPC-NEXT: 
ori 4, 4, 51705 +; PPC-NEXT: mulhwu 4, 3, 4 +; PPC-NEXT: srwi 4, 4, 6 +; PPC-NEXT: mulli 4, 4, 507 +; PPC-NEXT: sub 3, 3, 4 +; PPC-NEXT: cntlzw 3, 3 +; PPC-NEXT: not 3, 3 +; PPC-NEXT: rlwinm 3, 3, 27, 31, 31 +; PPC-NEXT: blr +; +; PPC64LE-LABEL: test_urem_negative_odd: +; PPC64LE: # %bb.0: +; PPC64LE-NEXT: lis 4, 8272 +; PPC64LE-NEXT: clrlwi 3, 3, 23 +; PPC64LE-NEXT: ori 4, 4, 51705 +; PPC64LE-NEXT: mulhwu 4, 3, 4 +; PPC64LE-NEXT: srwi 4, 4, 6 +; PPC64LE-NEXT: mulli 4, 4, 507 +; PPC64LE-NEXT: sub 3, 3, 4 +; PPC64LE-NEXT: cntlzw 3, 3 +; PPC64LE-NEXT: not 3, 3 +; PPC64LE-NEXT: rlwinm 3, 3, 27, 31, 31 +; PPC64LE-NEXT: blr + %urem = urem i9 %X, -5 + %cmp = icmp ne i9 %urem, 0 + ret i1 %cmp +} + +define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind { +; PPC-LABEL: test_urem_vec: +; PPC: # %bb.0: +; PPC-NEXT: lis 6, -31983 +; PPC-NEXT: clrlwi 5, 5, 21 +; PPC-NEXT: ori 6, 6, 60211 +; PPC-NEXT: mullw 5, 5, 6 +; PPC-NEXT: lis 6, 32 +; PPC-NEXT: addi 5, 5, 10650 +; PPC-NEXT: ori 6, 6, 5132 +; PPC-NEXT: addis 5, 5, -1572 +; PPC-NEXT: cmplw 5, 6 +; PPC-NEXT: lis 6, -18725 +; PPC-NEXT: clrlwi 4, 4, 21 +; PPC-NEXT: ori 6, 6, 28087 +; PPC-NEXT: lis 5, -21846 +; PPC-NEXT: mullw 4, 4, 6 +; PPC-NEXT: lis 6, 9362 +; PPC-NEXT: clrlwi 3, 3, 21 +; PPC-NEXT: ori 5, 5, 43691 +; PPC-NEXT: addi 4, 4, -28087 +; PPC-NEXT: ori 6, 6, 18724 +; PPC-NEXT: mulhwu 5, 3, 5 +; PPC-NEXT: addis 4, 4, 18725 +; PPC-NEXT: cmplw 1, 4, 6 +; PPC-NEXT: srwi 4, 5, 2 +; PPC-NEXT: li 6, 0 +; PPC-NEXT: li 7, 1 +; PPC-NEXT: mulli 4, 4, 6 +; PPC-NEXT: sub 3, 3, 4 +; PPC-NEXT: cntlzw 3, 3 +; PPC-NEXT: not 3, 3 +; PPC-NEXT: bc 12, 5, .LBB4_2 +; PPC-NEXT: # %bb.1: +; PPC-NEXT: ori 4, 6, 0 +; PPC-NEXT: b .LBB4_3 +; PPC-NEXT: .LBB4_2: +; PPC-NEXT: addi 4, 7, 0 +; PPC-NEXT: .LBB4_3: +; PPC-NEXT: bc 12, 1, .LBB4_5 +; PPC-NEXT: # %bb.4: +; PPC-NEXT: ori 5, 6, 0 +; PPC-NEXT: b .LBB4_6 +; PPC-NEXT: .LBB4_5: +; PPC-NEXT: addi 5, 7, 0 +; PPC-NEXT: .LBB4_6: +; PPC-NEXT: rlwinm 3, 3, 27, 31, 31 +; PPC-NEXT: blr +; +; PPC64LE-LABEL: test_urem_vec: +; PPC64LE: # %bb.0: +; PPC64LE-NEXT: lis 6, 9362 +; PPC64LE-NEXT: lis 7, -21846 +; PPC64LE-NEXT: clrlwi 4, 4, 21 +; PPC64LE-NEXT: clrlwi 3, 3, 21 +; PPC64LE-NEXT: lis 8, 160 +; PPC64LE-NEXT: clrlwi 5, 5, 21 +; PPC64LE-NEXT: ori 6, 6, 18725 +; PPC64LE-NEXT: ori 7, 7, 43691 +; PPC64LE-NEXT: ori 8, 8, 25663 +; PPC64LE-NEXT: vspltisw 4, -11 +; PPC64LE-NEXT: mulhwu 6, 4, 6 +; PPC64LE-NEXT: mulhwu 7, 3, 7 +; PPC64LE-NEXT: mulhwu 8, 5, 8 +; PPC64LE-NEXT: sub 9, 4, 6 +; PPC64LE-NEXT: srwi 7, 7, 2 +; PPC64LE-NEXT: srwi 9, 9, 1 +; PPC64LE-NEXT: mulli 7, 7, 6 +; PPC64LE-NEXT: add 6, 9, 6 +; PPC64LE-NEXT: srwi 9, 6, 2 +; PPC64LE-NEXT: rlwinm 6, 6, 1, 0, 28 +; PPC64LE-NEXT: sub 6, 9, 6 +; PPC64LE-NEXT: sub 9, 5, 8 +; PPC64LE-NEXT: add 4, 4, 6 +; PPC64LE-NEXT: srwi 6, 9, 1 +; PPC64LE-NEXT: sub 3, 3, 7 +; PPC64LE-NEXT: add 6, 6, 8 +; PPC64LE-NEXT: mtvsrwz 34, 4 +; PPC64LE-NEXT: srwi 4, 6, 10 +; PPC64LE-NEXT: mtvsrwz 35, 3 +; PPC64LE-NEXT: mulli 3, 4, 2043 +; PPC64LE-NEXT: addis 4, 2, .LCPI4_0@toc@ha +; PPC64LE-NEXT: vmrghw 2, 2, 3 +; PPC64LE-NEXT: addi 4, 4, .LCPI4_0@toc@l +; PPC64LE-NEXT: lvx 3, 0, 4 +; PPC64LE-NEXT: sub 3, 5, 3 +; PPC64LE-NEXT: mtvsrwz 37, 3 +; PPC64LE-NEXT: addis 3, 2, .LCPI4_1@toc@ha +; PPC64LE-NEXT: addi 3, 3, .LCPI4_1@toc@l +; PPC64LE-NEXT: vperm 2, 5, 2, 3 +; PPC64LE-NEXT: vsrw 3, 4, 4 +; PPC64LE-NEXT: lvx 4, 0, 3 +; PPC64LE-NEXT: xxland 34, 34, 35 +; PPC64LE-NEXT: vcmpequw 2, 2, 4 +; PPC64LE-NEXT: xxlnor 0, 34, 34 +; PPC64LE-NEXT: xxswapd 1, 0 +; PPC64LE-NEXT: xxsldwi 2, 0, 0, 1 +; PPC64LE-NEXT: 
mffprwz 5, 0 +; PPC64LE-NEXT: mffprwz 3, 1 +; PPC64LE-NEXT: mffprwz 4, 2 +; PPC64LE-NEXT: blr + %urem = urem <3 x i11> %X, + %cmp = icmp ne <3 x i11> %urem, + ret <3 x i1> %cmp +} + +define i1 @test_urem_oversized(i66 %X) nounwind { +; PPC-LABEL: test_urem_oversized: +; PPC: # %bb.0: +; PPC-NEXT: mflr 0 +; PPC-NEXT: stw 0, 4(1) +; PPC-NEXT: stwu 1, -16(1) +; PPC-NEXT: mr 6, 5 +; PPC-NEXT: mr 5, 4 +; PPC-NEXT: clrlwi 4, 3, 30 +; PPC-NEXT: lis 3, 18838 +; PPC-NEXT: ori 10, 3, 722 +; PPC-NEXT: li 3, 0 +; PPC-NEXT: li 7, 0 +; PPC-NEXT: li 8, 0 +; PPC-NEXT: li 9, 0 +; PPC-NEXT: bl __umodti3 +; PPC-NEXT: or 3, 5, 3 +; PPC-NEXT: or 4, 6, 4 +; PPC-NEXT: or 3, 4, 3 +; PPC-NEXT: cntlzw 3, 3 +; PPC-NEXT: rlwinm 3, 3, 27, 31, 31 +; PPC-NEXT: lwz 0, 20(1) +; PPC-NEXT: addi 1, 1, 16 +; PPC-NEXT: mtlr 0 +; PPC-NEXT: blr +; +; PPC64LE-LABEL: test_urem_oversized: +; PPC64LE: # %bb.0: +; PPC64LE-NEXT: mflr 0 +; PPC64LE-NEXT: std 0, 16(1) +; PPC64LE-NEXT: stdu 1, -32(1) +; PPC64LE-NEXT: lis 5, 18838 +; PPC64LE-NEXT: clrldi 4, 4, 62 +; PPC64LE-NEXT: li 6, 0 +; PPC64LE-NEXT: ori 5, 5, 722 +; PPC64LE-NEXT: bl __umodti3 +; PPC64LE-NEXT: nop +; PPC64LE-NEXT: or 3, 3, 4 +; PPC64LE-NEXT: cntlzd 3, 3 +; PPC64LE-NEXT: rldicl 3, 3, 58, 63 +; PPC64LE-NEXT: addi 1, 1, 32 +; PPC64LE-NEXT: ld 0, 16(1) +; PPC64LE-NEXT: mtlr 0 +; PPC64LE-NEXT: blr + %urem = urem i66 %X, 1234567890 + %cmp = icmp eq i66 %urem, 0 + ret i1 %cmp +} diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll @@ -0,0 +1,867 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 < %s | FileCheck %s --check-prefixes=RV32 +; RUN: llc -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m < %s | FileCheck %s --check-prefixes=RV32M +; RUN: llc -mtriple=riscv64 -mattr=+m < %s | FileCheck %s --check-prefixes=RV64M +; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=RV32MV +; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=RV64MV + +define i1 @test_srem_odd(i29 %X) nounwind { +; RV32-LABEL: test_srem_odd: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: srai a0, a0, 3 +; RV32-NEXT: addi a1, zero, 99 +; RV32-NEXT: call __modsi3@plt +; RV32-NEXT: seqz a0, a0 +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: test_srem_odd: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a0, a0, 35 +; RV64-NEXT: srai a0, a0, 35 +; RV64-NEXT: addi a1, zero, 99 +; RV64-NEXT: call __moddi3@plt +; RV64-NEXT: seqz a0, a0 +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret +; +; RV32M-LABEL: test_srem_odd: +; RV32M: # %bb.0: +; RV32M-NEXT: slli a0, a0, 3 +; RV32M-NEXT: srai a0, a0, 3 +; RV32M-NEXT: lui a1, 783784 +; RV32M-NEXT: addi a1, a1, 331 +; RV32M-NEXT: mul a0, a0, a1 +; RV32M-NEXT: lui a1, 5296 +; RV32M-NEXT: addi a1, a1, -662 +; RV32M-NEXT: add a0, a0, a1 +; RV32M-NEXT: lui a1, 10592 +; RV32M-NEXT: addi a1, a1, -1323 +; RV32M-NEXT: sltu a0, a0, a1 +; RV32M-NEXT: ret +; +; RV64M-LABEL: test_srem_odd: +; RV64M: # %bb.0: +; 
RV64M-NEXT: slli a0, a0, 35 +; RV64M-NEXT: srai a0, a0, 35 +; RV64M-NEXT: lui a1, 1048536 +; RV64M-NEXT: addiw a1, a1, -331 +; RV64M-NEXT: slli a1, a1, 15 +; RV64M-NEXT: addi a1, a1, 331 +; RV64M-NEXT: slli a1, a1, 15 +; RV64M-NEXT: addi a1, a1, -331 +; RV64M-NEXT: slli a1, a1, 15 +; RV64M-NEXT: addi a1, a1, 331 +; RV64M-NEXT: mul a0, a0, a1 +; RV64M-NEXT: lui a1, 331 +; RV64M-NEXT: addiw a1, a1, -41 +; RV64M-NEXT: slli a1, a1, 12 +; RV64M-NEXT: addi a1, a1, -1531 +; RV64M-NEXT: slli a2, a1, 12 +; RV64M-NEXT: addi a2, a2, 703 +; RV64M-NEXT: slli a2, a2, 12 +; RV64M-NEXT: addi a2, a2, 1448 +; RV64M-NEXT: add a0, a0, a2 +; RV64M-NEXT: slli a1, a1, 13 +; RV64M-NEXT: addi a1, a1, 1407 +; RV64M-NEXT: slli a1, a1, 12 +; RV64M-NEXT: addi a1, a1, -1199 +; RV64M-NEXT: sltu a0, a0, a1 +; RV64M-NEXT: ret +; +; RV32MV-LABEL: test_srem_odd: +; RV32MV: # %bb.0: +; RV32MV-NEXT: slli a0, a0, 3 +; RV32MV-NEXT: srai a0, a0, 3 +; RV32MV-NEXT: lui a1, 783784 +; RV32MV-NEXT: addi a1, a1, 331 +; RV32MV-NEXT: mul a0, a0, a1 +; RV32MV-NEXT: lui a1, 5296 +; RV32MV-NEXT: addi a1, a1, -662 +; RV32MV-NEXT: add a0, a0, a1 +; RV32MV-NEXT: lui a1, 10592 +; RV32MV-NEXT: addi a1, a1, -1323 +; RV32MV-NEXT: sltu a0, a0, a1 +; RV32MV-NEXT: ret +; +; RV64MV-LABEL: test_srem_odd: +; RV64MV: # %bb.0: +; RV64MV-NEXT: slli a0, a0, 35 +; RV64MV-NEXT: srai a0, a0, 35 +; RV64MV-NEXT: lui a1, 1048536 +; RV64MV-NEXT: addiw a1, a1, -331 +; RV64MV-NEXT: slli a1, a1, 15 +; RV64MV-NEXT: addi a1, a1, 331 +; RV64MV-NEXT: slli a1, a1, 15 +; RV64MV-NEXT: addi a1, a1, -331 +; RV64MV-NEXT: slli a1, a1, 15 +; RV64MV-NEXT: addi a1, a1, 331 +; RV64MV-NEXT: mul a0, a0, a1 +; RV64MV-NEXT: lui a1, 331 +; RV64MV-NEXT: addiw a1, a1, -41 +; RV64MV-NEXT: slli a1, a1, 12 +; RV64MV-NEXT: addi a1, a1, -1531 +; RV64MV-NEXT: slli a2, a1, 12 +; RV64MV-NEXT: addi a2, a2, 703 +; RV64MV-NEXT: slli a2, a2, 12 +; RV64MV-NEXT: addi a2, a2, 1448 +; RV64MV-NEXT: add a0, a0, a2 +; RV64MV-NEXT: slli a1, a1, 13 +; RV64MV-NEXT: addi a1, a1, 1407 +; RV64MV-NEXT: slli a1, a1, 12 +; RV64MV-NEXT: addi a1, a1, -1199 +; RV64MV-NEXT: sltu a0, a0, a1 +; RV64MV-NEXT: ret + %srem = srem i29 %X, 99 + %cmp = icmp eq i29 %srem, 0 + ret i1 %cmp +} + +define i1 @test_srem_even(i4 %X) nounwind { +; RV32-LABEL: test_srem_even: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: slli a0, a0, 28 +; RV32-NEXT: srai a0, a0, 28 +; RV32-NEXT: addi a1, zero, 6 +; RV32-NEXT: call __modsi3@plt +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: seqz a0, a0 +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: test_srem_even: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: slli a0, a0, 60 +; RV64-NEXT: srai a0, a0, 60 +; RV64-NEXT: addi a1, zero, 6 +; RV64-NEXT: call __moddi3@plt +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: seqz a0, a0 +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret +; +; RV32M-LABEL: test_srem_even: +; RV32M: # %bb.0: +; RV32M-NEXT: slli a0, a0, 28 +; RV32M-NEXT: srai a0, a0, 28 +; RV32M-NEXT: lui a1, 174763 +; RV32M-NEXT: addi a1, a1, -1365 +; RV32M-NEXT: mulh a1, a0, a1 +; RV32M-NEXT: srli a2, a1, 31 +; RV32M-NEXT: add a1, a1, a2 +; RV32M-NEXT: addi a2, zero, 6 +; RV32M-NEXT: mul a1, a1, a2 +; RV32M-NEXT: sub a0, a0, a1 +; RV32M-NEXT: addi a0, a0, -1 +; RV32M-NEXT: seqz a0, a0 +; RV32M-NEXT: ret +; +; RV64M-LABEL: test_srem_even: +; RV64M: # %bb.0: +; 
RV64M-NEXT: slli a0, a0, 60 +; RV64M-NEXT: srai a0, a0, 60 +; RV64M-NEXT: lui a1, 10923 +; RV64M-NEXT: addiw a1, a1, -1365 +; RV64M-NEXT: slli a1, a1, 12 +; RV64M-NEXT: addi a1, a1, -1365 +; RV64M-NEXT: slli a1, a1, 12 +; RV64M-NEXT: addi a1, a1, -1365 +; RV64M-NEXT: slli a1, a1, 12 +; RV64M-NEXT: addi a1, a1, -1365 +; RV64M-NEXT: mulh a1, a0, a1 +; RV64M-NEXT: srli a2, a1, 63 +; RV64M-NEXT: add a1, a1, a2 +; RV64M-NEXT: addi a2, zero, 6 +; RV64M-NEXT: mul a1, a1, a2 +; RV64M-NEXT: sub a0, a0, a1 +; RV64M-NEXT: addi a0, a0, -1 +; RV64M-NEXT: seqz a0, a0 +; RV64M-NEXT: ret +; +; RV32MV-LABEL: test_srem_even: +; RV32MV: # %bb.0: +; RV32MV-NEXT: slli a0, a0, 28 +; RV32MV-NEXT: srai a0, a0, 28 +; RV32MV-NEXT: lui a1, 174763 +; RV32MV-NEXT: addi a1, a1, -1365 +; RV32MV-NEXT: mulh a1, a0, a1 +; RV32MV-NEXT: srli a2, a1, 31 +; RV32MV-NEXT: add a1, a1, a2 +; RV32MV-NEXT: addi a2, zero, 6 +; RV32MV-NEXT: mul a1, a1, a2 +; RV32MV-NEXT: sub a0, a0, a1 +; RV32MV-NEXT: addi a0, a0, -1 +; RV32MV-NEXT: seqz a0, a0 +; RV32MV-NEXT: ret +; +; RV64MV-LABEL: test_srem_even: +; RV64MV: # %bb.0: +; RV64MV-NEXT: slli a0, a0, 60 +; RV64MV-NEXT: srai a0, a0, 60 +; RV64MV-NEXT: lui a1, 10923 +; RV64MV-NEXT: addiw a1, a1, -1365 +; RV64MV-NEXT: slli a1, a1, 12 +; RV64MV-NEXT: addi a1, a1, -1365 +; RV64MV-NEXT: slli a1, a1, 12 +; RV64MV-NEXT: addi a1, a1, -1365 +; RV64MV-NEXT: slli a1, a1, 12 +; RV64MV-NEXT: addi a1, a1, -1365 +; RV64MV-NEXT: mulh a1, a0, a1 +; RV64MV-NEXT: srli a2, a1, 63 +; RV64MV-NEXT: add a1, a1, a2 +; RV64MV-NEXT: addi a2, zero, 6 +; RV64MV-NEXT: mul a1, a1, a2 +; RV64MV-NEXT: sub a0, a0, a1 +; RV64MV-NEXT: addi a0, a0, -1 +; RV64MV-NEXT: seqz a0, a0 +; RV64MV-NEXT: ret + %srem = srem i4 %X, 6 + %cmp = icmp eq i4 %srem, 1 + ret i1 %cmp +} + +define i1 @test_srem_pow2_setne(i6 %X) nounwind { +; RV32-LABEL: test_srem_pow2_setne: +; RV32: # %bb.0: +; RV32-NEXT: slli a1, a0, 26 +; RV32-NEXT: srai a1, a1, 26 +; RV32-NEXT: srli a1, a1, 9 +; RV32-NEXT: andi a1, a1, 3 +; RV32-NEXT: add a1, a0, a1 +; RV32-NEXT: andi a1, a1, 60 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: andi a0, a0, 63 +; RV32-NEXT: snez a0, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: test_srem_pow2_setne: +; RV64: # %bb.0: +; RV64-NEXT: slli a1, a0, 58 +; RV64-NEXT: srai a1, a1, 58 +; RV64-NEXT: srli a1, a1, 9 +; RV64-NEXT: andi a1, a1, 3 +; RV64-NEXT: add a1, a0, a1 +; RV64-NEXT: andi a1, a1, 60 +; RV64-NEXT: sub a0, a0, a1 +; RV64-NEXT: andi a0, a0, 63 +; RV64-NEXT: snez a0, a0 +; RV64-NEXT: ret +; +; RV32M-LABEL: test_srem_pow2_setne: +; RV32M: # %bb.0: +; RV32M-NEXT: slli a1, a0, 26 +; RV32M-NEXT: srai a1, a1, 26 +; RV32M-NEXT: srli a1, a1, 9 +; RV32M-NEXT: andi a1, a1, 3 +; RV32M-NEXT: add a1, a0, a1 +; RV32M-NEXT: andi a1, a1, 60 +; RV32M-NEXT: sub a0, a0, a1 +; RV32M-NEXT: andi a0, a0, 63 +; RV32M-NEXT: snez a0, a0 +; RV32M-NEXT: ret +; +; RV64M-LABEL: test_srem_pow2_setne: +; RV64M: # %bb.0: +; RV64M-NEXT: slli a1, a0, 58 +; RV64M-NEXT: srai a1, a1, 58 +; RV64M-NEXT: srli a1, a1, 9 +; RV64M-NEXT: andi a1, a1, 3 +; RV64M-NEXT: add a1, a0, a1 +; RV64M-NEXT: andi a1, a1, 60 +; RV64M-NEXT: sub a0, a0, a1 +; RV64M-NEXT: andi a0, a0, 63 +; RV64M-NEXT: snez a0, a0 +; RV64M-NEXT: ret +; +; RV32MV-LABEL: test_srem_pow2_setne: +; RV32MV: # %bb.0: +; RV32MV-NEXT: slli a1, a0, 26 +; RV32MV-NEXT: srai a1, a1, 26 +; RV32MV-NEXT: srli a1, a1, 9 +; RV32MV-NEXT: andi a1, a1, 3 +; RV32MV-NEXT: add a1, a0, a1 +; RV32MV-NEXT: andi a1, a1, 60 +; RV32MV-NEXT: sub a0, a0, a1 +; RV32MV-NEXT: andi a0, a0, 63 +; RV32MV-NEXT: snez a0, a0 +; RV32MV-NEXT: ret +; 
+; RV64MV-LABEL: test_srem_pow2_setne: +; RV64MV: # %bb.0: +; RV64MV-NEXT: slli a1, a0, 58 +; RV64MV-NEXT: srai a1, a1, 58 +; RV64MV-NEXT: srli a1, a1, 9 +; RV64MV-NEXT: andi a1, a1, 3 +; RV64MV-NEXT: add a1, a0, a1 +; RV64MV-NEXT: andi a1, a1, 60 +; RV64MV-NEXT: sub a0, a0, a1 +; RV64MV-NEXT: andi a0, a0, 63 +; RV64MV-NEXT: snez a0, a0 +; RV64MV-NEXT: ret + %srem = srem i6 %X, 4 + %cmp = icmp ne i6 %srem, 0 + ret i1 %cmp +} + +define void @test_srem_vec(<3 x i33>* %X) nounwind { +; RV32-LABEL: test_srem_vec: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s4, 8(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s5, 4(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s6, 0(sp) # 4-byte Folded Spill +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: lw a0, 4(a0) +; RV32-NEXT: lbu a1, 12(s0) +; RV32-NEXT: lw a2, 8(s0) +; RV32-NEXT: andi a3, a0, 1 +; RV32-NEXT: neg s2, a3 +; RV32-NEXT: slli a3, a1, 30 +; RV32-NEXT: srli a4, a2, 2 +; RV32-NEXT: or s3, a4, a3 +; RV32-NEXT: srli a1, a1, 2 +; RV32-NEXT: andi a1, a1, 1 +; RV32-NEXT: neg s1, a1 +; RV32-NEXT: slli a1, a2, 31 +; RV32-NEXT: srli a0, a0, 1 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: lw s4, 0(s0) +; RV32-NEXT: srli a1, a2, 1 +; RV32-NEXT: andi a1, a1, 1 +; RV32-NEXT: neg a1, a1 +; RV32-NEXT: addi a2, zero, 7 +; RV32-NEXT: mv a3, zero +; RV32-NEXT: call __moddi3@plt +; RV32-NEXT: mv s5, a0 +; RV32-NEXT: mv s6, a1 +; RV32-NEXT: addi a2, zero, -5 +; RV32-NEXT: addi a3, zero, -1 +; RV32-NEXT: mv a0, s3 +; RV32-NEXT: mv a1, s1 +; RV32-NEXT: call __moddi3@plt +; RV32-NEXT: mv s1, a0 +; RV32-NEXT: mv s3, a1 +; RV32-NEXT: addi a2, zero, 6 +; RV32-NEXT: mv a0, s4 +; RV32-NEXT: mv a1, s2 +; RV32-NEXT: mv a3, zero +; RV32-NEXT: call __moddi3@plt +; RV32-NEXT: xori a2, s1, 2 +; RV32-NEXT: or a2, a2, s3 +; RV32-NEXT: snez a2, a2 +; RV32-NEXT: xori a3, s5, 1 +; RV32-NEXT: or a3, a3, s6 +; RV32-NEXT: snez a3, a3 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: snez a0, a0 +; RV32-NEXT: neg a1, a3 +; RV32-NEXT: neg a4, a2 +; RV32-NEXT: neg a5, a0 +; RV32-NEXT: sw a5, 0(s0) +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: sub a0, a0, a3 +; RV32-NEXT: sw a0, 4(s0) +; RV32-NEXT: slli a0, a2, 2 +; RV32-NEXT: srli a2, a4, 30 +; RV32-NEXT: sub a2, a2, a0 +; RV32-NEXT: andi a2, a2, 7 +; RV32-NEXT: sb a2, 12(s0) +; RV32-NEXT: srli a2, a1, 31 +; RV32-NEXT: andi a1, a1, 1 +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: or a1, a2, a1 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: sw a0, 8(s0) +; RV32-NEXT: lw s6, 0(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s5, 4(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s4, 8(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: ret +; +; RV64-LABEL: test_srem_vec: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s1, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s2, 32(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s3, 24(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s4, 16(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s5, 8(sp) # 8-byte 
Folded Spill +; RV64-NEXT: mv s0, a0 +; RV64-NEXT: lb a0, 12(a0) +; RV64-NEXT: lwu a1, 8(s0) +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: addi s4, zero, 1 +; RV64-NEXT: slli a1, s4, 40 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: ld a1, 0(s0) +; RV64-NEXT: slli a2, a0, 29 +; RV64-NEXT: srai s2, a2, 31 +; RV64-NEXT: slli a0, a0, 31 +; RV64-NEXT: srli a2, a1, 33 +; RV64-NEXT: or a0, a2, a0 +; RV64-NEXT: slli a0, a0, 31 +; RV64-NEXT: srai s1, a0, 31 +; RV64-NEXT: slli a0, a1, 31 +; RV64-NEXT: srai a0, a0, 31 +; RV64-NEXT: addi a1, zero, 6 +; RV64-NEXT: call __moddi3@plt +; RV64-NEXT: mv s3, a0 +; RV64-NEXT: addi a1, zero, 7 +; RV64-NEXT: addi s5, zero, 7 +; RV64-NEXT: mv a0, s1 +; RV64-NEXT: call __moddi3@plt +; RV64-NEXT: mv s1, a0 +; RV64-NEXT: addi a1, zero, -5 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __moddi3@plt +; RV64-NEXT: addi a0, a0, -2 +; RV64-NEXT: snez a0, a0 +; RV64-NEXT: addi a1, s1, -1 +; RV64-NEXT: snez a1, a1 +; RV64-NEXT: snez a2, s3 +; RV64-NEXT: neg a2, a2 +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: neg a3, a0 +; RV64-NEXT: slli a4, s5, 32 +; RV64-NEXT: and a3, a3, a4 +; RV64-NEXT: srli a3, a3, 32 +; RV64-NEXT: sb a3, 12(s0) +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: slli a3, s4, 33 +; RV64-NEXT: addi a3, a3, -1 +; RV64-NEXT: and a1, a1, a3 +; RV64-NEXT: srli a4, a1, 31 +; RV64-NEXT: sub a0, a4, a0 +; RV64-NEXT: sw a0, 8(s0) +; RV64-NEXT: and a0, a2, a3 +; RV64-NEXT: slli a1, a1, 33 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: sd a0, 0(s0) +; RV64-NEXT: ld s5, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s4, 16(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s3, 24(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s2, 32(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s1, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: ret +; +; RV32M-LABEL: test_srem_vec: +; RV32M: # %bb.0: +; RV32M-NEXT: addi sp, sp, -32 +; RV32M-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32M-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32M-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32M-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32M-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32M-NEXT: sw s4, 8(sp) # 4-byte Folded Spill +; RV32M-NEXT: sw s5, 4(sp) # 4-byte Folded Spill +; RV32M-NEXT: sw s6, 0(sp) # 4-byte Folded Spill +; RV32M-NEXT: mv s0, a0 +; RV32M-NEXT: lw a0, 4(a0) +; RV32M-NEXT: lbu a1, 12(s0) +; RV32M-NEXT: lw a2, 8(s0) +; RV32M-NEXT: andi a3, a0, 1 +; RV32M-NEXT: neg s2, a3 +; RV32M-NEXT: slli a3, a1, 30 +; RV32M-NEXT: srli a4, a2, 2 +; RV32M-NEXT: or s3, a4, a3 +; RV32M-NEXT: srli a1, a1, 2 +; RV32M-NEXT: andi a1, a1, 1 +; RV32M-NEXT: neg s1, a1 +; RV32M-NEXT: slli a1, a2, 31 +; RV32M-NEXT: srli a0, a0, 1 +; RV32M-NEXT: or a0, a0, a1 +; RV32M-NEXT: lw s4, 0(s0) +; RV32M-NEXT: srli a1, a2, 1 +; RV32M-NEXT: andi a1, a1, 1 +; RV32M-NEXT: neg a1, a1 +; RV32M-NEXT: addi a2, zero, 7 +; RV32M-NEXT: mv a3, zero +; RV32M-NEXT: call __moddi3@plt +; RV32M-NEXT: mv s5, a0 +; RV32M-NEXT: mv s6, a1 +; RV32M-NEXT: addi a2, zero, -5 +; RV32M-NEXT: addi a3, zero, -1 +; RV32M-NEXT: mv a0, s3 +; RV32M-NEXT: mv a1, s1 +; RV32M-NEXT: call __moddi3@plt +; RV32M-NEXT: mv s1, a0 +; RV32M-NEXT: mv s3, a1 +; RV32M-NEXT: addi a2, zero, 6 +; RV32M-NEXT: mv a0, s4 +; RV32M-NEXT: mv a1, s2 +; RV32M-NEXT: mv a3, zero +; RV32M-NEXT: call __moddi3@plt +; RV32M-NEXT: xori a2, s1, 2 +; RV32M-NEXT: or a2, a2, s3 +; RV32M-NEXT: snez a2, a2 +; 
RV32M-NEXT: xori a3, s5, 1 +; RV32M-NEXT: or a3, a3, s6 +; RV32M-NEXT: snez a3, a3 +; RV32M-NEXT: or a0, a0, a1 +; RV32M-NEXT: snez a0, a0 +; RV32M-NEXT: neg a1, a3 +; RV32M-NEXT: neg a4, a2 +; RV32M-NEXT: neg a5, a0 +; RV32M-NEXT: sw a5, 0(s0) +; RV32M-NEXT: slli a3, a3, 1 +; RV32M-NEXT: sub a0, a0, a3 +; RV32M-NEXT: sw a0, 4(s0) +; RV32M-NEXT: slli a0, a2, 2 +; RV32M-NEXT: srli a2, a4, 30 +; RV32M-NEXT: sub a2, a2, a0 +; RV32M-NEXT: andi a2, a2, 7 +; RV32M-NEXT: sb a2, 12(s0) +; RV32M-NEXT: srli a2, a1, 31 +; RV32M-NEXT: andi a1, a1, 1 +; RV32M-NEXT: slli a1, a1, 1 +; RV32M-NEXT: or a1, a2, a1 +; RV32M-NEXT: sub a0, a1, a0 +; RV32M-NEXT: sw a0, 8(s0) +; RV32M-NEXT: lw s6, 0(sp) # 4-byte Folded Reload +; RV32M-NEXT: lw s5, 4(sp) # 4-byte Folded Reload +; RV32M-NEXT: lw s4, 8(sp) # 4-byte Folded Reload +; RV32M-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32M-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32M-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32M-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32M-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32M-NEXT: addi sp, sp, 32 +; RV32M-NEXT: ret +; +; RV64M-LABEL: test_srem_vec: +; RV64M: # %bb.0: +; RV64M-NEXT: lb a1, 12(a0) +; RV64M-NEXT: lwu a2, 8(a0) +; RV64M-NEXT: slli a1, a1, 32 +; RV64M-NEXT: or a2, a2, a1 +; RV64M-NEXT: addi a6, zero, 1 +; RV64M-NEXT: slli a3, a6, 40 +; RV64M-NEXT: addi a3, a3, -1 +; RV64M-NEXT: and a2, a2, a3 +; RV64M-NEXT: ld a3, 0(a0) +; RV64M-NEXT: slli a4, a2, 29 +; RV64M-NEXT: srai a4, a4, 31 +; RV64M-NEXT: slli a2, a2, 31 +; RV64M-NEXT: srli a5, a3, 33 +; RV64M-NEXT: or a2, a5, a2 +; RV64M-NEXT: slli a2, a2, 31 +; RV64M-NEXT: srai a2, a2, 31 +; RV64M-NEXT: slli a3, a3, 31 +; RV64M-NEXT: srai a3, a3, 31 +; RV64M-NEXT: lui a5, 18725 +; RV64M-NEXT: addiw a5, a5, -1755 +; RV64M-NEXT: slli a5, a5, 12 +; RV64M-NEXT: addi a5, a5, -1755 +; RV64M-NEXT: slli a5, a5, 12 +; RV64M-NEXT: addi a5, a5, -1755 +; RV64M-NEXT: slli a5, a5, 12 +; RV64M-NEXT: addi a5, a5, -1755 +; RV64M-NEXT: mulh a5, a2, a5 +; RV64M-NEXT: srli a1, a5, 63 +; RV64M-NEXT: srai a5, a5, 1 +; RV64M-NEXT: add a1, a5, a1 +; RV64M-NEXT: slli a5, a1, 3 +; RV64M-NEXT: sub a1, a1, a5 +; RV64M-NEXT: add a1, a2, a1 +; RV64M-NEXT: lui a2, 1035469 +; RV64M-NEXT: addiw a2, a2, -819 +; RV64M-NEXT: slli a2, a2, 12 +; RV64M-NEXT: addi a2, a2, -819 +; RV64M-NEXT: slli a2, a2, 12 +; RV64M-NEXT: addi a2, a2, -819 +; RV64M-NEXT: slli a2, a2, 13 +; RV64M-NEXT: addi a2, a2, -1639 +; RV64M-NEXT: mulh a2, a4, a2 +; RV64M-NEXT: srli a5, a2, 63 +; RV64M-NEXT: srai a2, a2, 1 +; RV64M-NEXT: add a2, a2, a5 +; RV64M-NEXT: slli a5, a2, 2 +; RV64M-NEXT: add a2, a5, a2 +; RV64M-NEXT: add a2, a4, a2 +; RV64M-NEXT: lui a4, 10923 +; RV64M-NEXT: addiw a4, a4, -1365 +; RV64M-NEXT: slli a4, a4, 12 +; RV64M-NEXT: addi a4, a4, -1365 +; RV64M-NEXT: slli a4, a4, 12 +; RV64M-NEXT: addi a4, a4, -1365 +; RV64M-NEXT: slli a4, a4, 12 +; RV64M-NEXT: addi a4, a4, -1365 +; RV64M-NEXT: mulh a4, a3, a4 +; RV64M-NEXT: srli a5, a4, 63 +; RV64M-NEXT: add a4, a4, a5 +; RV64M-NEXT: addi a5, zero, 6 +; RV64M-NEXT: mul a4, a4, a5 +; RV64M-NEXT: sub a3, a3, a4 +; RV64M-NEXT: addi a2, a2, -2 +; RV64M-NEXT: snez a2, a2 +; RV64M-NEXT: addi a1, a1, -1 +; RV64M-NEXT: snez a1, a1 +; RV64M-NEXT: snez a3, a3 +; RV64M-NEXT: neg a1, a1 +; RV64M-NEXT: neg a4, a2 +; RV64M-NEXT: neg a3, a3 +; RV64M-NEXT: addi a5, zero, 7 +; RV64M-NEXT: slli a5, a5, 32 +; RV64M-NEXT: and a4, a4, a5 +; RV64M-NEXT: srli a4, a4, 32 +; RV64M-NEXT: sb a4, 12(a0) +; RV64M-NEXT: slli a2, a2, 2 +; RV64M-NEXT: slli a4, a6, 33 +; 
RV64M-NEXT: addi a4, a4, -1 +; RV64M-NEXT: and a1, a1, a4 +; RV64M-NEXT: srli a5, a1, 31 +; RV64M-NEXT: sub a2, a5, a2 +; RV64M-NEXT: sw a2, 8(a0) +; RV64M-NEXT: slli a1, a1, 33 +; RV64M-NEXT: and a2, a3, a4 +; RV64M-NEXT: or a1, a2, a1 +; RV64M-NEXT: sd a1, 0(a0) +; RV64M-NEXT: ret +; +; RV32MV-LABEL: test_srem_vec: +; RV32MV: # %bb.0: +; RV32MV-NEXT: addi sp, sp, -64 +; RV32MV-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; RV32MV-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; RV32MV-NEXT: sw s1, 52(sp) # 4-byte Folded Spill +; RV32MV-NEXT: sw s2, 48(sp) # 4-byte Folded Spill +; RV32MV-NEXT: sw s3, 44(sp) # 4-byte Folded Spill +; RV32MV-NEXT: sw s4, 40(sp) # 4-byte Folded Spill +; RV32MV-NEXT: sw s5, 36(sp) # 4-byte Folded Spill +; RV32MV-NEXT: addi s0, sp, 64 +; RV32MV-NEXT: andi sp, sp, -32 +; RV32MV-NEXT: mv s1, a0 +; RV32MV-NEXT: lw a0, 8(a0) +; RV32MV-NEXT: lw a1, 4(s1) +; RV32MV-NEXT: slli a2, a0, 31 +; RV32MV-NEXT: srli a3, a1, 1 +; RV32MV-NEXT: or s2, a3, a2 +; RV32MV-NEXT: lbu a2, 12(s1) +; RV32MV-NEXT: srli a3, a0, 1 +; RV32MV-NEXT: andi a3, a3, 1 +; RV32MV-NEXT: neg s3, a3 +; RV32MV-NEXT: slli a3, a2, 30 +; RV32MV-NEXT: srli a0, a0, 2 +; RV32MV-NEXT: or s4, a0, a3 +; RV32MV-NEXT: srli a0, a2, 2 +; RV32MV-NEXT: andi a2, a0, 1 +; RV32MV-NEXT: lw a0, 0(s1) +; RV32MV-NEXT: neg s5, a2 +; RV32MV-NEXT: andi a1, a1, 1 +; RV32MV-NEXT: neg a1, a1 +; RV32MV-NEXT: addi a2, zero, 6 +; RV32MV-NEXT: mv a3, zero +; RV32MV-NEXT: call __moddi3@plt +; RV32MV-NEXT: sw a1, 4(sp) +; RV32MV-NEXT: sw a0, 0(sp) +; RV32MV-NEXT: addi a2, zero, -5 +; RV32MV-NEXT: addi a3, zero, -1 +; RV32MV-NEXT: mv a0, s4 +; RV32MV-NEXT: mv a1, s5 +; RV32MV-NEXT: call __moddi3@plt +; RV32MV-NEXT: sw a1, 20(sp) +; RV32MV-NEXT: sw a0, 16(sp) +; RV32MV-NEXT: addi a2, zero, 7 +; RV32MV-NEXT: mv a0, s2 +; RV32MV-NEXT: mv a1, s3 +; RV32MV-NEXT: mv a3, zero +; RV32MV-NEXT: call __moddi3@plt +; RV32MV-NEXT: sw a1, 12(sp) +; RV32MV-NEXT: sw a0, 8(sp) +; RV32MV-NEXT: lui a0, %hi(.LCPI3_0) +; RV32MV-NEXT: addi a0, a0, %lo(.LCPI3_0) +; RV32MV-NEXT: vsetivli a1, 8, e32,m2,ta,mu +; RV32MV-NEXT: vle32.v v26, (a0) +; RV32MV-NEXT: vle32.v v28, (sp) +; RV32MV-NEXT: lui a0, %hi(.LCPI3_1) +; RV32MV-NEXT: addi a0, a0, %lo(.LCPI3_1) +; RV32MV-NEXT: vle32.v v30, (a0) +; RV32MV-NEXT: vand.vv v26, v28, v26 +; RV32MV-NEXT: vsetivli a0, 4, e64,m2,ta,mu +; RV32MV-NEXT: vmsne.vv v0, v26, v30 +; RV32MV-NEXT: vmv.v.i v26, 0 +; RV32MV-NEXT: vmerge.vim v26, v26, -1, v0 +; RV32MV-NEXT: vsetvli zero, zero, e32,m2,ta,mu +; RV32MV-NEXT: vmv.x.s a0, v26 +; RV32MV-NEXT: sw a0, 0(s1) +; RV32MV-NEXT: vsetivli a0, 1, e32,m2,ta,mu +; RV32MV-NEXT: vslidedown.vi v28, v26, 1 +; RV32MV-NEXT: vmv.x.s a0, v28 +; RV32MV-NEXT: vslidedown.vi v28, v26, 2 +; RV32MV-NEXT: vmv.x.s a1, v28 +; RV32MV-NEXT: slli a2, a1, 1 +; RV32MV-NEXT: sub a0, a2, a0 +; RV32MV-NEXT: sw a0, 4(s1) +; RV32MV-NEXT: vslidedown.vi v28, v26, 4 +; RV32MV-NEXT: vmv.x.s a0, v28 +; RV32MV-NEXT: srli a2, a0, 30 +; RV32MV-NEXT: vslidedown.vi v28, v26, 5 +; RV32MV-NEXT: vmv.x.s a3, v28 +; RV32MV-NEXT: slli a3, a3, 2 +; RV32MV-NEXT: or a2, a3, a2 +; RV32MV-NEXT: andi a2, a2, 7 +; RV32MV-NEXT: sb a2, 12(s1) +; RV32MV-NEXT: srli a1, a1, 31 +; RV32MV-NEXT: vslidedown.vi v26, v26, 3 +; RV32MV-NEXT: vmv.x.s a2, v26 +; RV32MV-NEXT: andi a2, a2, 1 +; RV32MV-NEXT: slli a2, a2, 1 +; RV32MV-NEXT: or a1, a1, a2 +; RV32MV-NEXT: slli a0, a0, 2 +; RV32MV-NEXT: or a0, a1, a0 +; RV32MV-NEXT: sw a0, 8(s1) +; RV32MV-NEXT: addi sp, s0, -64 +; RV32MV-NEXT: lw s5, 36(sp) # 4-byte Folded Reload +; RV32MV-NEXT: lw s4, 40(sp) # 4-byte 
Folded Reload +; RV32MV-NEXT: lw s3, 44(sp) # 4-byte Folded Reload +; RV32MV-NEXT: lw s2, 48(sp) # 4-byte Folded Reload +; RV32MV-NEXT: lw s1, 52(sp) # 4-byte Folded Reload +; RV32MV-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; RV32MV-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; RV32MV-NEXT: addi sp, sp, 64 +; RV32MV-NEXT: ret +; +; RV64MV-LABEL: test_srem_vec: +; RV64MV: # %bb.0: +; RV64MV-NEXT: addi sp, sp, -64 +; RV64MV-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; RV64MV-NEXT: sd s0, 48(sp) # 8-byte Folded Spill +; RV64MV-NEXT: addi s0, sp, 64 +; RV64MV-NEXT: andi sp, sp, -32 +; RV64MV-NEXT: lb a1, 12(a0) +; RV64MV-NEXT: lwu a2, 8(a0) +; RV64MV-NEXT: slli a1, a1, 32 +; RV64MV-NEXT: or a2, a2, a1 +; RV64MV-NEXT: addi a6, zero, 1 +; RV64MV-NEXT: slli a3, a6, 40 +; RV64MV-NEXT: ld a4, 0(a0) +; RV64MV-NEXT: addi a3, a3, -1 +; RV64MV-NEXT: and a2, a2, a3 +; RV64MV-NEXT: slli a3, a2, 31 +; RV64MV-NEXT: srli a5, a4, 33 +; RV64MV-NEXT: or a3, a5, a3 +; RV64MV-NEXT: slli a3, a3, 31 +; RV64MV-NEXT: srai a3, a3, 31 +; RV64MV-NEXT: slli a2, a2, 29 +; RV64MV-NEXT: srai a2, a2, 31 +; RV64MV-NEXT: slli a4, a4, 31 +; RV64MV-NEXT: srai a4, a4, 31 +; RV64MV-NEXT: lui a5, 10923 +; RV64MV-NEXT: addiw a5, a5, -1365 +; RV64MV-NEXT: slli a5, a5, 12 +; RV64MV-NEXT: addi a5, a5, -1365 +; RV64MV-NEXT: slli a5, a5, 12 +; RV64MV-NEXT: addi a5, a5, -1365 +; RV64MV-NEXT: slli a5, a5, 12 +; RV64MV-NEXT: addi a5, a5, -1365 +; RV64MV-NEXT: mulh a5, a4, a5 +; RV64MV-NEXT: srli a1, a5, 63 +; RV64MV-NEXT: add a1, a5, a1 +; RV64MV-NEXT: addi a5, zero, 6 +; RV64MV-NEXT: mul a1, a1, a5 +; RV64MV-NEXT: sub a1, a4, a1 +; RV64MV-NEXT: sd a1, 0(sp) +; RV64MV-NEXT: lui a1, 1035469 +; RV64MV-NEXT: addiw a1, a1, -819 +; RV64MV-NEXT: slli a1, a1, 12 +; RV64MV-NEXT: addi a1, a1, -819 +; RV64MV-NEXT: slli a1, a1, 12 +; RV64MV-NEXT: addi a1, a1, -819 +; RV64MV-NEXT: slli a1, a1, 13 +; RV64MV-NEXT: addi a1, a1, -1639 +; RV64MV-NEXT: mulh a1, a2, a1 +; RV64MV-NEXT: srli a4, a1, 63 +; RV64MV-NEXT: srai a1, a1, 1 +; RV64MV-NEXT: add a1, a1, a4 +; RV64MV-NEXT: slli a4, a1, 2 +; RV64MV-NEXT: add a1, a4, a1 +; RV64MV-NEXT: add a1, a2, a1 +; RV64MV-NEXT: sd a1, 16(sp) +; RV64MV-NEXT: lui a1, 18725 +; RV64MV-NEXT: addiw a1, a1, -1755 +; RV64MV-NEXT: slli a1, a1, 12 +; RV64MV-NEXT: addi a1, a1, -1755 +; RV64MV-NEXT: slli a1, a1, 12 +; RV64MV-NEXT: addi a1, a1, -1755 +; RV64MV-NEXT: slli a1, a1, 12 +; RV64MV-NEXT: addi a1, a1, -1755 +; RV64MV-NEXT: mulh a1, a3, a1 +; RV64MV-NEXT: srli a2, a1, 63 +; RV64MV-NEXT: srai a1, a1, 1 +; RV64MV-NEXT: add a1, a1, a2 +; RV64MV-NEXT: slli a2, a1, 3 +; RV64MV-NEXT: sub a1, a1, a2 +; RV64MV-NEXT: add a1, a3, a1 +; RV64MV-NEXT: sd a1, 8(sp) +; RV64MV-NEXT: vsetivli a1, 4, e64,m2,ta,mu +; RV64MV-NEXT: vle64.v v26, (sp) +; RV64MV-NEXT: lui a1, %hi(.LCPI3_0) +; RV64MV-NEXT: addi a1, a1, %lo(.LCPI3_0) +; RV64MV-NEXT: vle64.v v28, (a1) +; RV64MV-NEXT: slli a1, a6, 33 +; RV64MV-NEXT: addi a1, a1, -1 +; RV64MV-NEXT: vand.vx v26, v26, a1 +; RV64MV-NEXT: vmsne.vv v0, v26, v28 +; RV64MV-NEXT: vmv.v.i v26, 0 +; RV64MV-NEXT: vmerge.vim v26, v26, -1, v0 +; RV64MV-NEXT: vsetivli a2, 1, e64,m2,ta,mu +; RV64MV-NEXT: vslidedown.vi v28, v26, 2 +; RV64MV-NEXT: vmv.x.s a2, v28 +; RV64MV-NEXT: srli a3, a2, 30 +; RV64MV-NEXT: andi a3, a3, 7 +; RV64MV-NEXT: sb a3, 12(a0) +; RV64MV-NEXT: slli a2, a2, 2 +; RV64MV-NEXT: vslidedown.vi v28, v26, 1 +; RV64MV-NEXT: vmv.x.s a3, v28 +; RV64MV-NEXT: and a3, a3, a1 +; RV64MV-NEXT: srli a4, a3, 31 +; RV64MV-NEXT: or a2, a4, a2 +; RV64MV-NEXT: sw a2, 8(a0) +; RV64MV-NEXT: vmv.x.s a2, v26 +; 
RV64MV-NEXT: and a1, a2, a1 +; RV64MV-NEXT: slli a2, a3, 33 +; RV64MV-NEXT: or a1, a1, a2 +; RV64MV-NEXT: sd a1, 0(a0) +; RV64MV-NEXT: addi sp, s0, -64 +; RV64MV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload +; RV64MV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload +; RV64MV-NEXT: addi sp, sp, 64 +; RV64MV-NEXT: ret + %ld = load <3 x i33>, <3 x i33>* %X + %srem = srem <3 x i33> %ld, + %cmp = icmp ne <3 x i33> %srem, + %ext = sext <3 x i1> %cmp to <3 x i33> + store <3 x i33> %ext, <3 x i33>* %X + ret void +} diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll @@ -0,0 +1,838 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 < %s | FileCheck %s --check-prefixes=RV32 +; RUN: llc -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m < %s | FileCheck %s --check-prefixes=RV32M +; RUN: llc -mtriple=riscv64 -mattr=+m < %s | FileCheck %s --check-prefixes=RV64M +; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=RV32MV +; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=RV64MV + +define i1 @test_urem_odd(i13 %X) nounwind { +; RV32-LABEL: test_urem_odd: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: lui a1, 2 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: addi a1, zero, 5 +; RV32-NEXT: call __umodsi3@plt +; RV32-NEXT: seqz a0, a0 +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: test_urem_odd: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: lui a1, 2 +; RV64-NEXT: addiw a1, a1, -1 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: addi a1, zero, 5 +; RV64-NEXT: call __umoddi3@plt +; RV64-NEXT: seqz a0, a0 +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret +; +; RV32M-LABEL: test_urem_odd: +; RV32M: # %bb.0: +; RV32M-NEXT: lui a1, 2 +; RV32M-NEXT: addi a1, a1, -1 +; RV32M-NEXT: and a0, a0, a1 +; RV32M-NEXT: lui a1, 838861 +; RV32M-NEXT: addi a1, a1, -819 +; RV32M-NEXT: mul a0, a0, a1 +; RV32M-NEXT: lui a1, 209715 +; RV32M-NEXT: addi a1, a1, 820 +; RV32M-NEXT: sltu a0, a0, a1 +; RV32M-NEXT: ret +; +; RV64M-LABEL: test_urem_odd: +; RV64M: # %bb.0: +; RV64M-NEXT: lui a1, 2 +; RV64M-NEXT: addiw a1, a1, -1 +; RV64M-NEXT: and a0, a0, a1 +; RV64M-NEXT: lui a1, 1035469 +; RV64M-NEXT: addiw a1, a1, -819 +; RV64M-NEXT: slli a1, a1, 12 +; RV64M-NEXT: addi a1, a1, -819 +; RV64M-NEXT: slli a1, a1, 12 +; RV64M-NEXT: addi a1, a1, -819 +; RV64M-NEXT: slli a1, a1, 12 +; RV64M-NEXT: addi a1, a1, -819 +; RV64M-NEXT: mul a0, a0, a1 +; RV64M-NEXT: lui a1, 13107 +; RV64M-NEXT: addiw a1, a1, 819 +; RV64M-NEXT: slli a1, a1, 12 +; RV64M-NEXT: addi a1, a1, 819 +; RV64M-NEXT: slli a1, a1, 12 +; RV64M-NEXT: addi a1, a1, 819 +; RV64M-NEXT: slli a1, a1, 12 +; RV64M-NEXT: addi a1, a1, 820 +; RV64M-NEXT: sltu a0, a0, a1 +; RV64M-NEXT: ret +; +; RV32MV-LABEL: test_urem_odd: +; RV32MV: # %bb.0: +; RV32MV-NEXT: lui a1, 2 +; RV32MV-NEXT: addi a1, a1, -1 +; RV32MV-NEXT: and a0, a0, a1 +; RV32MV-NEXT: lui a1, 838861 +; RV32MV-NEXT: addi a1, a1, -819 +; RV32MV-NEXT: mul a0, a0, a1 +; RV32MV-NEXT: 
lui a1, 209715 +; RV32MV-NEXT: addi a1, a1, 820 +; RV32MV-NEXT: sltu a0, a0, a1 +; RV32MV-NEXT: ret +; +; RV64MV-LABEL: test_urem_odd: +; RV64MV: # %bb.0: +; RV64MV-NEXT: lui a1, 2 +; RV64MV-NEXT: addiw a1, a1, -1 +; RV64MV-NEXT: and a0, a0, a1 +; RV64MV-NEXT: lui a1, 1035469 +; RV64MV-NEXT: addiw a1, a1, -819 +; RV64MV-NEXT: slli a1, a1, 12 +; RV64MV-NEXT: addi a1, a1, -819 +; RV64MV-NEXT: slli a1, a1, 12 +; RV64MV-NEXT: addi a1, a1, -819 +; RV64MV-NEXT: slli a1, a1, 12 +; RV64MV-NEXT: addi a1, a1, -819 +; RV64MV-NEXT: mul a0, a0, a1 +; RV64MV-NEXT: lui a1, 13107 +; RV64MV-NEXT: addiw a1, a1, 819 +; RV64MV-NEXT: slli a1, a1, 12 +; RV64MV-NEXT: addi a1, a1, 819 +; RV64MV-NEXT: slli a1, a1, 12 +; RV64MV-NEXT: addi a1, a1, 819 +; RV64MV-NEXT: slli a1, a1, 12 +; RV64MV-NEXT: addi a1, a1, 820 +; RV64MV-NEXT: sltu a0, a0, a1 +; RV64MV-NEXT: ret + %urem = urem i13 %X, 5 + %cmp = icmp eq i13 %urem, 0 + ret i1 %cmp +} + +define i1 @test_urem_even(i27 %X) nounwind { +; RV32-LABEL: test_urem_even: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: lui a1, 32768 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: addi a1, zero, 14 +; RV32-NEXT: call __umodsi3@plt +; RV32-NEXT: seqz a0, a0 +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: test_urem_even: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: lui a1, 32768 +; RV64-NEXT: addiw a1, a1, -1 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: addi a1, zero, 14 +; RV64-NEXT: call __umoddi3@plt +; RV64-NEXT: seqz a0, a0 +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret +; +; RV32M-LABEL: test_urem_even: +; RV32M: # %bb.0: +; RV32M-NEXT: lui a1, 32768 +; RV32M-NEXT: addi a1, a1, -1 +; RV32M-NEXT: and a0, a0, a1 +; RV32M-NEXT: srli a1, a0, 1 +; RV32M-NEXT: lui a2, 599186 +; RV32M-NEXT: addi a2, a2, 1171 +; RV32M-NEXT: mulhu a1, a1, a2 +; RV32M-NEXT: srli a1, a1, 2 +; RV32M-NEXT: addi a2, zero, 14 +; RV32M-NEXT: mul a1, a1, a2 +; RV32M-NEXT: sub a0, a0, a1 +; RV32M-NEXT: seqz a0, a0 +; RV32M-NEXT: ret +; +; RV64M-LABEL: test_urem_even: +; RV64M: # %bb.0: +; RV64M-NEXT: lui a1, 32768 +; RV64M-NEXT: addiw a1, a1, -1 +; RV64M-NEXT: and a0, a0, a1 +; RV64M-NEXT: srli a1, a0, 1 +; RV64M-NEXT: lui a2, 18725 +; RV64M-NEXT: addiw a2, a2, -1755 +; RV64M-NEXT: slli a2, a2, 12 +; RV64M-NEXT: addi a2, a2, -1755 +; RV64M-NEXT: slli a2, a2, 12 +; RV64M-NEXT: addi a2, a2, -1755 +; RV64M-NEXT: slli a2, a2, 12 +; RV64M-NEXT: addi a2, a2, -1755 +; RV64M-NEXT: mulhu a1, a1, a2 +; RV64M-NEXT: srli a1, a1, 1 +; RV64M-NEXT: addi a2, zero, 14 +; RV64M-NEXT: mul a1, a1, a2 +; RV64M-NEXT: sub a0, a0, a1 +; RV64M-NEXT: seqz a0, a0 +; RV64M-NEXT: ret +; +; RV32MV-LABEL: test_urem_even: +; RV32MV: # %bb.0: +; RV32MV-NEXT: lui a1, 32768 +; RV32MV-NEXT: addi a1, a1, -1 +; RV32MV-NEXT: and a0, a0, a1 +; RV32MV-NEXT: srli a1, a0, 1 +; RV32MV-NEXT: lui a2, 599186 +; RV32MV-NEXT: addi a2, a2, 1171 +; RV32MV-NEXT: mulhu a1, a1, a2 +; RV32MV-NEXT: srli a1, a1, 2 +; RV32MV-NEXT: addi a2, zero, 14 +; RV32MV-NEXT: mul a1, a1, a2 +; RV32MV-NEXT: sub a0, a0, a1 +; RV32MV-NEXT: seqz a0, a0 +; RV32MV-NEXT: ret +; +; RV64MV-LABEL: test_urem_even: +; RV64MV: # %bb.0: +; RV64MV-NEXT: lui a1, 32768 +; RV64MV-NEXT: addiw a1, a1, -1 +; RV64MV-NEXT: and a0, a0, a1 +; RV64MV-NEXT: srli a1, a0, 1 +; RV64MV-NEXT: lui a2, 18725 +; RV64MV-NEXT: addiw a2, a2, 
-1755 +; RV64MV-NEXT: slli a2, a2, 12 +; RV64MV-NEXT: addi a2, a2, -1755 +; RV64MV-NEXT: slli a2, a2, 12 +; RV64MV-NEXT: addi a2, a2, -1755 +; RV64MV-NEXT: slli a2, a2, 12 +; RV64MV-NEXT: addi a2, a2, -1755 +; RV64MV-NEXT: mulhu a1, a1, a2 +; RV64MV-NEXT: srli a1, a1, 1 +; RV64MV-NEXT: addi a2, zero, 14 +; RV64MV-NEXT: mul a1, a1, a2 +; RV64MV-NEXT: sub a0, a0, a1 +; RV64MV-NEXT: seqz a0, a0 +; RV64MV-NEXT: ret + %urem = urem i27 %X, 14 + %cmp = icmp eq i27 %urem, 0 + ret i1 %cmp +} + +define i1 @test_urem_odd_setne(i4 %X) nounwind { +; RV32-LABEL: test_urem_odd_setne: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: andi a0, a0, 15 +; RV32-NEXT: addi a1, zero, 5 +; RV32-NEXT: call __umodsi3@plt +; RV32-NEXT: snez a0, a0 +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: test_urem_odd_setne: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: andi a0, a0, 15 +; RV64-NEXT: addi a1, zero, 5 +; RV64-NEXT: call __umoddi3@plt +; RV64-NEXT: snez a0, a0 +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret +; +; RV32M-LABEL: test_urem_odd_setne: +; RV32M: # %bb.0: +; RV32M-NEXT: andi a0, a0, 15 +; RV32M-NEXT: lui a1, 838861 +; RV32M-NEXT: addi a1, a1, -819 +; RV32M-NEXT: mul a0, a0, a1 +; RV32M-NEXT: lui a1, 209715 +; RV32M-NEXT: addi a1, a1, 819 +; RV32M-NEXT: sltu a0, a1, a0 +; RV32M-NEXT: ret +; +; RV64M-LABEL: test_urem_odd_setne: +; RV64M: # %bb.0: +; RV64M-NEXT: andi a0, a0, 15 +; RV64M-NEXT: lui a1, 1035469 +; RV64M-NEXT: addiw a1, a1, -819 +; RV64M-NEXT: slli a1, a1, 12 +; RV64M-NEXT: addi a1, a1, -819 +; RV64M-NEXT: slli a1, a1, 12 +; RV64M-NEXT: addi a1, a1, -819 +; RV64M-NEXT: slli a1, a1, 12 +; RV64M-NEXT: addi a1, a1, -819 +; RV64M-NEXT: mul a0, a0, a1 +; RV64M-NEXT: lui a1, 13107 +; RV64M-NEXT: addiw a1, a1, 819 +; RV64M-NEXT: slli a1, a1, 12 +; RV64M-NEXT: addi a1, a1, 819 +; RV64M-NEXT: slli a1, a1, 12 +; RV64M-NEXT: addi a1, a1, 819 +; RV64M-NEXT: slli a1, a1, 12 +; RV64M-NEXT: addi a1, a1, 819 +; RV64M-NEXT: sltu a0, a1, a0 +; RV64M-NEXT: ret +; +; RV32MV-LABEL: test_urem_odd_setne: +; RV32MV: # %bb.0: +; RV32MV-NEXT: andi a0, a0, 15 +; RV32MV-NEXT: lui a1, 838861 +; RV32MV-NEXT: addi a1, a1, -819 +; RV32MV-NEXT: mul a0, a0, a1 +; RV32MV-NEXT: lui a1, 209715 +; RV32MV-NEXT: addi a1, a1, 819 +; RV32MV-NEXT: sltu a0, a1, a0 +; RV32MV-NEXT: ret +; +; RV64MV-LABEL: test_urem_odd_setne: +; RV64MV: # %bb.0: +; RV64MV-NEXT: andi a0, a0, 15 +; RV64MV-NEXT: lui a1, 1035469 +; RV64MV-NEXT: addiw a1, a1, -819 +; RV64MV-NEXT: slli a1, a1, 12 +; RV64MV-NEXT: addi a1, a1, -819 +; RV64MV-NEXT: slli a1, a1, 12 +; RV64MV-NEXT: addi a1, a1, -819 +; RV64MV-NEXT: slli a1, a1, 12 +; RV64MV-NEXT: addi a1, a1, -819 +; RV64MV-NEXT: mul a0, a0, a1 +; RV64MV-NEXT: lui a1, 13107 +; RV64MV-NEXT: addiw a1, a1, 819 +; RV64MV-NEXT: slli a1, a1, 12 +; RV64MV-NEXT: addi a1, a1, 819 +; RV64MV-NEXT: slli a1, a1, 12 +; RV64MV-NEXT: addi a1, a1, 819 +; RV64MV-NEXT: slli a1, a1, 12 +; RV64MV-NEXT: addi a1, a1, 819 +; RV64MV-NEXT: sltu a0, a1, a0 +; RV64MV-NEXT: ret + %urem = urem i4 %X, 5 + %cmp = icmp ne i4 %urem, 0 + ret i1 %cmp +} + +define i1 @test_urem_negative_odd(i9 %X) nounwind { +; RV32-LABEL: test_urem_negative_odd: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: andi a0, a0, 511 +; RV32-NEXT: addi a1, zero, 507 
+; RV32-NEXT: call __umodsi3@plt +; RV32-NEXT: snez a0, a0 +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: test_urem_negative_odd: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: andi a0, a0, 511 +; RV64-NEXT: addi a1, zero, 507 +; RV64-NEXT: call __umoddi3@plt +; RV64-NEXT: snez a0, a0 +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret +; +; RV32M-LABEL: test_urem_negative_odd: +; RV32M: # %bb.0: +; RV32M-NEXT: andi a0, a0, 511 +; RV32M-NEXT: lui a1, 692846 +; RV32M-NEXT: addi a1, a1, 307 +; RV32M-NEXT: mul a0, a0, a1 +; RV32M-NEXT: lui a1, 2068 +; RV32M-NEXT: addi a1, a1, 807 +; RV32M-NEXT: sltu a0, a1, a0 +; RV32M-NEXT: ret +; +; RV64M-LABEL: test_urem_negative_odd: +; RV64M: # %bb.0: +; RV64M-NEXT: andi a0, a0, 511 +; RV64M-NEXT: lui a1, 1042824 +; RV64M-NEXT: addiw a1, a1, -711 +; RV64M-NEXT: slli a1, a1, 13 +; RV64M-NEXT: addi a1, a1, 469 +; RV64M-NEXT: slli a1, a1, 12 +; RV64M-NEXT: addi a1, a1, -1737 +; RV64M-NEXT: slli a1, a1, 13 +; RV64M-NEXT: addi a1, a1, 307 +; RV64M-NEXT: mul a0, a0, a1 +; RV64M-NEXT: lui a1, 132365 +; RV64M-NEXT: addiw a1, a1, -1543 +; RV64M-NEXT: slli a1, a1, 14 +; RV64M-NEXT: addi a1, a1, -1131 +; RV64M-NEXT: slli a1, a1, 12 +; RV64M-NEXT: addi a1, a1, -186 +; RV64M-NEXT: sltu a0, a1, a0 +; RV64M-NEXT: ret +; +; RV32MV-LABEL: test_urem_negative_odd: +; RV32MV: # %bb.0: +; RV32MV-NEXT: andi a0, a0, 511 +; RV32MV-NEXT: lui a1, 692846 +; RV32MV-NEXT: addi a1, a1, 307 +; RV32MV-NEXT: mul a0, a0, a1 +; RV32MV-NEXT: lui a1, 2068 +; RV32MV-NEXT: addi a1, a1, 807 +; RV32MV-NEXT: sltu a0, a1, a0 +; RV32MV-NEXT: ret +; +; RV64MV-LABEL: test_urem_negative_odd: +; RV64MV: # %bb.0: +; RV64MV-NEXT: andi a0, a0, 511 +; RV64MV-NEXT: lui a1, 1042824 +; RV64MV-NEXT: addiw a1, a1, -711 +; RV64MV-NEXT: slli a1, a1, 13 +; RV64MV-NEXT: addi a1, a1, 469 +; RV64MV-NEXT: slli a1, a1, 12 +; RV64MV-NEXT: addi a1, a1, -1737 +; RV64MV-NEXT: slli a1, a1, 13 +; RV64MV-NEXT: addi a1, a1, 307 +; RV64MV-NEXT: mul a0, a0, a1 +; RV64MV-NEXT: lui a1, 132365 +; RV64MV-NEXT: addiw a1, a1, -1543 +; RV64MV-NEXT: slli a1, a1, 14 +; RV64MV-NEXT: addi a1, a1, -1131 +; RV64MV-NEXT: slli a1, a1, 12 +; RV64MV-NEXT: addi a1, a1, -186 +; RV64MV-NEXT: sltu a0, a1, a0 +; RV64MV-NEXT: ret + %urem = urem i9 %X, -5 + %cmp = icmp ne i9 %urem, 0 + ret i1 %cmp +} + +define void @test_urem_vec(<3 x i11>* %X) nounwind { +; RV32-LABEL: test_urem_vec: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: mv s0, a0 +; RV32-NEXT: lb a0, 4(a0) +; RV32-NEXT: lw a1, 0(s0) +; RV32-NEXT: slli a0, a0, 10 +; RV32-NEXT: srli a2, a1, 22 +; RV32-NEXT: or a0, a2, a0 +; RV32-NEXT: andi s2, a0, 2047 +; RV32-NEXT: andi s1, a1, 2047 +; RV32-NEXT: srli a0, a1, 11 +; RV32-NEXT: andi a0, a0, 2047 +; RV32-NEXT: addi a1, zero, 7 +; RV32-NEXT: call __umodsi3@plt +; RV32-NEXT: mv s3, a0 +; RV32-NEXT: addi a1, zero, 6 +; RV32-NEXT: mv a0, s1 +; RV32-NEXT: call __umodsi3@plt +; RV32-NEXT: mv s1, a0 +; RV32-NEXT: addi a1, zero, 2043 +; RV32-NEXT: mv a0, s2 +; RV32-NEXT: call __umodsi3@plt +; RV32-NEXT: addi a0, a0, -2 +; RV32-NEXT: snez a0, a0 +; RV32-NEXT: snez a1, s1 +; RV32-NEXT: addi a2, s3, -1 +; RV32-NEXT: 
snez a2, a2 +; RV32-NEXT: neg a2, a2 +; RV32-NEXT: neg a1, a1 +; RV32-NEXT: neg a3, a0 +; RV32-NEXT: srli a3, a3, 10 +; RV32-NEXT: andi a3, a3, 1 +; RV32-NEXT: sb a3, 4(s0) +; RV32-NEXT: andi a1, a1, 2047 +; RV32-NEXT: andi a2, a2, 2047 +; RV32-NEXT: slli a2, a2, 11 +; RV32-NEXT: or a1, a1, a2 +; RV32-NEXT: slli a0, a0, 22 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: sw a0, 0(s0) +; RV32-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload +; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: ret +; +; RV64-LABEL: test_urem_vec: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -48 +; RV64-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s2, 16(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: mv s0, a0 +; RV64-NEXT: lbu a0, 4(a0) +; RV64-NEXT: lwu a1, 0(s0) +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: srli s2, a0, 22 +; RV64-NEXT: andi s1, a0, 2047 +; RV64-NEXT: srli a0, a0, 11 +; RV64-NEXT: andi a0, a0, 2047 +; RV64-NEXT: addi a1, zero, 7 +; RV64-NEXT: call __umoddi3@plt +; RV64-NEXT: mv s3, a0 +; RV64-NEXT: addi a1, zero, 6 +; RV64-NEXT: mv a0, s1 +; RV64-NEXT: call __umoddi3@plt +; RV64-NEXT: mv s1, a0 +; RV64-NEXT: addi a1, zero, 2043 +; RV64-NEXT: mv a0, s2 +; RV64-NEXT: call __umoddi3@plt +; RV64-NEXT: addi a0, a0, -2 +; RV64-NEXT: snez a0, a0 +; RV64-NEXT: snez a1, s1 +; RV64-NEXT: addi a2, s3, -1 +; RV64-NEXT: snez a2, a2 +; RV64-NEXT: neg a2, a2 +; RV64-NEXT: neg a1, a1 +; RV64-NEXT: andi a1, a1, 2047 +; RV64-NEXT: andi a2, a2, 2047 +; RV64-NEXT: slli a2, a2, 11 +; RV64-NEXT: or a1, a1, a2 +; RV64-NEXT: slli a0, a0, 22 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: sw a0, 0(s0) +; RV64-NEXT: addi a1, zero, 1 +; RV64-NEXT: slli a1, a1, 33 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: sb a0, 4(s0) +; RV64-NEXT: ld s3, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 48 +; RV64-NEXT: ret +; +; RV32M-LABEL: test_urem_vec: +; RV32M: # %bb.0: +; RV32M-NEXT: lb a1, 4(a0) +; RV32M-NEXT: lw a2, 0(a0) +; RV32M-NEXT: slli a1, a1, 10 +; RV32M-NEXT: srli a3, a2, 22 +; RV32M-NEXT: or a1, a3, a1 +; RV32M-NEXT: andi a1, a1, 2047 +; RV32M-NEXT: srli a3, a2, 11 +; RV32M-NEXT: andi a3, a3, 2047 +; RV32M-NEXT: andi a2, a2, 2047 +; RV32M-NEXT: lui a4, 699051 +; RV32M-NEXT: addi a4, a4, -1365 +; RV32M-NEXT: mulhu a4, a2, a4 +; RV32M-NEXT: srli a4, a4, 2 +; RV32M-NEXT: addi a5, zero, 6 +; RV32M-NEXT: mul a4, a4, a5 +; RV32M-NEXT: sub a2, a2, a4 +; RV32M-NEXT: lui a4, 536863 +; RV32M-NEXT: addi a4, a4, -1229 +; RV32M-NEXT: mul a1, a1, a4 +; RV32M-NEXT: lui a4, 1023427 +; RV32M-NEXT: addi a4, a4, -1638 +; RV32M-NEXT: add a1, a1, a4 +; RV32M-NEXT: lui a4, 513 +; RV32M-NEXT: addi a4, a4, 1036 +; RV32M-NEXT: sltu a1, a4, a1 +; RV32M-NEXT: lui a4, 748983 +; RV32M-NEXT: addi a4, a4, -585 +; RV32M-NEXT: mul a3, a3, a4 +; RV32M-NEXT: lui a4, 299593 +; RV32M-NEXT: addi a4, a4, 585 +; RV32M-NEXT: add a3, a3, a4 +; RV32M-NEXT: lui a4, 149797 +; RV32M-NEXT: addi a4, a4, -1756 +; RV32M-NEXT: sltu a3, a4, a3 +; 
RV32M-NEXT: snez a2, a2 +; RV32M-NEXT: neg a2, a2 +; RV32M-NEXT: neg a3, a3 +; RV32M-NEXT: neg a4, a1 +; RV32M-NEXT: srli a4, a4, 10 +; RV32M-NEXT: andi a4, a4, 1 +; RV32M-NEXT: sb a4, 4(a0) +; RV32M-NEXT: andi a3, a3, 2047 +; RV32M-NEXT: slli a3, a3, 11 +; RV32M-NEXT: andi a2, a2, 2047 +; RV32M-NEXT: or a2, a2, a3 +; RV32M-NEXT: slli a1, a1, 22 +; RV32M-NEXT: sub a1, a2, a1 +; RV32M-NEXT: sw a1, 0(a0) +; RV32M-NEXT: ret +; +; RV64M-LABEL: test_urem_vec: +; RV64M: # %bb.0: +; RV64M-NEXT: lbu a1, 4(a0) +; RV64M-NEXT: lwu a2, 0(a0) +; RV64M-NEXT: slli a1, a1, 32 +; RV64M-NEXT: or a1, a2, a1 +; RV64M-NEXT: srli a2, a1, 11 +; RV64M-NEXT: andi a2, a2, 2047 +; RV64M-NEXT: srli a3, a1, 22 +; RV64M-NEXT: andi a1, a1, 2047 +; RV64M-NEXT: lui a4, 1026731 +; RV64M-NEXT: addiw a4, a4, -1365 +; RV64M-NEXT: slli a4, a4, 12 +; RV64M-NEXT: addi a4, a4, -1365 +; RV64M-NEXT: slli a4, a4, 12 +; RV64M-NEXT: addi a4, a4, -1365 +; RV64M-NEXT: slli a4, a4, 12 +; RV64M-NEXT: addi a4, a4, -1365 +; RV64M-NEXT: mulhu a4, a1, a4 +; RV64M-NEXT: srli a4, a4, 2 +; RV64M-NEXT: addi a5, zero, 6 +; RV64M-NEXT: mul a4, a4, a5 +; RV64M-NEXT: sub a1, a1, a4 +; RV64M-NEXT: snez a1, a1 +; RV64M-NEXT: lui a4, 14948 +; RV64M-NEXT: addiw a4, a4, 2029 +; RV64M-NEXT: slli a4, a4, 13 +; RV64M-NEXT: addi a4, a4, -381 +; RV64M-NEXT: slli a4, a4, 12 +; RV64M-NEXT: addi a4, a4, 287 +; RV64M-NEXT: slli a4, a4, 12 +; RV64M-NEXT: addi a4, a4, -1229 +; RV64M-NEXT: mul a3, a3, a4 +; RV64M-NEXT: lui a4, 1436 +; RV64M-NEXT: addiw a4, a4, -2029 +; RV64M-NEXT: slli a4, a4, 13 +; RV64M-NEXT: addi a4, a4, 381 +; RV64M-NEXT: slli a4, a4, 13 +; RV64M-NEXT: addi a4, a4, -573 +; RV64M-NEXT: slli a4, a4, 12 +; RV64M-NEXT: addi a4, a4, -1638 +; RV64M-NEXT: add a3, a3, a4 +; RV64M-NEXT: lui a4, 16424 +; RV64M-NEXT: addiw a4, a4, 401 +; RV64M-NEXT: slli a4, a4, 14 +; RV64M-NEXT: addi a4, a4, -345 +; RV64M-NEXT: slli a4, a4, 13 +; RV64M-NEXT: addi a4, a4, 1295 +; RV64M-NEXT: sltu a3, a4, a3 +; RV64M-NEXT: lui a4, 28087 +; RV64M-NEXT: addiw a4, a4, -585 +; RV64M-NEXT: slli a4, a4, 12 +; RV64M-NEXT: addi a4, a4, -585 +; RV64M-NEXT: slli a4, a4, 12 +; RV64M-NEXT: addi a4, a4, -585 +; RV64M-NEXT: slli a4, a4, 12 +; RV64M-NEXT: addi a4, a4, -585 +; RV64M-NEXT: mul a2, a2, a4 +; RV64M-NEXT: lui a4, 1020489 +; RV64M-NEXT: addiw a4, a4, 585 +; RV64M-NEXT: slli a4, a4, 12 +; RV64M-NEXT: addi a4, a4, 585 +; RV64M-NEXT: slli a4, a4, 12 +; RV64M-NEXT: addi a4, a4, 585 +; RV64M-NEXT: slli a4, a4, 12 +; RV64M-NEXT: addi a4, a4, 585 +; RV64M-NEXT: add a2, a2, a4 +; RV64M-NEXT: lui a4, 4681 +; RV64M-NEXT: addiw a4, a4, 585 +; RV64M-NEXT: slli a4, a4, 12 +; RV64M-NEXT: addi a4, a4, 585 +; RV64M-NEXT: slli a4, a4, 12 +; RV64M-NEXT: addi a4, a4, 585 +; RV64M-NEXT: slli a4, a4, 13 +; RV64M-NEXT: addi a4, a4, 1170 +; RV64M-NEXT: sltu a2, a4, a2 +; RV64M-NEXT: neg a1, a1 +; RV64M-NEXT: neg a2, a2 +; RV64M-NEXT: andi a1, a1, 2047 +; RV64M-NEXT: andi a2, a2, 2047 +; RV64M-NEXT: slli a2, a2, 11 +; RV64M-NEXT: or a1, a1, a2 +; RV64M-NEXT: slli a2, a3, 22 +; RV64M-NEXT: sub a1, a1, a2 +; RV64M-NEXT: sw a1, 0(a0) +; RV64M-NEXT: addi a2, zero, 1 +; RV64M-NEXT: slli a2, a2, 33 +; RV64M-NEXT: addi a2, a2, -1 +; RV64M-NEXT: and a1, a1, a2 +; RV64M-NEXT: srli a1, a1, 32 +; RV64M-NEXT: sb a1, 4(a0) +; RV64M-NEXT: ret +; +; RV32MV-LABEL: test_urem_vec: +; RV32MV: # %bb.0: +; RV32MV-NEXT: addi sp, sp, -16 +; RV32MV-NEXT: lb a1, 4(a0) +; RV32MV-NEXT: lw a2, 0(a0) +; RV32MV-NEXT: slli a1, a1, 10 +; RV32MV-NEXT: srli a3, a2, 22 +; RV32MV-NEXT: or a1, a3, a1 +; RV32MV-NEXT: andi a1, a1, 2047 +; 
RV32MV-NEXT: srli a3, a2, 11 +; RV32MV-NEXT: andi a3, a3, 2047 +; RV32MV-NEXT: andi a2, a2, 2047 +; RV32MV-NEXT: lui a4, 699051 +; RV32MV-NEXT: addi a4, a4, -1365 +; RV32MV-NEXT: mulhu a4, a2, a4 +; RV32MV-NEXT: srli a4, a4, 2 +; RV32MV-NEXT: addi a5, zero, 6 +; RV32MV-NEXT: mul a4, a4, a5 +; RV32MV-NEXT: sub a2, a2, a4 +; RV32MV-NEXT: sh a2, 8(sp) +; RV32MV-NEXT: lui a2, 2566 +; RV32MV-NEXT: addi a2, a2, 1087 +; RV32MV-NEXT: mulhu a2, a1, a2 +; RV32MV-NEXT: sub a4, a1, a2 +; RV32MV-NEXT: srli a4, a4, 1 +; RV32MV-NEXT: add a2, a4, a2 +; RV32MV-NEXT: srli a2, a2, 10 +; RV32MV-NEXT: addi a4, zero, 2043 +; RV32MV-NEXT: mul a2, a2, a4 +; RV32MV-NEXT: sub a1, a1, a2 +; RV32MV-NEXT: sh a1, 12(sp) +; RV32MV-NEXT: lui a1, 149797 +; RV32MV-NEXT: addi a1, a1, -1755 +; RV32MV-NEXT: mulhu a1, a3, a1 +; RV32MV-NEXT: sub a2, a3, a1 +; RV32MV-NEXT: srli a2, a2, 1 +; RV32MV-NEXT: add a1, a2, a1 +; RV32MV-NEXT: srli a1, a1, 2 +; RV32MV-NEXT: slli a2, a1, 3 +; RV32MV-NEXT: sub a1, a1, a2 +; RV32MV-NEXT: add a1, a3, a1 +; RV32MV-NEXT: sh a1, 10(sp) +; RV32MV-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; RV32MV-NEXT: addi a1, sp, 8 +; RV32MV-NEXT: vle16.v v25, (a1) +; RV32MV-NEXT: lui a1, %hi(.LCPI4_0) +; RV32MV-NEXT: addi a1, a1, %lo(.LCPI4_0) +; RV32MV-NEXT: vle16.v v26, (a1) +; RV32MV-NEXT: addi a1, zero, 2047 +; RV32MV-NEXT: vand.vx v25, v25, a1 +; RV32MV-NEXT: vmsne.vv v0, v25, v26 +; RV32MV-NEXT: vmv.v.i v25, 0 +; RV32MV-NEXT: vmerge.vim v25, v25, -1, v0 +; RV32MV-NEXT: vsetivli a1, 1, e16,m1,ta,mu +; RV32MV-NEXT: vslidedown.vi v26, v25, 2 +; RV32MV-NEXT: vmv.x.s a1, v26 +; RV32MV-NEXT: srli a2, a1, 10 +; RV32MV-NEXT: andi a2, a2, 1 +; RV32MV-NEXT: sb a2, 4(a0) +; RV32MV-NEXT: vmv.x.s a2, v25 +; RV32MV-NEXT: andi a2, a2, 2047 +; RV32MV-NEXT: vslidedown.vi v25, v25, 1 +; RV32MV-NEXT: vmv.x.s a3, v25 +; RV32MV-NEXT: andi a3, a3, 2047 +; RV32MV-NEXT: slli a3, a3, 11 +; RV32MV-NEXT: or a2, a2, a3 +; RV32MV-NEXT: slli a1, a1, 22 +; RV32MV-NEXT: or a1, a2, a1 +; RV32MV-NEXT: sw a1, 0(a0) +; RV32MV-NEXT: addi sp, sp, 16 +; RV32MV-NEXT: ret +; +; RV64MV-LABEL: test_urem_vec: +; RV64MV: # %bb.0: +; RV64MV-NEXT: addi sp, sp, -16 +; RV64MV-NEXT: lbu a1, 4(a0) +; RV64MV-NEXT: lwu a2, 0(a0) +; RV64MV-NEXT: slli a1, a1, 32 +; RV64MV-NEXT: or a1, a2, a1 +; RV64MV-NEXT: srli a2, a1, 11 +; RV64MV-NEXT: andi a2, a2, 2047 +; RV64MV-NEXT: andi a3, a1, 2047 +; RV64MV-NEXT: srli a1, a1, 22 +; RV64MV-NEXT: lui a4, 1027 +; RV64MV-NEXT: addiw a4, a4, -2023 +; RV64MV-NEXT: slli a4, a4, 15 +; RV64MV-NEXT: addi a4, a4, 2005 +; RV64MV-NEXT: slli a4, a4, 12 +; RV64MV-NEXT: addi a4, a4, -431 +; RV64MV-NEXT: slli a4, a4, 13 +; RV64MV-NEXT: addi a4, a4, -429 +; RV64MV-NEXT: mulhu a4, a1, a4 +; RV64MV-NEXT: srli a4, a4, 9 +; RV64MV-NEXT: addi a5, zero, 2043 +; RV64MV-NEXT: mul a4, a4, a5 +; RV64MV-NEXT: sub a1, a1, a4 +; RV64MV-NEXT: sh a1, 12(sp) +; RV64MV-NEXT: lui a1, 1026731 +; RV64MV-NEXT: addiw a1, a1, -1365 +; RV64MV-NEXT: slli a1, a1, 12 +; RV64MV-NEXT: addi a1, a1, -1365 +; RV64MV-NEXT: slli a1, a1, 12 +; RV64MV-NEXT: addi a1, a1, -1365 +; RV64MV-NEXT: slli a1, a1, 12 +; RV64MV-NEXT: addi a1, a1, -1365 +; RV64MV-NEXT: mulhu a1, a3, a1 +; RV64MV-NEXT: srli a1, a1, 2 +; RV64MV-NEXT: addi a4, zero, 6 +; RV64MV-NEXT: mul a1, a1, a4 +; RV64MV-NEXT: sub a1, a3, a1 +; RV64MV-NEXT: sh a1, 8(sp) +; RV64MV-NEXT: lui a1, 4681 +; RV64MV-NEXT: addiw a1, a1, 585 +; RV64MV-NEXT: slli a1, a1, 12 +; RV64MV-NEXT: addi a1, a1, 585 +; RV64MV-NEXT: slli a1, a1, 12 +; RV64MV-NEXT: addi a1, a1, 585 +; RV64MV-NEXT: slli a1, a1, 13 +; RV64MV-NEXT: addi a1, a1, 
1171 +; RV64MV-NEXT: mulhu a1, a2, a1 +; RV64MV-NEXT: sub a3, a2, a1 +; RV64MV-NEXT: srli a3, a3, 1 +; RV64MV-NEXT: add a1, a3, a1 +; RV64MV-NEXT: srli a1, a1, 2 +; RV64MV-NEXT: slli a3, a1, 3 +; RV64MV-NEXT: sub a1, a1, a3 +; RV64MV-NEXT: add a1, a2, a1 +; RV64MV-NEXT: sh a1, 10(sp) +; RV64MV-NEXT: vsetivli a1, 4, e16,m1,ta,mu +; RV64MV-NEXT: addi a1, sp, 8 +; RV64MV-NEXT: vle16.v v25, (a1) +; RV64MV-NEXT: lui a1, %hi(.LCPI4_0) +; RV64MV-NEXT: addi a1, a1, %lo(.LCPI4_0) +; RV64MV-NEXT: vle16.v v26, (a1) +; RV64MV-NEXT: addi a1, zero, 2047 +; RV64MV-NEXT: vand.vx v25, v25, a1 +; RV64MV-NEXT: vmsne.vv v0, v25, v26 +; RV64MV-NEXT: vmv.v.i v25, 0 +; RV64MV-NEXT: vmerge.vim v25, v25, -1, v0 +; RV64MV-NEXT: vmv.x.s a1, v25 +; RV64MV-NEXT: andi a1, a1, 2047 +; RV64MV-NEXT: addi a2, zero, 1 +; RV64MV-NEXT: vsetivli a3, 1, e16,m1,ta,mu +; RV64MV-NEXT: vslidedown.vi v26, v25, 1 +; RV64MV-NEXT: vmv.x.s a3, v26 +; RV64MV-NEXT: andi a3, a3, 2047 +; RV64MV-NEXT: slli a3, a3, 11 +; RV64MV-NEXT: or a1, a1, a3 +; RV64MV-NEXT: vslidedown.vi v25, v25, 2 +; RV64MV-NEXT: vmv.x.s a3, v25 +; RV64MV-NEXT: slli a3, a3, 22 +; RV64MV-NEXT: or a1, a1, a3 +; RV64MV-NEXT: sw a1, 0(a0) +; RV64MV-NEXT: slli a2, a2, 33 +; RV64MV-NEXT: addi a2, a2, -1 +; RV64MV-NEXT: and a1, a1, a2 +; RV64MV-NEXT: srli a1, a1, 32 +; RV64MV-NEXT: sb a1, 4(a0) +; RV64MV-NEXT: addi sp, sp, 16 +; RV64MV-NEXT: ret + %ld = load <3 x i11>, <3 x i11>* %X + %urem = urem <3 x i11> %ld, + %cmp = icmp ne <3 x i11> %urem, + %ext = sext <3 x i1> %cmp to <3 x i11> + store <3 x i11> %ext, <3 x i11>* %X + ret void +} diff --git a/llvm/test/CodeGen/Thumb/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/Thumb/srem-seteq-illegal-types.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb/srem-seteq-illegal-types.ll @@ -0,0 +1,137 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumb-eabi < %s | FileCheck %s + +define i1 @test_srem_odd(i29 %X) nounwind { +; CHECK-LABEL: test_srem_odd: +; CHECK: @ %bb.0: +; CHECK-NEXT: lsls r0, r0, #3 +; CHECK-NEXT: asrs r0, r0, #3 +; CHECK-NEXT: ldr r1, .LCPI0_0 +; CHECK-NEXT: muls r1, r0, r1 +; CHECK-NEXT: ldr r0, .LCPI0_1 +; CHECK-NEXT: adds r0, r1, r0 +; CHECK-NEXT: ldr r1, .LCPI0_2 +; CHECK-NEXT: cmp r0, r1 +; CHECK-NEXT: blo .LBB0_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: movs r0, #1 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.3: +; CHECK-NEXT: .LCPI0_0: +; CHECK-NEXT: .long 3210379595 @ 0xbf5a814b +; CHECK-NEXT: .LCPI0_1: +; CHECK-NEXT: .long 21691754 @ 0x14afd6a +; CHECK-NEXT: .LCPI0_2: +; CHECK-NEXT: .long 43383509 @ 0x295fad5 + %srem = srem i29 %X, 99 + %cmp = icmp eq i29 %srem, 0 + ret i1 %cmp +} + +define i1 @test_srem_even(i4 %X) nounwind { +; CHECK-LABEL: test_srem_even: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: lsls r0, r0, #28 +; CHECK-NEXT: asrs r0, r0, #28 +; CHECK-NEXT: movs r1, #6 +; CHECK-NEXT: bl __aeabi_idivmod +; CHECK-NEXT: subs r1, r1, #1 +; CHECK-NEXT: rsbs r0, r1, #0 +; CHECK-NEXT: adcs r0, r1 +; CHECK-NEXT: pop {r7} +; CHECK-NEXT: pop {r1} +; CHECK-NEXT: bx r1 + %srem = srem i4 %X, 6 + %cmp = icmp eq i4 %srem, 1 + ret i1 %cmp +} + +define i1 @test_srem_pow2_setne(i6 %X) nounwind { +; CHECK-LABEL: test_srem_pow2_setne: +; CHECK: @ %bb.0: +; CHECK-NEXT: lsls r1, r0, #26 +; CHECK-NEXT: asrs r1, r1, #26 +; CHECK-NEXT: lsrs r1, r1, #30 +; CHECK-NEXT: adds r1, r0, r1 +; CHECK-NEXT: movs r2, #60 
+; CHECK-NEXT: ands r2, r1 +; CHECK-NEXT: subs r1, r0, r2 +; CHECK-NEXT: movs r0, #63 +; CHECK-NEXT: ands r0, r1 +; CHECK-NEXT: subs r1, r0, #1 +; CHECK-NEXT: sbcs r0, r1 +; CHECK-NEXT: bx lr + %srem = srem i6 %X, 4 + %cmp = icmp ne i6 %srem, 0 + ret i1 %cmp +} + +define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind { +; CHECK-LABEL: test_srem_vec: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: .pad #12 +; CHECK-NEXT: sub sp, #12 +; CHECK-NEXT: movs r7, r3 +; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: movs r5, #1 +; CHECK-NEXT: ands r1, r5 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: movs r6, #9 +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: str r3, [sp] @ 4-byte Spill +; CHECK-NEXT: movs r2, r6 +; CHECK-NEXT: bl __aeabi_ldivmod +; CHECK-NEXT: movs r4, r2 +; CHECK-NEXT: movs r0, #3 +; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: eors r4, r0 +; CHECK-NEXT: orrs r4, r3 +; CHECK-NEXT: subs r0, r4, #1 +; CHECK-NEXT: sbcs r4, r0 +; CHECK-NEXT: ands r7, r5 +; CHECK-NEXT: rsbs r1, r7, #0 +; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: movs r2, r6 +; CHECK-NEXT: ldr r7, [sp] @ 4-byte Reload +; CHECK-NEXT: movs r3, r7 +; CHECK-NEXT: bl __aeabi_ldivmod +; CHECK-NEXT: movs r0, r5 +; CHECK-NEXT: bics r0, r3 +; CHECK-NEXT: movs r1, #2 +; CHECK-NEXT: mvns r6, r1 +; CHECK-NEXT: eors r6, r2 +; CHECK-NEXT: orrs r6, r0 +; CHECK-NEXT: subs r0, r6, #1 +; CHECK-NEXT: sbcs r6, r0 +; CHECK-NEXT: ldr r0, [sp, #36] +; CHECK-NEXT: ands r0, r5 +; CHECK-NEXT: rsbs r1, r0, #0 +; CHECK-NEXT: movs r0, #8 +; CHECK-NEXT: mvns r2, r0 +; CHECK-NEXT: mvns r3, r7 +; CHECK-NEXT: ldr r0, [sp, #32] +; CHECK-NEXT: bl __aeabi_ldivmod +; CHECK-NEXT: ands r3, r5 +; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: eors r2, r0 +; CHECK-NEXT: orrs r2, r3 +; CHECK-NEXT: subs r0, r2, #1 +; CHECK-NEXT: sbcs r2, r0 +; CHECK-NEXT: movs r0, r4 +; CHECK-NEXT: movs r1, r6 +; CHECK-NEXT: add sp, #12 +; CHECK-NEXT: pop {r4, r5, r6, r7} +; CHECK-NEXT: pop {r3} +; CHECK-NEXT: bx r3 + %srem = srem <3 x i33> %X, + %cmp = icmp ne <3 x i33> %srem, + ret <3 x i1> %cmp +} diff --git a/llvm/test/CodeGen/Thumb/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/Thumb/urem-seteq-illegal-types.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb/urem-seteq-illegal-types.ll @@ -0,0 +1,188 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumb-eabi < %s | FileCheck %s + +define i1 @test_urem_odd(i13 %X) nounwind { +; CHECK-LABEL: test_urem_odd: +; CHECK: @ %bb.0: +; CHECK-NEXT: ldr r1, .LCPI0_0 +; CHECK-NEXT: ands r1, r0 +; CHECK-NEXT: ldr r0, .LCPI0_1 +; CHECK-NEXT: muls r0, r1, r0 +; CHECK-NEXT: ldr r1, .LCPI0_2 +; CHECK-NEXT: cmp r0, r1 +; CHECK-NEXT: blo .LBB0_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: movs r0, #1 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.3: +; CHECK-NEXT: .LCPI0_0: +; CHECK-NEXT: .long 8191 @ 0x1fff +; CHECK-NEXT: .LCPI0_1: +; CHECK-NEXT: .long 3435973837 @ 0xcccccccd +; CHECK-NEXT: .LCPI0_2: +; CHECK-NEXT: .long 858993460 @ 0x33333334 + %urem = urem i13 %X, 5 + %cmp = icmp eq i13 %urem, 0 + ret i1 %cmp +} + +define i1 @test_urem_even(i27 %X) nounwind { +; CHECK-LABEL: test_urem_even: +; CHECK: @ %bb.0: +; CHECK-NEXT: movs r1, #31 +; CHECK-NEXT: lsls r1, r1, #27 +; CHECK-NEXT: bics r0, r1 +; CHECK-NEXT: ldr r1, .LCPI1_0 +; CHECK-NEXT: muls r1, r0, r1 +; 
CHECK-NEXT: movs r0, #1 +; CHECK-NEXT: rors r1, r0 +; CHECK-NEXT: ldr r2, .LCPI1_1 +; CHECK-NEXT: cmp r1, r2 +; CHECK-NEXT: blo .LBB1_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.3: +; CHECK-NEXT: .LCPI1_0: +; CHECK-NEXT: .long 3067833783 @ 0xb6db6db7 +; CHECK-NEXT: .LCPI1_1: +; CHECK-NEXT: .long 306783379 @ 0x12492493 + %urem = urem i27 %X, 14 + %cmp = icmp eq i27 %urem, 0 + ret i1 %cmp +} + +define i1 @test_urem_odd_setne(i4 %X) nounwind { +; CHECK-LABEL: test_urem_odd_setne: +; CHECK: @ %bb.0: +; CHECK-NEXT: movs r1, #15 +; CHECK-NEXT: ands r1, r0 +; CHECK-NEXT: ldr r0, .LCPI2_0 +; CHECK-NEXT: muls r0, r1, r0 +; CHECK-NEXT: ldr r1, .LCPI2_1 +; CHECK-NEXT: cmp r0, r1 +; CHECK-NEXT: bhi .LBB2_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: movs r0, #1 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.3: +; CHECK-NEXT: .LCPI2_0: +; CHECK-NEXT: .long 3435973837 @ 0xcccccccd +; CHECK-NEXT: .LCPI2_1: +; CHECK-NEXT: .long 858993459 @ 0x33333333 + %urem = urem i4 %X, 5 + %cmp = icmp ne i4 %urem, 0 + ret i1 %cmp +} + +define i1 @test_urem_negative_odd(i9 %X) nounwind { +; CHECK-LABEL: test_urem_negative_odd: +; CHECK: @ %bb.0: +; CHECK-NEXT: ldr r1, .LCPI3_0 +; CHECK-NEXT: ands r1, r0 +; CHECK-NEXT: ldr r0, .LCPI3_1 +; CHECK-NEXT: muls r0, r1, r0 +; CHECK-NEXT: ldr r1, .LCPI3_2 +; CHECK-NEXT: cmp r0, r1 +; CHECK-NEXT: bhi .LBB3_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .LBB3_2: +; CHECK-NEXT: movs r0, #1 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.3: +; CHECK-NEXT: .LCPI3_0: +; CHECK-NEXT: .long 511 @ 0x1ff +; CHECK-NEXT: .LCPI3_1: +; CHECK-NEXT: .long 2837897523 @ 0xa926e133 +; CHECK-NEXT: .LCPI3_2: +; CHECK-NEXT: .long 8471335 @ 0x814327 + %urem = urem i9 %X, -5 + %cmp = icmp ne i9 %urem, 0 + ret i1 %cmp +} + +define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind { +; CHECK-LABEL: test_urem_vec: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} +; CHECK-NEXT: movs r3, r2 +; CHECK-NEXT: ldr r5, .LCPI4_0 +; CHECK-NEXT: ands r0, r5 +; CHECK-NEXT: ldr r6, .LCPI4_1 +; CHECK-NEXT: muls r6, r0, r6 +; CHECK-NEXT: movs r2, #1 +; CHECK-NEXT: rors r6, r2 +; CHECK-NEXT: ldr r0, .LCPI4_2 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: cmp r6, r0 +; CHECK-NEXT: push {r2} +; CHECK-NEXT: pop {r0} +; CHECK-NEXT: bhi .LBB4_2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: movs r0, r4 +; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: ands r1, r5 +; CHECK-NEXT: ldr r6, .LCPI4_3 +; CHECK-NEXT: muls r6, r1, r6 +; CHECK-NEXT: ldr r1, .LCPI4_4 +; CHECK-NEXT: adds r1, r6, r1 +; CHECK-NEXT: ldr r6, .LCPI4_5 +; CHECK-NEXT: cmp r1, r6 +; CHECK-NEXT: push {r2} +; CHECK-NEXT: pop {r1} +; CHECK-NEXT: bhi .LBB4_4 +; CHECK-NEXT: @ %bb.3: +; CHECK-NEXT: movs r1, r4 +; CHECK-NEXT: .LBB4_4: +; CHECK-NEXT: ands r3, r5 +; CHECK-NEXT: ldr r5, .LCPI4_6 +; CHECK-NEXT: muls r5, r3, r5 +; CHECK-NEXT: ldr r3, .LCPI4_7 +; CHECK-NEXT: adds r3, r5, r3 +; CHECK-NEXT: ldr r5, .LCPI4_8 +; CHECK-NEXT: cmp r3, r5 +; CHECK-NEXT: bhi .LBB4_6 +; CHECK-NEXT: @ %bb.5: +; CHECK-NEXT: movs r2, r4 +; CHECK-NEXT: .LBB4_6: +; CHECK-NEXT: pop {r4, r5, r6} +; CHECK-NEXT: pop {r3} +; CHECK-NEXT: bx r3 +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.7: +; CHECK-NEXT: .LCPI4_0: +; CHECK-NEXT: .long 2047 @ 0x7ff +; CHECK-NEXT: .LCPI4_1: +; CHECK-NEXT: .long 2863311531 @ 0xaaaaaaab +; 
CHECK-NEXT: .LCPI4_2: +; CHECK-NEXT: .long 715827882 @ 0x2aaaaaaa +; CHECK-NEXT: .LCPI4_3: +; CHECK-NEXT: .long 3067833783 @ 0xb6db6db7 +; CHECK-NEXT: .LCPI4_4: +; CHECK-NEXT: .long 1227133513 @ 0x49249249 +; CHECK-NEXT: .LCPI4_5: +; CHECK-NEXT: .long 613566756 @ 0x24924924 +; CHECK-NEXT: .LCPI4_6: +; CHECK-NEXT: .long 2198989619 @ 0x8311eb33 +; CHECK-NEXT: .LCPI4_7: +; CHECK-NEXT: .long 4191955354 @ 0xf9dc299a +; CHECK-NEXT: .LCPI4_8: +; CHECK-NEXT: .long 2102284 @ 0x20140c + %urem = urem <3 x i11> %X, + %cmp = icmp ne <3 x i11> %urem, + ret <3 x i1> %cmp +} diff --git a/llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/srem-seteq-illegal-types.ll @@ -0,0 +1,145 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv7-none-eabi < %s | FileCheck %s + +define i1 @test_srem_odd(i29 %X) nounwind { +; CHECK-LABEL: test_srem_odd: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r1, #64874 +; CHECK-NEXT: movw r2, #33099 +; CHECK-NEXT: sbfx r0, r0, #0, #29 +; CHECK-NEXT: movt r1, #330 +; CHECK-NEXT: movt r2, #48986 +; CHECK-NEXT: mla r1, r0, r2, r1 +; CHECK-NEXT: movw r2, #64213 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: movt r2, #661 +; CHECK-NEXT: cmp r1, r2 +; CHECK-NEXT: it lo +; CHECK-NEXT: movlo r0, #1 +; CHECK-NEXT: bx lr + %srem = srem i29 %X, 99 + %cmp = icmp eq i29 %srem, 0 + ret i1 %cmp +} + +define i1 @test_srem_even(i4 %X) nounwind { +; CHECK-LABEL: test_srem_even: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r2, #43691 +; CHECK-NEXT: sbfx r1, r0, #0, #4 +; CHECK-NEXT: movt r2, #10922 +; CHECK-NEXT: lsls r0, r0, #28 +; CHECK-NEXT: smmul r1, r1, r2 +; CHECK-NEXT: add.w r1, r1, r1, lsr #31 +; CHECK-NEXT: add.w r1, r1, r1, lsl #1 +; CHECK-NEXT: mvn.w r1, r1, lsl #1 +; CHECK-NEXT: add.w r0, r1, r0, asr #28 +; CHECK-NEXT: clz r0, r0 +; CHECK-NEXT: lsrs r0, r0, #5 +; CHECK-NEXT: bx lr + %srem = srem i4 %X, 6 + %cmp = icmp eq i4 %srem, 1 + ret i1 %cmp +} + +define i1 @test_srem_pow2_setne(i6 %X) nounwind { +; CHECK-LABEL: test_srem_pow2_setne: +; CHECK: @ %bb.0: +; CHECK-NEXT: sbfx r1, r0, #0, #6 +; CHECK-NEXT: ubfx r1, r1, #9, #2 +; CHECK-NEXT: add r1, r0 +; CHECK-NEXT: and r1, r1, #60 +; CHECK-NEXT: subs r0, r0, r1 +; CHECK-NEXT: ands r0, r0, #63 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne r0, #1 +; CHECK-NEXT: bx lr + %srem = srem i6 %X, 4 + %cmp = icmp ne i6 %srem, 0 + ret i1 %cmp +} + +define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind { +; CHECK-LABEL: test_srem_vec: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: and r0, r3, #1 +; CHECK-NEXT: mov r4, r1 +; CHECK-NEXT: rsbs r1, r0, #0 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: movs r2, #9 +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: bl __aeabi_ldivmod +; CHECK-NEXT: and r0, r4, #1 +; CHECK-NEXT: mov r6, r2 +; CHECK-NEXT: rsbs r1, r0, #0 +; CHECK-NEXT: mov r7, r3 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: movs r2, #9 +; CHECK-NEXT: movs r3, #0 +; CHECK-NEXT: bl __aeabi_ldivmod +; CHECK-NEXT: ldr r1, [sp, #44] +; CHECK-NEXT: vmov.32 d8[0], r2 +; CHECK-NEXT: ldr r0, [sp, #40] +; CHECK-NEXT: mov r4, r3 +; CHECK-NEXT: and r1, r1, #1 +; CHECK-NEXT: mvn r2, #8 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: mov.w r3, #-1 +; CHECK-NEXT: vmov.32 d9[0], r6 +; CHECK-NEXT: bl __aeabi_ldivmod +; 
CHECK-NEXT: vmov.32 d16[0], r2 +; CHECK-NEXT: adr r0, .LCPI3_0 +; CHECK-NEXT: vmov.32 d9[1], r7 +; CHECK-NEXT: vld1.64 {d18, d19}, [r0:128] +; CHECK-NEXT: adr r0, .LCPI3_1 +; CHECK-NEXT: vmov.32 d16[1], r3 +; CHECK-NEXT: vmov.32 d8[1], r4 +; CHECK-NEXT: vand q8, q8, q9 +; CHECK-NEXT: vld1.64 {d20, d21}, [r0:128] +; CHECK-NEXT: adr r0, .LCPI3_2 +; CHECK-NEXT: vand q11, q4, q9 +; CHECK-NEXT: vld1.64 {d18, d19}, [r0:128] +; CHECK-NEXT: vceq.i32 q10, q11, q10 +; CHECK-NEXT: vceq.i32 q8, q8, q9 +; CHECK-NEXT: vrev64.32 q9, q10 +; CHECK-NEXT: vrev64.32 q11, q8 +; CHECK-NEXT: vand q9, q10, q9 +; CHECK-NEXT: vand q8, q8, q11 +; CHECK-NEXT: vmvn q9, q9 +; CHECK-NEXT: vmvn q8, q8 +; CHECK-NEXT: vmovn.i64 d18, q9 +; CHECK-NEXT: vmovn.i64 d16, q8 +; CHECK-NEXT: vmov.32 r0, d18[0] +; CHECK-NEXT: vmov.32 r1, d18[1] +; CHECK-NEXT: vmov.32 r2, d16[0] +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI3_0: +; CHECK-NEXT: .long 4294967295 @ 0xffffffff +; CHECK-NEXT: .long 1 @ 0x1 +; CHECK-NEXT: .long 4294967295 @ 0xffffffff +; CHECK-NEXT: .long 1 @ 0x1 +; CHECK-NEXT: .LCPI3_1: +; CHECK-NEXT: .long 3 @ 0x3 +; CHECK-NEXT: .long 0 @ 0x0 +; CHECK-NEXT: .long 4294967293 @ 0xfffffffd +; CHECK-NEXT: .long 1 @ 0x1 +; CHECK-NEXT: .LCPI3_2: +; CHECK-NEXT: .long 3 @ 0x3 +; CHECK-NEXT: .long 0 @ 0x0 +; CHECK-NEXT: .zero 4 +; CHECK-NEXT: .long 0 @ 0x0 + %srem = srem <3 x i33> %X, + %cmp = icmp ne <3 x i33> %srem, + ret <3 x i1> %cmp +} diff --git a/llvm/test/CodeGen/Thumb2/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/Thumb2/urem-seteq-illegal-types.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/urem-seteq-illegal-types.ll @@ -0,0 +1,128 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv7-none-eabi < %s | FileCheck %s + +define i1 @test_urem_odd(i13 %X) nounwind { +; CHECK-LABEL: test_urem_odd: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r1, #52429 +; CHECK-NEXT: bfc r0, #13, #19 +; CHECK-NEXT: movt r1, #52428 +; CHECK-NEXT: muls r1, r0, r1 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: cmn.w r1, #-858993460 +; CHECK-NEXT: it lo +; CHECK-NEXT: movlo r0, #1 +; CHECK-NEXT: bx lr + %urem = urem i13 %X, 5 + %cmp = icmp eq i13 %urem, 0 + ret i1 %cmp +} + +define i1 @test_urem_even(i27 %X) nounwind { +; CHECK-LABEL: test_urem_even: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r1, #28087 +; CHECK-NEXT: bic r0, r0, #-134217728 +; CHECK-NEXT: movt r1, #46811 +; CHECK-NEXT: movw r2, #9363 +; CHECK-NEXT: muls r0, r1, r0 +; CHECK-NEXT: movt r2, #4681 +; CHECK-NEXT: ror.w r1, r0, #1 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: cmp r1, r2 +; CHECK-NEXT: it lo +; CHECK-NEXT: movlo r0, #1 +; CHECK-NEXT: bx lr + %urem = urem i27 %X, 14 + %cmp = icmp eq i27 %urem, 0 + ret i1 %cmp +} + +define i1 @test_urem_odd_setne(i4 %X) nounwind { +; CHECK-LABEL: test_urem_odd_setne: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r1, #52429 +; CHECK-NEXT: and r0, r0, #15 +; CHECK-NEXT: movt r1, #52428 +; CHECK-NEXT: muls r1, r0, r1 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: cmp.w r1, #858993459 +; CHECK-NEXT: it hi +; CHECK-NEXT: movhi r0, #1 +; CHECK-NEXT: bx lr + %urem = urem i4 %X, 5 + %cmp = icmp ne i4 %urem, 0 + ret i1 %cmp +} + +define i1 @test_urem_negative_odd(i9 %X) nounwind { +; CHECK-LABEL: test_urem_negative_odd: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r1, #57651 +; CHECK-NEXT: bfc r0, #9, #23 +; CHECK-NEXT: movt r1, #43302 +; CHECK-NEXT: movw r2, #17191 +; CHECK-NEXT: muls 
r1, r0, r1 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: movt r2, #129 +; CHECK-NEXT: cmp r1, r2 +; CHECK-NEXT: it hi +; CHECK-NEXT: movhi r0, #1 +; CHECK-NEXT: bx lr + %urem = urem i9 %X, -5 + %cmp = icmp ne i9 %urem, 0 + ret i1 %cmp +} + +define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind { +; CHECK-LABEL: test_urem_vec: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: movw r3, #18725 +; CHECK-NEXT: bfc r1, #11, #21 +; CHECK-NEXT: movt r3, #9362 +; CHECK-NEXT: bfc r2, #11, #21 +; CHECK-NEXT: umull r3, r12, r1, r3 +; CHECK-NEXT: bfc r0, #11, #21 +; CHECK-NEXT: movw r3, #25663 +; CHECK-NEXT: movt r3, #160 +; CHECK-NEXT: umull r3, lr, r2, r3 +; CHECK-NEXT: vldr d17, .LCPI4_0 +; CHECK-NEXT: movw r3, #43691 +; CHECK-NEXT: movt r3, #43690 +; CHECK-NEXT: umull r3, r4, r0, r3 +; CHECK-NEXT: sub.w r3, r1, r12 +; CHECK-NEXT: add.w r3, r12, r3, lsr #1 +; CHECK-NEXT: lsr.w r12, r3, #2 +; CHECK-NEXT: sub.w r3, r2, lr +; CHECK-NEXT: lsrs r4, r4, #2 +; CHECK-NEXT: add.w r4, r4, r4, lsl #1 +; CHECK-NEXT: add.w r3, lr, r3, lsr #1 +; CHECK-NEXT: sub.w r0, r0, r4, lsl #1 +; CHECK-NEXT: lsr.w lr, r3, #10 +; CHECK-NEXT: movw r3, #2043 +; CHECK-NEXT: vmov.16 d16[0], r0 +; CHECK-NEXT: sub.w r0, r12, r12, lsl #3 +; CHECK-NEXT: mls r2, lr, r3, r2 +; CHECK-NEXT: add r0, r1 +; CHECK-NEXT: vmov.16 d16[1], r0 +; CHECK-NEXT: vmov.16 d16[2], r2 +; CHECK-NEXT: vbic.i16 d16, #0xf800 +; CHECK-NEXT: vceq.i16 d16, d16, d17 +; CHECK-NEXT: vmvn d16, d16 +; CHECK-NEXT: vmov.u16 r0, d16[0] +; CHECK-NEXT: vmov.u16 r1, d16[1] +; CHECK-NEXT: vmov.u16 r2, d16[2] +; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: .p2align 3 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI4_0: +; CHECK-NEXT: .short 0 @ 0x0 +; CHECK-NEXT: .short 1 @ 0x1 +; CHECK-NEXT: .short 2 @ 0x2 +; CHECK-NEXT: .short 0 @ 0x0 + %urem = urem <3 x i11> %X, + %cmp = icmp ne <3 x i11> %urem, + ret <3 x i1> %cmp +} diff --git a/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll @@ -0,0 +1,444 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=i686-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=X86 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=X64 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 < %s | FileCheck %s --check-prefixes=X64,SSE2 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 < %s | FileCheck %s --check-prefixes=X64,SSE41 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s --check-prefixes=X64,AVX1 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s | FileCheck %s --check-prefixes=X64,AVX2 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl < %s | FileCheck %s --check-prefixes=X64,AVX512VL + +define i1 @test_srem_odd(i29 %X) nounwind { +; X86-LABEL: test_srem_odd: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shll $3, %eax +; X86-NEXT: sarl $3, %eax +; X86-NEXT: imull $-1084587701, %eax, %eax # imm = 0xBF5A814B +; X86-NEXT: addl $21691754, %eax # imm = 0x14AFD6A +; X86-NEXT: cmpl $43383509, %eax # imm = 0x295FAD5 +; X86-NEXT: setb %al +; X86-NEXT: retl +; +; X64-LABEL: test_srem_odd: +; X64: # %bb.0: +; X64-NEXT: shll $3, %edi +; X64-NEXT: sarl $3, %edi +; X64-NEXT: imull $-1084587701, %edi, %eax # imm = 0xBF5A814B +; X64-NEXT: addl $21691754, %eax # imm = 0x14AFD6A +; X64-NEXT: cmpl 
$43383509, %eax # imm = 0x295FAD5 +; X64-NEXT: setb %al +; X64-NEXT: retq + %srem = srem i29 %X, 99 + %cmp = icmp eq i29 %srem, 0 + ret i1 %cmp +} + +define i1 @test_srem_even(i4 %X) nounwind { +; X86-LABEL: test_srem_even: +; X86: # %bb.0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: shlb $4, %al +; X86-NEXT: sarb $4, %al +; X86-NEXT: movsbl %al, %ecx +; X86-NEXT: imull $43, %ecx, %ecx +; X86-NEXT: movzwl %cx, %ecx +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: shrl $15, %edx +; X86-NEXT: addb %ch, %dl +; X86-NEXT: movzbl %dl, %ecx +; X86-NEXT: addl %ecx, %ecx +; X86-NEXT: leal (%ecx,%ecx,2), %ecx +; X86-NEXT: subb %cl, %al +; X86-NEXT: cmpb $1, %al +; X86-NEXT: sete %al +; X86-NEXT: retl +; +; X64-LABEL: test_srem_even: +; X64: # %bb.0: +; X64-NEXT: shlb $4, %dil +; X64-NEXT: sarb $4, %dil +; X64-NEXT: movsbl %dil, %eax +; X64-NEXT: imull $43, %eax, %ecx +; X64-NEXT: movzwl %cx, %ecx +; X64-NEXT: movl %ecx, %edx +; X64-NEXT: shrl $15, %edx +; X64-NEXT: shrl $8, %ecx +; X64-NEXT: addb %dl, %cl +; X64-NEXT: movzbl %cl, %ecx +; X64-NEXT: addl %ecx, %ecx +; X64-NEXT: leal (%rcx,%rcx,2), %ecx +; X64-NEXT: subb %cl, %al +; X64-NEXT: cmpb $1, %al +; X64-NEXT: sete %al +; X64-NEXT: retq + %srem = srem i4 %X, 6 + %cmp = icmp eq i4 %srem, 1 + ret i1 %cmp +} + +define i1 @test_srem_pow2_setne(i6 %X) nounwind { +; X86-LABEL: test_srem_pow2_setne: +; X86: # %bb.0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: shlb $2, %cl +; X86-NEXT: sarb $5, %cl +; X86-NEXT: shrb $4, %cl +; X86-NEXT: andb $3, %cl +; X86-NEXT: addb %al, %cl +; X86-NEXT: andb $60, %cl +; X86-NEXT: subb %cl, %al +; X86-NEXT: testb $63, %al +; X86-NEXT: setne %al +; X86-NEXT: retl +; +; X64-LABEL: test_srem_pow2_setne: +; X64: # %bb.0: +; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: leal (,%rdi,4), %eax +; X64-NEXT: sarb $5, %al +; X64-NEXT: shrb $4, %al +; X64-NEXT: andb $3, %al +; X64-NEXT: addb %dil, %al +; X64-NEXT: andb $60, %al +; X64-NEXT: subb %al, %dil +; X64-NEXT: testb $63, %dil +; X64-NEXT: setne %al +; X64-NEXT: retq + %srem = srem i6 %X, 4 + %cmp = icmp ne i6 %srem, 0 + ret i1 %cmp +} + +define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind { +; X86-LABEL: test_srem_vec: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: subl $12, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: andl $1, %edi +; X86-NEXT: negl %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: andl $1, %ebp +; X86-NEXT: negl %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl $1, %eax +; X86-NEXT: negl %eax +; X86-NEXT: pushl $-1 +; X86-NEXT: pushl $-9 +; X86-NEXT: pushl %eax +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll __moddi3 +; X86-NEXT: addl $16, %esp +; X86-NEXT: movl %eax, %esi +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $9 +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: calll __moddi3 +; X86-NEXT: addl $16, %esp +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: notl %ebp +; X86-NEXT: pushl $0 +; X86-NEXT: pushl $9 +; X86-NEXT: pushl %edi +; X86-NEXT: pushl {{[0-9]+}}(%esp) +; X86-NEXT: calll __moddi3 +; X86-NEXT: addl $16, %esp +; X86-NEXT: xorl $3, %eax +; X86-NEXT: orl %edx, %eax +; X86-NEXT: setne %al +; X86-NEXT: xorl $3, %esi +; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X86-NEXT: setne %cl +; X86-NEXT: xorl $-3, 
%ebx +; X86-NEXT: andl $1, %ebp +; X86-NEXT: orl %ebx, %ebp +; X86-NEXT: setne %dl +; X86-NEXT: addl $12, %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; SSE2-LABEL: test_srem_vec: +; SSE2: # %bb.0: +; SSE2-NEXT: movq %rdx, %rcx +; SSE2-NEXT: shlq $31, %rcx +; SSE2-NEXT: sarq $31, %rcx +; SSE2-NEXT: shlq $31, %rdi +; SSE2-NEXT: sarq $31, %rdi +; SSE2-NEXT: shlq $31, %rsi +; SSE2-NEXT: sarq $31, %rsi +; SSE2-NEXT: movabsq $2049638230412172402, %r8 # imm = 0x1C71C71C71C71C72 +; SSE2-NEXT: movq %rsi, %rax +; SSE2-NEXT: imulq %r8 +; SSE2-NEXT: movq %rdx, %rax +; SSE2-NEXT: shrq $63, %rax +; SSE2-NEXT: addq %rdx, %rax +; SSE2-NEXT: leaq (%rax,%rax,8), %rax +; SSE2-NEXT: subq %rax, %rsi +; SSE2-NEXT: movq %rsi, %xmm0 +; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: imulq %r8 +; SSE2-NEXT: movq %rdx, %rax +; SSE2-NEXT: shrq $63, %rax +; SSE2-NEXT: addq %rdx, %rax +; SSE2-NEXT: leaq (%rax,%rax,8), %rax +; SSE2-NEXT: subq %rax, %rdi +; SSE2-NEXT: movq %rdi, %xmm1 +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [8589934591,8589934591] +; SSE2-NEXT: pand %xmm0, %xmm1 +; SSE2-NEXT: movabsq $2049638230412172401, %rdx # imm = 0x1C71C71C71C71C71 +; SSE2-NEXT: movq %rcx, %rax +; SSE2-NEXT: imulq %rdx +; SSE2-NEXT: subq %rcx, %rdx +; SSE2-NEXT: movq %rdx, %rax +; SSE2-NEXT: shrq $63, %rax +; SSE2-NEXT: sarq $3, %rdx +; SSE2-NEXT: addq %rax, %rdx +; SSE2-NEXT: leaq (%rdx,%rdx,8), %rax +; SSE2-NEXT: addq %rcx, %rax +; SSE2-NEXT: movq %rax, %xmm2 +; SSE2-NEXT: pand %xmm0, %xmm2 +; SSE2-NEXT: pcmpeqd {{.*}}(%rip), %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2] +; SSE2-NEXT: pand %xmm1, %xmm0 +; SSE2-NEXT: pcmpeqd {{.*}}(%rip), %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2] +; SSE2-NEXT: pand %xmm2, %xmm1 +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,3] +; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE2-NEXT: pxor %xmm0, %xmm1 +; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_srem_vec: +; SSE41: # %bb.0: +; SSE41-NEXT: movq %rdx, %rcx +; SSE41-NEXT: shlq $31, %rcx +; SSE41-NEXT: sarq $31, %rcx +; SSE41-NEXT: shlq $31, %rdi +; SSE41-NEXT: sarq $31, %rdi +; SSE41-NEXT: shlq $31, %rsi +; SSE41-NEXT: sarq $31, %rsi +; SSE41-NEXT: movabsq $2049638230412172402, %r8 # imm = 0x1C71C71C71C71C72 +; SSE41-NEXT: movq %rsi, %rax +; SSE41-NEXT: imulq %r8 +; SSE41-NEXT: movq %rdx, %rax +; SSE41-NEXT: shrq $63, %rax +; SSE41-NEXT: addq %rdx, %rax +; SSE41-NEXT: leaq (%rax,%rax,8), %rax +; SSE41-NEXT: subq %rax, %rsi +; SSE41-NEXT: movq %rsi, %xmm1 +; SSE41-NEXT: movq %rdi, %rax +; SSE41-NEXT: imulq %r8 +; SSE41-NEXT: movq %rdx, %rax +; SSE41-NEXT: shrq $63, %rax +; SSE41-NEXT: addq %rdx, %rax +; SSE41-NEXT: leaq (%rax,%rax,8), %rax +; SSE41-NEXT: subq %rax, %rdi +; SSE41-NEXT: movq %rdi, %xmm0 +; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [8589934591,8589934591] +; SSE41-NEXT: pand %xmm1, %xmm0 +; SSE41-NEXT: movabsq $2049638230412172401, %rdx # imm = 0x1C71C71C71C71C71 +; SSE41-NEXT: movq %rcx, %rax +; SSE41-NEXT: imulq %rdx +; SSE41-NEXT: subq %rcx, %rdx +; SSE41-NEXT: movq %rdx, %rax +; SSE41-NEXT: shrq $63, %rax +; SSE41-NEXT: sarq $3, %rdx +; SSE41-NEXT: addq %rax, %rdx +; SSE41-NEXT: leaq (%rdx,%rdx,8), %rax +; SSE41-NEXT: addq %rcx, %rax +; SSE41-NEXT: movq %rax, %xmm2 +; 
SSE41-NEXT: pand %xmm1, %xmm2 +; SSE41-NEXT: pcmpeqq {{.*}}(%rip), %xmm0 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pcmpeqq {{.*}}(%rip), %xmm2 +; SSE41-NEXT: pxor %xmm1, %xmm2 +; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: pextrb $8, %xmm0, %edx +; SSE41-NEXT: pextrb $0, %xmm2, %ecx +; SSE41-NEXT: # kill: def $al killed $al killed $eax +; SSE41-NEXT: # kill: def $dl killed $dl killed $edx +; SSE41-NEXT: # kill: def $cl killed $cl killed $ecx +; SSE41-NEXT: retq +; +; AVX1-LABEL: test_srem_vec: +; AVX1: # %bb.0: +; AVX1-NEXT: movq %rdx, %rcx +; AVX1-NEXT: shlq $31, %rcx +; AVX1-NEXT: sarq $31, %rcx +; AVX1-NEXT: shlq $31, %rdi +; AVX1-NEXT: sarq $31, %rdi +; AVX1-NEXT: shlq $31, %rsi +; AVX1-NEXT: sarq $31, %rsi +; AVX1-NEXT: movabsq $2049638230412172402, %r8 # imm = 0x1C71C71C71C71C72 +; AVX1-NEXT: movq %rsi, %rax +; AVX1-NEXT: imulq %r8 +; AVX1-NEXT: movq %rdx, %rax +; AVX1-NEXT: shrq $63, %rax +; AVX1-NEXT: addq %rdx, %rax +; AVX1-NEXT: leaq (%rax,%rax,8), %rax +; AVX1-NEXT: subq %rax, %rsi +; AVX1-NEXT: vmovq %rsi, %xmm0 +; AVX1-NEXT: movq %rdi, %rax +; AVX1-NEXT: imulq %r8 +; AVX1-NEXT: movq %rdx, %rax +; AVX1-NEXT: shrq $63, %rax +; AVX1-NEXT: addq %rdx, %rax +; AVX1-NEXT: leaq (%rax,%rax,8), %rax +; AVX1-NEXT: subq %rax, %rdi +; AVX1-NEXT: vmovq %rdi, %xmm1 +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX1-NEXT: movabsq $2049638230412172401, %rdx # imm = 0x1C71C71C71C71C71 +; AVX1-NEXT: movq %rcx, %rax +; AVX1-NEXT: imulq %rdx +; AVX1-NEXT: subq %rcx, %rdx +; AVX1-NEXT: movq %rdx, %rax +; AVX1-NEXT: shrq $63, %rax +; AVX1-NEXT: sarq $3, %rdx +; AVX1-NEXT: addq %rax, %rdx +; AVX1-NEXT: leaq (%rdx,%rdx,8), %rax +; AVX1-NEXT: addq %rcx, %rax +; AVX1-NEXT: vmovq %rax, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpcmpeqq {{.*}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpcmpeqq {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: vpextrb $8, %xmm0, %edx +; AVX1-NEXT: vpextrb $0, %xmm1, %ecx +; AVX1-NEXT: # kill: def $al killed $al killed $eax +; AVX1-NEXT: # kill: def $dl killed $dl killed $edx +; AVX1-NEXT: # kill: def $cl killed $cl killed $ecx +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_srem_vec: +; AVX2: # %bb.0: +; AVX2-NEXT: movq %rdx, %rcx +; AVX2-NEXT: shlq $31, %rcx +; AVX2-NEXT: sarq $31, %rcx +; AVX2-NEXT: shlq $31, %rdi +; AVX2-NEXT: sarq $31, %rdi +; AVX2-NEXT: shlq $31, %rsi +; AVX2-NEXT: sarq $31, %rsi +; AVX2-NEXT: movabsq $2049638230412172402, %r8 # imm = 0x1C71C71C71C71C72 +; AVX2-NEXT: movq %rsi, %rax +; AVX2-NEXT: imulq %r8 +; AVX2-NEXT: movq %rdx, %rax +; AVX2-NEXT: shrq $63, %rax +; AVX2-NEXT: addq %rdx, %rax +; AVX2-NEXT: leaq (%rax,%rax,8), %rax +; AVX2-NEXT: subq %rax, %rsi +; AVX2-NEXT: vmovq %rsi, %xmm0 +; AVX2-NEXT: movq %rdi, %rax +; AVX2-NEXT: imulq %r8 +; AVX2-NEXT: movq %rdx, %rax +; AVX2-NEXT: shrq $63, %rax +; AVX2-NEXT: addq %rdx, %rax +; AVX2-NEXT: leaq (%rax,%rax,8), %rax +; AVX2-NEXT: subq %rax, %rdi +; AVX2-NEXT: vmovq %rdi, %xmm1 +; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX2-NEXT: movabsq $2049638230412172401, %rdx # imm = 0x1C71C71C71C71C71 +; AVX2-NEXT: movq %rcx, %rax +; AVX2-NEXT: imulq %rdx +; AVX2-NEXT: subq %rcx, %rdx +; AVX2-NEXT: movq %rdx, %rax +; AVX2-NEXT: shrq $63, %rax +; 
AVX2-NEXT: sarq $3, %rdx +; AVX2-NEXT: addq %rax, %rdx +; AVX2-NEXT: leaq (%rdx,%rdx,8), %rax +; AVX2-NEXT: addq %rcx, %rax +; AVX2-NEXT: vmovq %rax, %xmm1 +; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [8589934591,8589934591,8589934591,8589934591] +; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpcmpeqq {{.*}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: vpextrb $8, %xmm0, %edx +; AVX2-NEXT: vpextrb $0, %xmm1, %ecx +; AVX2-NEXT: # kill: def $al killed $al killed $eax +; AVX2-NEXT: # kill: def $dl killed $dl killed $edx +; AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512VL-LABEL: test_srem_vec: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: movq %rdx, %rcx +; AVX512VL-NEXT: shlq $31, %rcx +; AVX512VL-NEXT: sarq $31, %rcx +; AVX512VL-NEXT: shlq $31, %rdi +; AVX512VL-NEXT: sarq $31, %rdi +; AVX512VL-NEXT: shlq $31, %rsi +; AVX512VL-NEXT: sarq $31, %rsi +; AVX512VL-NEXT: movabsq $2049638230412172402, %r8 # imm = 0x1C71C71C71C71C72 +; AVX512VL-NEXT: movq %rsi, %rax +; AVX512VL-NEXT: imulq %r8 +; AVX512VL-NEXT: movq %rdx, %rax +; AVX512VL-NEXT: shrq $63, %rax +; AVX512VL-NEXT: addq %rdx, %rax +; AVX512VL-NEXT: leaq (%rax,%rax,8), %rax +; AVX512VL-NEXT: subq %rax, %rsi +; AVX512VL-NEXT: vmovq %rsi, %xmm0 +; AVX512VL-NEXT: movq %rdi, %rax +; AVX512VL-NEXT: imulq %r8 +; AVX512VL-NEXT: movq %rdx, %rax +; AVX512VL-NEXT: shrq $63, %rax +; AVX512VL-NEXT: addq %rdx, %rax +; AVX512VL-NEXT: leaq (%rax,%rax,8), %rax +; AVX512VL-NEXT: subq %rax, %rdi +; AVX512VL-NEXT: vmovq %rdi, %xmm1 +; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX512VL-NEXT: movabsq $2049638230412172401, %rdx # imm = 0x1C71C71C71C71C71 +; AVX512VL-NEXT: movq %rcx, %rax +; AVX512VL-NEXT: imulq %rdx +; AVX512VL-NEXT: subq %rcx, %rdx +; AVX512VL-NEXT: movq %rdx, %rax +; AVX512VL-NEXT: shrq $63, %rax +; AVX512VL-NEXT: sarq $3, %rdx +; AVX512VL-NEXT: addq %rax, %rdx +; AVX512VL-NEXT: leaq (%rdx,%rdx,8), %rax +; AVX512VL-NEXT: addq %rcx, %rax +; AVX512VL-NEXT: vmovq %rax, %xmm1 +; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to4}, %ymm0, %ymm0 +; AVX512VL-NEXT: vpcmpneqq {{.*}}(%rip), %ymm0, %k0 +; AVX512VL-NEXT: kshiftrw $1, %k0, %k1 +; AVX512VL-NEXT: kmovw %k1, %edx +; AVX512VL-NEXT: kshiftrw $2, %k0, %k1 +; AVX512VL-NEXT: kmovw %k1, %ecx +; AVX512VL-NEXT: kmovw %k0, %eax +; AVX512VL-NEXT: # kill: def $al killed $al killed $eax +; AVX512VL-NEXT: # kill: def $dl killed $dl killed $edx +; AVX512VL-NEXT: # kill: def $cl killed $cl killed $ecx +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq + %srem = srem <3 x i33> %X, + %cmp = icmp ne <3 x i33> %srem, + ret <3 x i1> %cmp +} diff --git a/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll @@ -0,0 +1,362 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=i686-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=X86 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=X64 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 < %s | FileCheck %s --check-prefixes=X64,SSE2 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 < %s | FileCheck %s 
--check-prefixes=X64,SSE41 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s --check-prefixes=X64,AVX1 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s | FileCheck %s --check-prefixes=X64,AVX2 +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl < %s | FileCheck %s --check-prefixes=X64,AVX512VL + +define i1 @test_urem_odd(i13 %X) nounwind { +; X86-LABEL: test_urem_odd: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl $8191, %eax # imm = 0x1FFF +; X86-NEXT: imull $-13107, %eax, %eax # imm = 0xCCCD +; X86-NEXT: movzwl %ax, %eax +; X86-NEXT: cmpl $13108, %eax # imm = 0x3334 +; X86-NEXT: setb %al +; X86-NEXT: retl +; +; X64-LABEL: test_urem_odd: +; X64: # %bb.0: +; X64-NEXT: andl $8191, %edi # imm = 0x1FFF +; X64-NEXT: imull $-13107, %edi, %eax # imm = 0xCCCD +; X64-NEXT: movzwl %ax, %eax +; X64-NEXT: cmpl $13108, %eax # imm = 0x3334 +; X64-NEXT: setb %al +; X64-NEXT: retq + %urem = urem i13 %X, 5 + %cmp = icmp eq i13 %urem, 0 + ret i1 %cmp +} + +define i1 @test_urem_even(i27 %X) nounwind { +; X86-LABEL: test_urem_even: +; X86: # %bb.0: +; X86-NEXT: movl $134217727, %eax # imm = 0x7FFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: imull $-1227133513, %eax, %eax # imm = 0xB6DB6DB7 +; X86-NEXT: rorl %eax +; X86-NEXT: cmpl $306783379, %eax # imm = 0x12492493 +; X86-NEXT: setb %al +; X86-NEXT: retl +; +; X64-LABEL: test_urem_even: +; X64: # %bb.0: +; X64-NEXT: andl $134217727, %edi # imm = 0x7FFFFFF +; X64-NEXT: imull $-1227133513, %edi, %eax # imm = 0xB6DB6DB7 +; X64-NEXT: rorl %eax +; X64-NEXT: cmpl $306783379, %eax # imm = 0x12492493 +; X64-NEXT: setb %al +; X64-NEXT: retq + %urem = urem i27 %X, 14 + %cmp = icmp eq i27 %urem, 0 + ret i1 %cmp +} + +define i1 @test_urem_odd_setne(i4 %X) nounwind { +; X86-LABEL: test_urem_odd_setne: +; X86: # %bb.0: +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: andb $15, %al +; X86-NEXT: movzbl %al, %eax +; X86-NEXT: imull $-51, %eax, %eax +; X86-NEXT: cmpb $51, %al +; X86-NEXT: seta %al +; X86-NEXT: retl +; +; X64-LABEL: test_urem_odd_setne: +; X64: # %bb.0: +; X64-NEXT: andb $15, %dil +; X64-NEXT: movzbl %dil, %eax +; X64-NEXT: imull $-51, %eax, %eax +; X64-NEXT: cmpb $51, %al +; X64-NEXT: seta %al +; X64-NEXT: retq + %urem = urem i4 %X, 5 + %cmp = icmp ne i4 %urem, 0 + ret i1 %cmp +} + +define i1 @test_urem_negative_odd(i9 %X) nounwind { +; X86-LABEL: test_urem_negative_odd: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl $511, %eax # imm = 0x1FF +; X86-NEXT: imull $-7885, %eax, %eax # imm = 0xE133 +; X86-NEXT: movzwl %ax, %eax +; X86-NEXT: cmpl $129, %eax +; X86-NEXT: seta %al +; X86-NEXT: retl +; +; X64-LABEL: test_urem_negative_odd: +; X64: # %bb.0: +; X64-NEXT: andl $511, %edi # imm = 0x1FF +; X64-NEXT: imull $-7885, %edi, %eax # imm = 0xE133 +; X64-NEXT: movzwl %ax, %eax +; X64-NEXT: cmpl $129, %eax +; X64-NEXT: seta %al +; X64-NEXT: retq + %urem = urem i9 %X, -5 + %cmp = icmp ne i9 %urem, 0 + ret i1 %cmp +} + +define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind { +; X86-LABEL: test_urem_vec: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-NEXT: andl $2047, %edx # imm = 0x7FF +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl $2047, %eax # imm = 0x7FF +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andl $2047, %ecx # imm = 0x7FF +; X86-NEXT: imull $-5325, %ecx, %ecx # imm = 0xEB33 +; X86-NEXT: addl $10650, %ecx # imm = 0x299A +; X86-NEXT: cmpw $32, %cx +; X86-NEXT: seta %cl +; 
X86-NEXT: imull $-21845, %eax, %eax # imm = 0xAAAB +; X86-NEXT: rorw %ax +; X86-NEXT: movzwl %ax, %eax +; X86-NEXT: cmpl $10922, %eax # imm = 0x2AAA +; X86-NEXT: seta %al +; X86-NEXT: imull $28087, %edx, %edx # imm = 0x6DB7 +; X86-NEXT: addl $-28087, %edx # imm = 0x9249 +; X86-NEXT: movzwl %dx, %edx +; X86-NEXT: cmpl $9362, %edx # imm = 0x2492 +; X86-NEXT: seta %dl +; X86-NEXT: retl +; +; SSE2-LABEL: test_urem_vec: +; SSE2: # %bb.0: +; SSE2-NEXT: movl %esi, %eax +; SSE2-NEXT: andl $2047, %eax # imm = 0x7FF +; SSE2-NEXT: imull $9363, %eax, %ecx # imm = 0x2493 +; SSE2-NEXT: shrl $16, %ecx +; SSE2-NEXT: subl %ecx, %eax +; SSE2-NEXT: movzwl %ax, %eax +; SSE2-NEXT: shrl %eax +; SSE2-NEXT: addl %ecx, %eax +; SSE2-NEXT: shrl $2, %eax +; SSE2-NEXT: leal (,%rax,8), %ecx +; SSE2-NEXT: subl %ecx, %eax +; SSE2-NEXT: addl %esi, %eax +; SSE2-NEXT: andl $2047, %edi # imm = 0x7FF +; SSE2-NEXT: imull $43691, %edi, %ecx # imm = 0xAAAB +; SSE2-NEXT: shrl $17, %ecx +; SSE2-NEXT: andl $-2, %ecx +; SSE2-NEXT: leal (%rcx,%rcx,2), %ecx +; SSE2-NEXT: subl %ecx, %edi +; SSE2-NEXT: movd %edi, %xmm0 +; SSE2-NEXT: pinsrw $2, %eax, %xmm0 +; SSE2-NEXT: movl %edx, %eax +; SSE2-NEXT: andl $2047, %eax # imm = 0x7FF +; SSE2-NEXT: imull $161, %eax, %ecx +; SSE2-NEXT: shrl $16, %ecx +; SSE2-NEXT: subl %ecx, %eax +; SSE2-NEXT: movzwl %ax, %eax +; SSE2-NEXT: shrl %eax +; SSE2-NEXT: addl %ecx, %eax +; SSE2-NEXT: shrl $10, %eax +; SSE2-NEXT: imull $2043, %eax, %eax # imm = 0x7FB +; SSE2-NEXT: subl %eax, %edx +; SSE2-NEXT: pinsrw $4, %edx, %xmm0 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE2-NEXT: pcmpeqd {{.*}}(%rip), %xmm0 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE2-NEXT: pxor %xmm0, %xmm1 +; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl +; SSE2-NEXT: retq +; +; SSE41-LABEL: test_urem_vec: +; SSE41: # %bb.0: +; SSE41-NEXT: movl %esi, %eax +; SSE41-NEXT: andl $2047, %eax # imm = 0x7FF +; SSE41-NEXT: imull $9363, %eax, %ecx # imm = 0x2493 +; SSE41-NEXT: shrl $16, %ecx +; SSE41-NEXT: subl %ecx, %eax +; SSE41-NEXT: movzwl %ax, %eax +; SSE41-NEXT: shrl %eax +; SSE41-NEXT: addl %ecx, %eax +; SSE41-NEXT: shrl $2, %eax +; SSE41-NEXT: leal (,%rax,8), %ecx +; SSE41-NEXT: subl %ecx, %eax +; SSE41-NEXT: addl %esi, %eax +; SSE41-NEXT: andl $2047, %edi # imm = 0x7FF +; SSE41-NEXT: imull $43691, %edi, %ecx # imm = 0xAAAB +; SSE41-NEXT: shrl $17, %ecx +; SSE41-NEXT: andl $-2, %ecx +; SSE41-NEXT: leal (%rcx,%rcx,2), %ecx +; SSE41-NEXT: subl %ecx, %edi +; SSE41-NEXT: movd %edi, %xmm0 +; SSE41-NEXT: pinsrw $2, %eax, %xmm0 +; SSE41-NEXT: movl %edx, %eax +; SSE41-NEXT: andl $2047, %eax # imm = 0x7FF +; SSE41-NEXT: imull $161, %eax, %ecx +; SSE41-NEXT: shrl $16, %ecx +; SSE41-NEXT: subl %ecx, %eax +; SSE41-NEXT: movzwl %ax, %eax +; SSE41-NEXT: shrl %eax +; SSE41-NEXT: addl %ecx, %eax +; SSE41-NEXT: shrl $10, %eax +; SSE41-NEXT: imull $2043, %eax, %eax # imm = 0x7FB +; SSE41-NEXT: subl %eax, %edx +; SSE41-NEXT: pinsrw $4, %edx, %xmm0 +; SSE41-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE41-NEXT: pcmpeqd {{.*}}(%rip), %xmm0 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE41-NEXT: pxor %xmm0, %xmm1 +; SSE41-NEXT: movd %xmm1, %eax +; SSE41-NEXT: pextrb $4, %xmm1, %edx +; SSE41-NEXT: pextrb $8, %xmm1, %ecx +; SSE41-NEXT: # kill: def $al killed $al killed $eax +; SSE41-NEXT: # kill: def $dl killed $dl killed $edx +; SSE41-NEXT: # kill: def $cl killed $cl killed $ecx +; SSE41-NEXT: retq +; +; AVX1-LABEL: test_urem_vec: +; AVX1: # %bb.0: +; AVX1-NEXT: 
movl %esi, %eax +; AVX1-NEXT: andl $2047, %eax # imm = 0x7FF +; AVX1-NEXT: imull $9363, %eax, %ecx # imm = 0x2493 +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: movzwl %ax, %eax +; AVX1-NEXT: shrl %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: shrl $2, %eax +; AVX1-NEXT: leal (,%rax,8), %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: addl %esi, %eax +; AVX1-NEXT: andl $2047, %edi # imm = 0x7FF +; AVX1-NEXT: imull $43691, %edi, %ecx # imm = 0xAAAB +; AVX1-NEXT: shrl $17, %ecx +; AVX1-NEXT: andl $-2, %ecx +; AVX1-NEXT: leal (%rcx,%rcx,2), %ecx +; AVX1-NEXT: subl %ecx, %edi +; AVX1-NEXT: vmovd %edi, %xmm0 +; AVX1-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 +; AVX1-NEXT: movl %edx, %eax +; AVX1-NEXT: andl $2047, %eax # imm = 0x7FF +; AVX1-NEXT: imull $161, %eax, %ecx +; AVX1-NEXT: shrl $16, %ecx +; AVX1-NEXT: subl %ecx, %eax +; AVX1-NEXT: movzwl %ax, %eax +; AVX1-NEXT: shrl %eax +; AVX1-NEXT: addl %ecx, %eax +; AVX1-NEXT: shrl $10, %eax +; AVX1-NEXT: imull $2043, %eax, %eax # imm = 0x7FB +; AVX1-NEXT: subl %eax, %edx +; AVX1-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0 +; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vmovd %xmm0, %eax +; AVX1-NEXT: vpextrb $4, %xmm0, %edx +; AVX1-NEXT: vpextrb $8, %xmm0, %ecx +; AVX1-NEXT: # kill: def $al killed $al killed $eax +; AVX1-NEXT: # kill: def $dl killed $dl killed $edx +; AVX1-NEXT: # kill: def $cl killed $cl killed $ecx +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_urem_vec: +; AVX2: # %bb.0: +; AVX2-NEXT: andl $2047, %esi # imm = 0x7FF +; AVX2-NEXT: imull $9363, %esi, %eax # imm = 0x2493 +; AVX2-NEXT: shrl $16, %eax +; AVX2-NEXT: movl %esi, %ecx +; AVX2-NEXT: subl %eax, %ecx +; AVX2-NEXT: movzwl %cx, %ecx +; AVX2-NEXT: shrl %ecx +; AVX2-NEXT: addl %eax, %ecx +; AVX2-NEXT: shrl $2, %ecx +; AVX2-NEXT: leal (,%rcx,8), %eax +; AVX2-NEXT: subl %eax, %ecx +; AVX2-NEXT: addl %esi, %ecx +; AVX2-NEXT: andl $2047, %edi # imm = 0x7FF +; AVX2-NEXT: imull $43691, %edi, %eax # imm = 0xAAAB +; AVX2-NEXT: shrl $17, %eax +; AVX2-NEXT: andl $-2, %eax +; AVX2-NEXT: leal (%rax,%rax,2), %eax +; AVX2-NEXT: subl %eax, %edi +; AVX2-NEXT: vmovd %edi, %xmm0 +; AVX2-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 +; AVX2-NEXT: andl $2047, %edx # imm = 0x7FF +; AVX2-NEXT: imull $161, %edx, %eax +; AVX2-NEXT: shrl $16, %eax +; AVX2-NEXT: movl %edx, %ecx +; AVX2-NEXT: subl %eax, %ecx +; AVX2-NEXT: movzwl %cx, %ecx +; AVX2-NEXT: shrl %ecx +; AVX2-NEXT: addl %eax, %ecx +; AVX2-NEXT: shrl $10, %ecx +; AVX2-NEXT: imull $2043, %ecx, %eax # imm = 0x7FB +; AVX2-NEXT: subl %eax, %edx +; AVX2-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2047,2047,2047,2047] +; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vmovd %xmm0, %eax +; AVX2-NEXT: vpextrb $4, %xmm0, %edx +; AVX2-NEXT: vpextrb $8, %xmm0, %ecx +; AVX2-NEXT: # kill: def $al killed $al killed $eax +; AVX2-NEXT: # kill: def $dl killed $dl killed $edx +; AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; AVX2-NEXT: retq +; +; AVX512VL-LABEL: test_urem_vec: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: andl $2047, %esi # imm = 0x7FF +; AVX512VL-NEXT: imull $9363, %esi, %eax # imm = 0x2493 +; AVX512VL-NEXT: shrl $16, %eax +; AVX512VL-NEXT: movl %esi, %ecx +; AVX512VL-NEXT: subl %eax, %ecx +; AVX512VL-NEXT: movzwl %cx, %ecx +; 
AVX512VL-NEXT: shrl %ecx +; AVX512VL-NEXT: addl %eax, %ecx +; AVX512VL-NEXT: shrl $2, %ecx +; AVX512VL-NEXT: leal (,%rcx,8), %eax +; AVX512VL-NEXT: subl %eax, %ecx +; AVX512VL-NEXT: addl %esi, %ecx +; AVX512VL-NEXT: andl $2047, %edi # imm = 0x7FF +; AVX512VL-NEXT: imull $43691, %edi, %eax # imm = 0xAAAB +; AVX512VL-NEXT: shrl $17, %eax +; AVX512VL-NEXT: andl $-2, %eax +; AVX512VL-NEXT: leal (%rax,%rax,2), %eax +; AVX512VL-NEXT: subl %eax, %edi +; AVX512VL-NEXT: vmovd %edi, %xmm0 +; AVX512VL-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 +; AVX512VL-NEXT: andl $2047, %edx # imm = 0x7FF +; AVX512VL-NEXT: imull $161, %edx, %eax +; AVX512VL-NEXT: shrl $16, %eax +; AVX512VL-NEXT: movl %edx, %ecx +; AVX512VL-NEXT: subl %eax, %ecx +; AVX512VL-NEXT: movzwl %cx, %ecx +; AVX512VL-NEXT: shrl %ecx +; AVX512VL-NEXT: addl %eax, %ecx +; AVX512VL-NEXT: shrl $10, %ecx +; AVX512VL-NEXT: imull $2043, %ecx, %eax # imm = 0x7FB +; AVX512VL-NEXT: subl %eax, %edx +; AVX512VL-NEXT: vpinsrw $4, %edx, %xmm0, %xmm0 +; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512VL-NEXT: vpcmpneqd {{.*}}(%rip), %xmm0, %k0 +; AVX512VL-NEXT: kshiftrw $1, %k0, %k1 +; AVX512VL-NEXT: kmovw %k1, %edx +; AVX512VL-NEXT: kshiftrw $2, %k0, %k1 +; AVX512VL-NEXT: kmovw %k1, %ecx +; AVX512VL-NEXT: kmovw %k0, %eax +; AVX512VL-NEXT: # kill: def $al killed $al killed $eax +; AVX512VL-NEXT: # kill: def $dl killed $dl killed $edx +; AVX512VL-NEXT: # kill: def $cl killed $cl killed $ecx +; AVX512VL-NEXT: retq + %urem = urem <3 x i11> %X, <i11 6, i11 7, i11 -5> + %cmp = icmp ne <3 x i11> %urem, <i11 0, i11 1, i11 2> + ret <3 x i1> %cmp +}