Index: llvm/test/CodeGen/AArch64/srem-lkk.ll
===================================================================
--- llvm/test/CodeGen/AArch64/srem-lkk.ll
+++ llvm/test/CodeGen/AArch64/srem-lkk.ll
@@ -91,44 +91,6 @@
   ret i32 %3
 }
 
-; Don't fold for divisors that are a power of two.
-define i32 @dont_fold_srem_power_of_two(i32 %x) {
-; CHECK-LABEL: dont_fold_srem_power_of_two:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    negs w8, w0
-; CHECK-NEXT:    and w9, w0, #0x3f
-; CHECK-NEXT:    and w8, w8, #0x3f
-; CHECK-NEXT:    csneg w0, w9, w8, mi
-; CHECK-NEXT:    ret
-  %1 = srem i32 %x, 64
-  ret i32 %1
-}
-
-; Don't fold if the divisor is one.
-define i32 @dont_fold_srem_one(i32 %x) {
-; CHECK-LABEL: dont_fold_srem_one:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w0, wzr
-; CHECK-NEXT:    ret
-  %1 = srem i32 %x, 1
-  ret i32 %1
-}
-
-; Don't fold if the divisor is 2^31.
-define i32 @dont_fold_srem_i32_smax(i32 %x) {
-; CHECK-LABEL: dont_fold_srem_i32_smax:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #2147483647
-; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    add w8, w0, w8
-; CHECK-NEXT:    csel w8, w8, w0, lt
-; CHECK-NEXT:    and w8, w8, #0x80000000
-; CHECK-NEXT:    add w0, w0, w8
-; CHECK-NEXT:    ret
-  %1 = srem i32 %x, 2147483648
-  ret i32 %1
-}
-
 ; Don't fold i64 srem
 define i64 @dont_fold_srem_i64(i64 %x) {
 ; CHECK-LABEL: dont_fold_srem_i64:
Index: llvm/test/CodeGen/AArch64/srem-pow2.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/srem-pow2.ll
@@ -0,0 +1,145 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
+
+define i16 @fold_srem_1_i16(i16 %x) {
+; CHECK-LABEL: fold_srem_1_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    ret
+  %1 = srem i16 %x, 1
+  ret i16 %1
+}
+
+define i32 @fold_srem_1_i32(i32 %x) {
+; CHECK-LABEL: fold_srem_1_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    ret
+  %1 = srem i32 %x, 1
+  ret i32 %1
+}
+
+define i64 @fold_srem_1_i64(i64 %x) {
+; CHECK-LABEL: fold_srem_1_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, xzr
+; CHECK-NEXT:    ret
+  %1 = srem i64 %x, 1
+  ret i64 %1
+}
+
+define i16 @fold_srem_2_i16(i16 %x) {
+; CHECK-LABEL: fold_srem_2_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and w8, w0, #0x8000
+; CHECK-NEXT:    add w8, w0, w8, lsr #15
+; CHECK-NEXT:    and w8, w8, #0xfffffffe
+; CHECK-NEXT:    sub w0, w0, w8
+; CHECK-NEXT:    ret
+  %1 = srem i16 %x, 2
+  ret i16 %1
+}
+
+define i32 @fold_srem_2_i32(i32 %x) {
+; CHECK-LABEL: fold_srem_2_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    cinc w8, w0, lt
+; CHECK-NEXT:    and w8, w8, #0xfffffffe
+; CHECK-NEXT:    sub w0, w0, w8
+; CHECK-NEXT:    ret
+  %1 = srem i32 %x, 2
+  ret i32 %1
+}
+
+define i64 @fold_srem_2_i64(i64 %x) {
+; CHECK-LABEL: fold_srem_2_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmp x0, #0
+; CHECK-NEXT:    cinc x8, x0, lt
+; CHECK-NEXT:    and x8, x8, #0xfffffffffffffffe
+; CHECK-NEXT:    sub x0, x0, x8
+; CHECK-NEXT:    ret
+  %1 = srem i64 %x, 2
+  ret i64 %1
+}
+
+define i16 @fold_srem_pow2_i16(i16 %x) {
+; CHECK-LABEL: fold_srem_pow2_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sxth w8, w0
+; CHECK-NEXT:    ubfx w8, w8, #25, #6
+; CHECK-NEXT:    add w8, w0, w8
+; CHECK-NEXT:    and w8, w8, #0xffffffc0
+; CHECK-NEXT:    sub w0, w0, w8
+; CHECK-NEXT:    ret
+  %1 = srem i16 %x, 64
+  ret i16 %1
+}
+
+define i32 @fold_srem_pow2_i32(i32 %x) {
+; CHECK-LABEL: fold_srem_pow2_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, #63
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    csel w8, w8, w0, lt
+; CHECK-NEXT:    and w8, w8, #0xffffffc0
+; CHECK-NEXT:    sub w0, w0, w8
+; CHECK-NEXT:    ret
+  %1 = srem i32 %x, 64
+  ret i32 %1
+}
+
+define i64 @fold_srem_pow2_i64(i64 %x) {
+; CHECK-LABEL: fold_srem_pow2_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add x8, x0, #63
+; CHECK-NEXT:    cmp x0, #0
+; CHECK-NEXT:    csel x8, x8, x0, lt
+; CHECK-NEXT:    and x8, x8, #0xffffffffffffffc0
+; CHECK-NEXT:    sub x0, x0, x8
+; CHECK-NEXT:    ret
+  %1 = srem i64 %x, 64
+  ret i64 %1
+}
+
+define i16 @fold_srem_smax_i16(i16 %x) {
+; CHECK-LABEL: fold_srem_smax_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sxth w8, w0
+; CHECK-NEXT:    ubfx w8, w8, #16, #15
+; CHECK-NEXT:    add w8, w0, w8
+; CHECK-NEXT:    and w8, w8, #0xffff8000
+; CHECK-NEXT:    add w0, w0, w8
+; CHECK-NEXT:    ret
+  %1 = srem i16 %x, 32768
+  ret i16 %1
+}
+
+define i32 @fold_srem_smax_i32(i32 %x) {
+; CHECK-LABEL: fold_srem_smax_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #2147483647
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    add w8, w0, w8
+; CHECK-NEXT:    csel w8, w8, w0, lt
+; CHECK-NEXT:    and w8, w8, #0x80000000
+; CHECK-NEXT:    add w0, w0, w8
+; CHECK-NEXT:    ret
+  %1 = srem i32 %x, 2147483648
+  ret i32 %1
+}
+
+define i64 @fold_srem_smax_i64(i64 %x) {
+; CHECK-LABEL: fold_srem_smax_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #9223372036854775807
+; CHECK-NEXT:    cmp x0, #0
+; CHECK-NEXT:    add x8, x0, x8
+; CHECK-NEXT:    csel x8, x8, x0, lt
+; CHECK-NEXT:    and x8, x8, #0x8000000000000000
+; CHECK-NEXT:    add x0, x0, x8
+; CHECK-NEXT:    ret
+  %1 = srem i64 %x, -9223372036854775808
+  ret i64 %1
+}