diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll b/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll
@@ -0,0 +1,403 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
+
+; fold (add (umax X, C), -C) --> (usubsat X, C)
+
+define <2 x i64> @add_umax_v2i64(<2 x i64> %a0) {
+; CHECK-LABEL: add_umax_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, zero, 7
+; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
+; CHECK-NEXT:    vmaxu.vx v25, v8, a0
+; CHECK-NEXT:    vadd.vi v8, v25, -7
+; CHECK-NEXT:    ret
+  %v1 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %a0, <2 x i64> <i64 7, i64 7>)
+  %v2 = add <2 x i64> %v1, <i64 -7, i64 -7>
+  ret <2 x i64> %v2
+}
+
+define <vscale x 2 x i64> @add_umax_nxv2i64(<vscale x 2 x i64> %a0) {
+; CHECK-LABEL: add_umax_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a0, zero, 7
+; CHECK-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
+; CHECK-NEXT:    vmaxu.vx v26, v8, a0
+; CHECK-NEXT:    vadd.vi v8, v26, -7
+; CHECK-NEXT:    ret
+  %ins1 = insertelement <vscale x 2 x i64> poison, i64 7, i32 0
+  %splat1 = shufflevector <vscale x 2 x i64> %ins1, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %ins2 = insertelement <vscale x 2 x i64> poison, i64 -7, i32 0
+  %splat2 = shufflevector <vscale x 2 x i64> %ins2, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %v1 = call <vscale x 2 x i64> @llvm.umax.nxv2i64(<vscale x 2 x i64> %a0, <vscale x 2 x i64> %splat1)
+  %v2 = add <vscale x 2 x i64> %v1, %splat2
+  ret <vscale x 2 x i64> %v2
+}
+
+; Try to find umax(a,b) - b or a - umin(a,b) patterns;
+; they may be converted to usubsat(a,b).
+
+define <2 x i64> @sub_umax_v2i64(<2 x i64> %a0, <2 x i64> %a1) {
+; RV32-LABEL: sub_umax_v2i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
+; RV32-NEXT:    vsub.vv v25, v8, v9
+; RV32-NEXT:    vmsltu.vv v0, v8, v25
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; RV32-NEXT:    vmv.v.i v26, 0
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
+; RV32-NEXT:    vmerge.vvm v8, v25, v26, v0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: sub_umax_v2i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
+; RV64-NEXT:    vsub.vv v25, v8, v9
+; RV64-NEXT:    vmsltu.vv v0, v8, v25
+; RV64-NEXT:    vmerge.vim v8, v25, 0, v0
+; RV64-NEXT:    ret
+  %v1 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %a0, <2 x i64> %a1)
+  %v2 = sub <2 x i64> %v1, %a1
+  ret <2 x i64> %v2
+}
+
+define <vscale x 2 x i64> @sub_umax_nxv2i64(<vscale x 2 x i64> %a0, <vscale x 2 x i64> %a1) {
+; CHECK-LABEL: sub_umax_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
+; CHECK-NEXT:    vmaxu.vv v26, v8, v10
+; CHECK-NEXT:    vsub.vv v8, v26, v10
+; CHECK-NEXT:    ret
+  %v1 = call <vscale x 2 x i64> @llvm.umax.nxv2i64(<vscale x 2 x i64> %a0, <vscale x 2 x i64> %a1)
+  %v2 = sub <vscale x 2 x i64> %v1, %a1
+  ret <vscale x 2 x i64> %v2
+}
+
+define <2 x i64> @sub_umin_v2i64(<2 x i64> %a0, <2 x i64> %a1) {
+; RV32-LABEL: sub_umin_v2i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
+; RV32-NEXT:    vsub.vv v25, v8, v9
+; RV32-NEXT:    vmsltu.vv v0, v8, v25
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; RV32-NEXT:    vmv.v.i v26, 0
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
+; RV32-NEXT:    vmerge.vvm v8, v25, v26, v0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: sub_umin_v2i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
+; RV64-NEXT:    vsub.vv v25, v8, v9
+; RV64-NEXT:    vmsltu.vv v0, v8, v25
+; RV64-NEXT:    vmerge.vim v8, v25, 0, v0
+; RV64-NEXT:    ret
+  %v1 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %a0, <2 x i64> %a1)
+  %v2 = sub <2 x i64> %a0, %v1
+  ret <2 x i64> %v2
+}
+
+define <vscale x 2 x i64> @sub_umin_nxv2i64(<vscale x 2 x i64> %a0, <vscale x 2 x i64> %a1) {
+; CHECK-LABEL: sub_umin_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
+; CHECK-NEXT:    vmaxu.vv v26, v8, v10
+; CHECK-NEXT:    vsub.vv v8, v26, v10
+; CHECK-NEXT:    ret
+  %v1 = call <vscale x 2 x i64> @llvm.umin.nxv2i64(<vscale x 2 x i64> %a0, <vscale x 2 x i64> %a1)
+  %v2 = sub <vscale x 2 x i64> %a0, %v1
+  ret <vscale x 2 x i64> %v2
+}
+
+; Match VSELECTs into sub with unsigned saturation.
+
+; x >= y ? x-y : 0 --> usubsat x, y
+
+define <2 x i64> @vselect_sub_v2i64(<2 x i64> %a0, <2 x i64> %a1) {
+; CHECK-LABEL: vselect_sub_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
+; CHECK-NEXT:    vmsleu.vv v0, v9, v8
+; CHECK-NEXT:    vsub.vv v25, v8, v9
+; CHECK-NEXT:    vmv.v.i v26, 0
+; CHECK-NEXT:    vmerge.vvm v8, v26, v25, v0
+; CHECK-NEXT:    ret
+  %cmp = icmp uge <2 x i64> %a0, %a1
+  %v1 = sub <2 x i64> %a0, %a1
+  %v2 = select <2 x i1> %cmp, <2 x i64> %v1, <2 x i64> zeroinitializer
+  ret <2 x i64> %v2
+}
+
+define <vscale x 2 x i64> @vselect_sub_nxv2i64(<vscale x 2 x i64> %a0, <vscale x 2 x i64> %a1) {
+; CHECK-LABEL: vselect_sub_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
+; CHECK-NEXT:    vmsleu.vv v0, v10, v8
+; CHECK-NEXT:    vsub.vv v26, v8, v10
+; CHECK-NEXT:    vmv.v.i v28, 0
+; CHECK-NEXT:    vmerge.vvm v8, v28, v26, v0
+; CHECK-NEXT:    ret
+  %cmp = icmp uge <vscale x 2 x i64> %a0, %a1
+  %v1 = sub <vscale x 2 x i64> %a0, %a1
+  %v2 = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %v1, <vscale x 2 x i64> zeroinitializer
+  ret <vscale x 2 x i64> %v2
+}
+
+define <8 x i16> @vselect_sub_2_v8i16(<8 x i16> %x, i16 zeroext %w) nounwind {
+; CHECK-LABEL: vselect_sub_2_v8i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
+; CHECK-NEXT:    vmsltu.vx v0, v8, a0
+; CHECK-NEXT:    vsub.vx v25, v8, a0
+; CHECK-NEXT:    vmerge.vim v8, v25, 0, v0
+; CHECK-NEXT:    ret
+entry:
+  %0 = insertelement <8 x i16> undef, i16 %w, i32 0
+  %broadcast15 = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> zeroinitializer
+  %1 = icmp ult <8 x i16> %x, %broadcast15
+  %2 = sub <8 x i16> %x, %broadcast15
+  %res = select <8 x i1> %1, <8 x i16> zeroinitializer, <8 x i16> %2
+  ret <8 x i16> %res
+}
+
+define <vscale x 8 x i16> @vselect_sub_2_nxv8i16(<vscale x 8 x i16> %x, i16 zeroext %w) nounwind {
+; CHECK-LABEL: vselect_sub_2_nxv8i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, mu
+; CHECK-NEXT:    vmsltu.vx v0, v8, a0
+; CHECK-NEXT:    vsub.vx v26, v8, a0
+; CHECK-NEXT:    vmerge.vim v8, v26, 0, v0
+; CHECK-NEXT:    ret
+entry:
+  %0 = insertelement <vscale x 8 x i16> undef, i16 %w, i32 0
+  %broadcast15 = shufflevector <vscale x 8 x i16> %0, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %1 = icmp ult <vscale x 8 x i16> %x, %broadcast15
+  %2 = sub <vscale x 8 x i16> %x, %broadcast15
+  %res = select <vscale x 8 x i1> %1, <vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> %2
+  ret <vscale x 8 x i16> %res
+}
+
+; x > y ? x-y : 0 --> usubsat x, y
+; x > C-1 ? x+-C : 0 --> usubsat x, C
+
+define <2 x i64> @vselect_add_const_v2i64(<2 x i64> %a0) {
+; CHECK-LABEL: vselect_add_const_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
+; CHECK-NEXT:    vadd.vi v25, v8, -6
+; CHECK-NEXT:    addi a0, zero, 5
+; CHECK-NEXT:    vmsgtu.vx v0, v8, a0
+; CHECK-NEXT:    vmv.v.i v26, 0
+; CHECK-NEXT:    vmerge.vvm v8, v26, v25, v0
+; CHECK-NEXT:    ret
+  %v1 = add <2 x i64> %a0, <i64 -6, i64 -6>
+  %cmp = icmp ugt <2 x i64> %a0, <i64 5, i64 5>
+  %v2 = select <2 x i1> %cmp, <2 x i64> %v1, <2 x i64> zeroinitializer
+  ret <2 x i64> %v2
+}
+
+define <vscale x 2 x i64> @vselect_add_const_nxv2i64(<vscale x 2 x i64> %a0) {
+; CHECK-LABEL: vselect_add_const_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
+; CHECK-NEXT:    vadd.vi v26, v8, -6
+; CHECK-NEXT:    vmsgtu.vi v0, v8, 5
+; CHECK-NEXT:    vmv.v.i v28, 0
+; CHECK-NEXT:    vmerge.vvm v8, v28, v26, v0
+; CHECK-NEXT:    ret
+  %cm1 = insertelement <vscale x 2 x i64> poison, i64 -6, i32 0
+  %splatcm1 = shufflevector <vscale x 2 x i64> %cm1, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %nc = insertelement <vscale x 2 x i64> poison, i64 5, i32 0
+  %splatnc = shufflevector <vscale x 2 x i64> %nc, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %v1 = add <vscale x 2 x i64> %a0, %splatcm1
+  %cmp = icmp ugt <vscale x 2 x i64> %a0, %splatnc
+  %v2 = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %v1, <vscale x 2 x i64> zeroinitializer
+  ret <vscale x 2 x i64> %v2
+}
+
+define <2 x i16> @vselect_add_const_signbit_v2i16(<2 x i16> %a0) {
+; RV32-LABEL: vselect_add_const_signbit_v2i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, 8
+; RV32-NEXT:    addi a0, a0, -2
+; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
+; RV32-NEXT:    vmsgtu.vx v0, v8, a0
+; RV32-NEXT:    lui a0, 1048568
+; RV32-NEXT:    addi a0, a0, 1
+; RV32-NEXT:    vadd.vx v25, v8, a0
+; RV32-NEXT:    vmv.v.i v26, 0
+; RV32-NEXT:    vmerge.vvm v8, v26, v25, v0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vselect_add_const_signbit_v2i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a0, 8
+; RV64-NEXT:    addiw a0, a0, -2
+; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
+; RV64-NEXT:    vmsgtu.vx v0, v8, a0
+; RV64-NEXT:    lui a0, 1048568
+; RV64-NEXT:    addiw a0, a0, 1
+; RV64-NEXT:    vadd.vx v25, v8, a0
+; RV64-NEXT:    vmv.v.i v26, 0
+; RV64-NEXT:    vmerge.vvm v8, v26, v25, v0
+; RV64-NEXT:    ret
+  %cmp = icmp ugt <2 x i16> %a0, <i16 32766, i16 32766>
+  %v1 = add <2 x i16> %a0, <i16 -32767, i16 -32767>
+  %v2 = select <2 x i1> %cmp, <2 x i16> %v1, <2 x i16> zeroinitializer
+  ret <2 x i16> %v2
+}
+
+define <vscale x 2 x i16> @vselect_add_const_signbit_nxv2i16(<vscale x 2 x i16> %a0) {
+; RV32-LABEL: vselect_add_const_signbit_nxv2i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, 8
+; RV32-NEXT:    addi a0, a0, -2
+; RV32-NEXT:    vsetvli a1, zero, e16, mf2, ta, mu
+; RV32-NEXT:    vmsgtu.vx v0, v8, a0
+; RV32-NEXT:    lui a0, 1048568
+; RV32-NEXT:    addi a0, a0, 1
+; RV32-NEXT:    vadd.vx v25, v8, a0
+; RV32-NEXT:    vmv.v.i v26, 0
+; RV32-NEXT:    vmerge.vvm v8, v26, v25, v0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vselect_add_const_signbit_nxv2i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a0, 8
+; RV64-NEXT:    addiw a0, a0, -2
+; RV64-NEXT:    vsetvli a1, zero, e16, mf2, ta, mu
+; RV64-NEXT:    vmsgtu.vx v0, v8, a0
+; RV64-NEXT:    lui a0, 1048568
+; RV64-NEXT:    addiw a0, a0, 1
+; RV64-NEXT:    vadd.vx v25, v8, a0
+; RV64-NEXT:    vmv.v.i v26, 0
+; RV64-NEXT:    vmerge.vvm v8, v26, v25, v0
+; RV64-NEXT:    ret
+  %cm1 = insertelement <vscale x 2 x i16> poison, i16 32766, i32 0
+  %splatcm1 = shufflevector <vscale x 2 x i16> %cm1, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %nc = insertelement <vscale x 2 x i16> poison, i16 -32767, i32 0
+  %splatnc = shufflevector <vscale x 2 x i16> %nc, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %cmp = icmp ugt <vscale x 2 x i16> %a0, %splatcm1
+  %v1 = add <vscale x 2 x i16> %a0, %splatnc
+  %v2 = select <vscale x 2 x i1> %cmp, <vscale x 2 x i16> %v1, <vscale x 2 x i16> zeroinitializer
+  ret <vscale x 2 x i16> %v2
+}
+
+; x s< 0 ? x^C : 0 --> usubsat x, C
+
+define <2 x i16> @vselect_xor_const_signbit_v2i16(<2 x i16> %a0) {
+; CHECK-LABEL: vselect_xor_const_signbit_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
+; CHECK-NEXT:    vmsle.vi v0, v8, -1
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    lui a0, 1048568
+; CHECK-NEXT:    vxor.vx v26, v8, a0
+; CHECK-NEXT:    vmerge.vvm v8, v25, v26, v0
+; CHECK-NEXT:    ret
+  %cmp = icmp slt <2 x i16> %a0, zeroinitializer
+  %v1 = xor <2 x i16> %a0, <i16 -32768, i16 -32768>
+  %v2 = select <2 x i1> %cmp, <2 x i16> %v1, <2 x i16> zeroinitializer
+  ret <2 x i16> %v2
+}
+
+define <vscale x 2 x i16> @vselect_xor_const_signbit_nxv2i16(<vscale x 2 x i16> %a0) {
+; CHECK-LABEL: vselect_xor_const_signbit_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
+; CHECK-NEXT:    vmsle.vi v0, v8, -1
+; CHECK-NEXT:    vmv.v.i v25, 0
+; CHECK-NEXT:    lui a0, 1048568
+; CHECK-NEXT:    vxor.vx v26, v8, a0
+; CHECK-NEXT:    vmerge.vvm v8, v25, v26, v0
+; CHECK-NEXT:    ret
+  %cmp = icmp slt <vscale x 2 x i16> %a0, zeroinitializer
+  %ins = insertelement <vscale x 2 x i16> poison, i16 -32768, i32 0
+  %splat = shufflevector <vscale x 2 x i16> %ins, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %v1 = xor <vscale x 2 x i16> %a0, %splat
+  %v2 = select <vscale x 2 x i1> %cmp, <vscale x 2 x i16> %v1, <vscale x 2 x i16> zeroinitializer
+  ret <vscale x 2 x i16> %v2
+}
+
+; Match VSELECTs into add with unsigned saturation.
+
+; x <= x+y ? x+y : ~0 --> uaddsat x, y
+; x+y >= x ? x+y : ~0 --> uaddsat x, y
+
+define <2 x i64> @vselect_add_v2i64(<2 x i64> %a0, <2 x i64> %a1) {
+; CHECK-LABEL: vselect_add_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
+; CHECK-NEXT:    vadd.vv v25, v8, v9
+; CHECK-NEXT:    vmsleu.vv v0, v8, v25
+; CHECK-NEXT:    vmv.v.i v26, -1
+; CHECK-NEXT:    vmerge.vvm v8, v26, v25, v0
+; CHECK-NEXT:    ret
+  %v1 = add <2 x i64> %a0, %a1
+  %cmp = icmp ule <2 x i64> %a0, %v1
+  %v2 = select <2 x i1> %cmp, <2 x i64> %v1, <2 x i64> <i64 -1, i64 -1>
+  ret <2 x i64> %v2
+}
+
+define <vscale x 2 x i64> @vselect_add_nxv2i64(<vscale x 2 x i64> %a0, <vscale x 2 x i64> %a1) {
+; CHECK-LABEL: vselect_add_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
+; CHECK-NEXT:    vadd.vv v26, v8, v10
+; CHECK-NEXT:    vmsleu.vv v0, v8, v26
+; CHECK-NEXT:    vmv.v.i v28, -1
+; CHECK-NEXT:    vmerge.vvm v8, v28, v26, v0
+; CHECK-NEXT:    ret
+  %v1 = add <vscale x 2 x i64> %a0, %a1
+  %cmp = icmp ule <vscale x 2 x i64> %a0, %v1
+  %allones = insertelement <vscale x 2 x i64> poison, i64 -1, i32 0
+  %splatallones = shufflevector <vscale x 2 x i64> %allones, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %v2 = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %splatallones
+  ret <vscale x 2 x i64> %v2
+}
+
+; If the RHS is a constant we have to reverse the const canonicalization.
+; x >= ~C ? x+C : ~0 --> uaddsat x, C
+
+define <2 x i64> @vselect_add_const_2_v2i64(<2 x i64> %a0) {
+; CHECK-LABEL: vselect_add_const_2_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
+; CHECK-NEXT:    vadd.vi v25, v8, 6
+; CHECK-NEXT:    vmsleu.vi v0, v8, -7
+; CHECK-NEXT:    vmv.v.i v26, -1
+; CHECK-NEXT:    vmerge.vvm v8, v26, v25, v0
+; CHECK-NEXT:    ret
+  %v1 = add <2 x i64> %a0, <i64 6, i64 6>
+  %cmp = icmp ule <2 x i64> %a0, <i64 -7, i64 -7>
+  %v2 = select <2 x i1> %cmp, <2 x i64> %v1, <2 x i64> <i64 -1, i64 -1>
+  ret <2 x i64> %v2
+}
+
+define <vscale x 2 x i64> @vselect_add_const_2_nxv2i64(<vscale x 2 x i64> %a0) {
+; CHECK-LABEL: vselect_add_const_2_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
+; CHECK-NEXT:    vadd.vi v26, v8, 6
+; CHECK-NEXT:    vmsleu.vi v0, v8, -7
+; CHECK-NEXT:    vmv.v.i v28, -1
+; CHECK-NEXT:    vmerge.vvm v8, v28, v26, v0
+; CHECK-NEXT:    ret
+  %cm1 = insertelement <vscale x 2 x i64> poison, i64 6, i32 0
+  %splatcm1 = shufflevector <vscale x 2 x i64> %cm1, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %nc = insertelement <vscale x 2 x i64> poison, i64 -7, i32 0
+  %splatnc = shufflevector <vscale x 2 x i64> %nc, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %v1 = add <vscale x 2 x i64> %a0, %splatcm1
+  %cmp = icmp ule <vscale x 2 x i64> %a0, %splatnc
+  %allones = insertelement <vscale x 2 x i64> poison, i64 -1, i32 0
+  %splatallones = shufflevector <vscale x 2 x i64> %allones, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %v2 = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %splatallones
+  ret <vscale x 2 x i64> %v2
+}
+
+declare <2 x i64> @llvm.umin.v2i64(<2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.umax.v2i64(<2 x i64>, <2 x i64>)
+declare <vscale x 2 x i64> @llvm.umin.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 2 x i64> @llvm.umax.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)