diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -192,6 +192,12 @@
   UDIV_VL,
   UREM_VL,
   XOR_VL,
+
+  SADDSAT_VL,
+  UADDSAT_VL,
+  SSUBSAT_VL,
+  USUBSAT_VL,
+
   FADD_VL,
   FSUB_VL,
   FMUL_VL,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -516,6 +516,11 @@
       setOperationAction(ISD::FP_TO_SINT, VT, Custom);
       setOperationAction(ISD::FP_TO_UINT, VT, Custom);
 
+      setOperationAction(ISD::SADDSAT, VT, Legal);
+      setOperationAction(ISD::UADDSAT, VT, Legal);
+      setOperationAction(ISD::SSUBSAT, VT, Legal);
+      setOperationAction(ISD::USUBSAT, VT, Legal);
+
       // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
       // nodes which truncate by one power of two at a time.
       setOperationAction(ISD::TRUNCATE, VT, Custom);
@@ -742,6 +747,11 @@
         setOperationAction(ISD::MULHS, VT, Custom);
         setOperationAction(ISD::MULHU, VT, Custom);
 
+        setOperationAction(ISD::SADDSAT, VT, Custom);
+        setOperationAction(ISD::UADDSAT, VT, Custom);
+        setOperationAction(ISD::SSUBSAT, VT, Custom);
+        setOperationAction(ISD::USUBSAT, VT, Custom);
+
         setOperationAction(ISD::VSELECT, VT, Custom);
         setOperationAction(ISD::SELECT_CC, VT, Expand);
@@ -2568,6 +2578,14 @@
     assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
            "Unexpected custom legalisation");
     return SDValue();
+  case ISD::SADDSAT:
+    return lowerToScalableOp(Op, DAG, RISCVISD::SADDSAT_VL);
+  case ISD::UADDSAT:
+    return lowerToScalableOp(Op, DAG, RISCVISD::UADDSAT_VL);
+  case ISD::SSUBSAT:
+    return lowerToScalableOp(Op, DAG, RISCVISD::SSUBSAT_VL);
+  case ISD::USUBSAT:
+    return lowerToScalableOp(Op, DAG, RISCVISD::USUBSAT_VL);
   case ISD::FADD:
     return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL);
   case ISD::FSUB:
@@ -8351,6 +8369,10 @@
   NODE_NAME_CASE(UDIV_VL)
   NODE_NAME_CASE(UREM_VL)
   NODE_NAME_CASE(XOR_VL)
+  NODE_NAME_CASE(SADDSAT_VL)
+  NODE_NAME_CASE(UADDSAT_VL)
+  NODE_NAME_CASE(SSUBSAT_VL)
+  NODE_NAME_CASE(USUBSAT_VL)
   NODE_NAME_CASE(FADD_VL)
   NODE_NAME_CASE(FSUB_VL)
   NODE_NAME_CASE(FMUL_VL)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -523,6 +523,12 @@
                  vti.RegClass:$rs2, simm5:$rs1, VMV0:$vm, vti.AVL, vti.Log2SEW)>;
 }
 
+// 12.1. Vector Single-Width Saturating Add and Subtract
+defm : VPatBinarySDNode_VV_VX_VI<saddsat, "PseudoVSADD">;
+defm : VPatBinarySDNode_VV_VX_VI<uaddsat, "PseudoVSADDU">;
+defm : VPatBinarySDNode_VV_VX<ssubsat, "PseudoVSSUB">;
+defm : VPatBinarySDNode_VV_VX<usubsat, "PseudoVSSUBU">;
+
 // 16.1. Vector Mask-Register Logical Instructions
 foreach mti = AllMasks in {
   def : Pat<(mti.Mask (and VR:$rs1, VR:$rs2)),
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -89,6 +89,12 @@
 def riscv_smax_vl : SDNode<"RISCVISD::SMAX_VL", SDT_RISCVIntBinOp_VL>;
 def riscv_umin_vl : SDNode<"RISCVISD::UMIN_VL", SDT_RISCVIntBinOp_VL>;
 def riscv_umax_vl : SDNode<"RISCVISD::UMAX_VL", SDT_RISCVIntBinOp_VL>;
+
+def riscv_saddsat_vl : SDNode<"RISCVISD::SADDSAT_VL", SDT_RISCVIntBinOp_VL>;
+def riscv_uaddsat_vl : SDNode<"RISCVISD::UADDSAT_VL", SDT_RISCVIntBinOp_VL>;
+def riscv_ssubsat_vl : SDNode<"RISCVISD::SSUBSAT_VL", SDT_RISCVIntBinOp_VL>;
+def riscv_usubsat_vl : SDNode<"RISCVISD::USUBSAT_VL", SDT_RISCVIntBinOp_VL>;
+
 def riscv_fadd_vl : SDNode<"RISCVISD::FADD_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative]>;
 def riscv_fsub_vl : SDNode<"RISCVISD::FSUB_VL", SDT_RISCVFPBinOp_VL>;
 def riscv_fmul_vl : SDNode<"RISCVISD::FMUL_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative]>;
@@ -903,6 +909,12 @@
                  XLenVT:$imm5, GPR:$vl, vti.Log2SEW)>;
 }
 
+// 12.1. Vector Single-Width Saturating Add and Subtract
+defm : VPatBinaryVL_VV_VX_VI<riscv_saddsat_vl, "PseudoVSADD">;
+defm : VPatBinaryVL_VV_VX_VI<riscv_uaddsat_vl, "PseudoVSADDU">;
+defm : VPatBinaryVL_VV_VX<riscv_ssubsat_vl, "PseudoVSSUB">;
+defm : VPatBinaryVL_VV_VX<riscv_usubsat_vl, "PseudoVSSUBU">;
+
 } // Predicates = [HasStdExtV]
 
 // 15.1. Vector Single-Width Integer Reduction Instructions
diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll b/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll
--- a/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll
@@ -11,8 +11,7 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: addi a0, zero, 7
 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
-; CHECK-NEXT: vmaxu.vx v25, v8, a0
-; CHECK-NEXT: vadd.vi v8, v25, -7
+; CHECK-NEXT: vssubu.vx v8, v8, a0
 ; CHECK-NEXT: ret
   %v1 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %a0, <2 x i64> <i64 7, i64 7>)
   %v2 = add <2 x i64> %v1, <i64 -7, i64 -7>
@@ -40,24 +39,11 @@
 ; they may be converted to usubsat(a,b).
 define <2 x i64> @sub_umax_v2i64(<2 x i64> %a0, <2 x i64> %a1) {
-; RV32-LABEL: sub_umax_v2i64:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu
-; RV32-NEXT: vsub.vv v25, v8, v9
-; RV32-NEXT: vmsltu.vv v0, v8, v25
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; RV32-NEXT: vmv.v.i v26, 0
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu
-; RV32-NEXT: vmerge.vvm v8, v25, v26, v0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: sub_umax_v2i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu
-; RV64-NEXT: vsub.vv v25, v8, v9
-; RV64-NEXT: vmsltu.vv v0, v8, v25
-; RV64-NEXT: vmerge.vim v8, v25, 0, v0
-; RV64-NEXT: ret
+; CHECK-LABEL: sub_umax_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
+; CHECK-NEXT: vssubu.vv v8, v8, v9
+; CHECK-NEXT: ret
   %v1 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %a0, <2 x i64> %a1)
   %v2 = sub <2 x i64> %v1, %a1
   ret <2 x i64> %v2
@@ -67,8 +53,7 @@
 ; CHECK-LABEL: sub_umax_nxv2i64:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu
-; CHECK-NEXT: vmaxu.vv v26, v8, v10
-; CHECK-NEXT: vsub.vv v8, v26, v10
+; CHECK-NEXT: vssubu.vv v8, v8, v10
 ; CHECK-NEXT: ret
   %v1 = call <vscale x 2 x i64> @llvm.umax.nxv2i64(<vscale x 2 x i64> %a0, <vscale x 2 x i64> %a1)
   %v2 = sub <vscale x 2 x i64> %v1, %a1
@@ -76,24 +61,11 @@
 }
 
 define <2 x i64> @sub_umin_v2i64(<2 x i64> %a0, <2 x i64> %a1) {
-; RV32-LABEL: sub_umin_v2i64:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu
-; RV32-NEXT: vsub.vv v25, v8, v9
-; RV32-NEXT: vmsltu.vv v0, v8, v25
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; RV32-NEXT: vmv.v.i v26, 0
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu
-; RV32-NEXT: vmerge.vvm v8, v25, v26, v0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: sub_umin_v2i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu
-; RV64-NEXT: vsub.vv v25, v8, v9
-; RV64-NEXT: vmsltu.vv v0, v8, v25
-; RV64-NEXT: vmerge.vim v8, v25, 0, v0
-; RV64-NEXT: ret
+; CHECK-LABEL: sub_umin_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
+; CHECK-NEXT: vssubu.vv v8, v8, v9
+; CHECK-NEXT: ret
   %v1 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %a0, <2 x i64> %a1)
   %v2 = sub <2 x i64> %a0, %v1
   ret <2 x i64> %v2
@@ -103,8 +75,7 @@
 ; CHECK-LABEL: sub_umin_nxv2i64:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu
-; CHECK-NEXT: vmaxu.vv v26, v8, v10
-; CHECK-NEXT: vsub.vv v8, v26, v10
+; CHECK-NEXT: vssubu.vv v8, v8, v10
 ; CHECK-NEXT: ret
   %v1 = call <vscale x 2 x i64> @llvm.umin.nxv2i64(<vscale x 2 x i64> %a0, <vscale x 2 x i64> %a1)
   %v2 = sub <vscale x 2 x i64> %a0, %v1
@@ -119,10 +90,7 @@
 ; CHECK-LABEL: vselect_sub_v2i64:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
-; CHECK-NEXT: vmsleu.vv v0, v9, v8
-; CHECK-NEXT: vsub.vv v25, v8, v9
-; CHECK-NEXT: vmv.v.i v26, 0
-; CHECK-NEXT: vmerge.vvm v8, v26, v25, v0
+; CHECK-NEXT: vssubu.vv v8, v8, v9
 ; CHECK-NEXT: ret
   %cmp = icmp uge <2 x i64> %a0, %a1
   %v1 = sub <2 x i64> %a0, %a1
@@ -149,9 +117,7 @@
 ; CHECK-LABEL: vselect_sub_2_v8i16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
-; CHECK-NEXT: vmsltu.vx v0, v8, a0
-; CHECK-NEXT: vsub.vx v25, v8, a0
-; CHECK-NEXT: vmerge.vim v8, v25, 0, v0
+; CHECK-NEXT: vssubu.vx v8, v8, a0
 ; CHECK-NEXT: ret
 entry:
   %0 = insertelement <8 x i16> undef, i16 %w, i32 0
@@ -185,12 +151,9 @@
 define <2 x i64> @vselect_add_const_v2i64(<2 x i64> %a0) {
 ; CHECK-LABEL: vselect_add_const_v2i64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi a0, zero, 6
 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
-; CHECK-NEXT: vadd.vi v25, v8, -6
-; CHECK-NEXT: addi a0, zero, 5
-; CHECK-NEXT: vmsgtu.vx v0, v8, a0
-; CHECK-NEXT: vmv.v.i v26, 0
-; CHECK-NEXT: vmerge.vvm v8, v26, v25, v0
+; CHECK-NEXT: vssubu.vx v8, v8, a0
 ; CHECK-NEXT: ret
   %v1 = add <2 x i64> %a0, <i64 -6, i64 -6>
   %cmp = icmp ugt <2 x i64> %a0, <i64 5, i64 5>
@@ -221,27 +184,17 @@
 ; RV32-LABEL: vselect_add_const_signbit_v2i16:
 ; RV32: # %bb.0:
 ; RV32-NEXT: lui a0, 8
-; RV32-NEXT: addi a0, a0, -2
+; RV32-NEXT: addi a0, a0, -1
 ; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
-; RV32-NEXT: vmsgtu.vx v0, v8, a0
-; RV32-NEXT: lui a0, 1048568
-; RV32-NEXT: addi a0, a0, 1
-; RV32-NEXT: vadd.vx v25, v8, a0
-; RV32-NEXT: vmv.v.i v26, 0
-; RV32-NEXT: vmerge.vvm v8, v26, v25, v0
+; RV32-NEXT: vssubu.vx v8, v8, a0
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vselect_add_const_signbit_v2i16:
 ; RV64: # %bb.0:
 ; RV64-NEXT: lui a0, 8
-; RV64-NEXT: addiw a0, a0, -2
+; RV64-NEXT: addiw a0, a0, -1
 ; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
-; RV64-NEXT: vmsgtu.vx v0, v8, a0
-; RV64-NEXT: lui a0, 1048568
-; RV64-NEXT: addiw a0, a0, 1
-; RV64-NEXT: vadd.vx v25, v8, a0
-; RV64-NEXT: vmv.v.i v26, 0
-; RV64-NEXT: vmerge.vvm v8, v26, v25, v0
+; RV64-NEXT: vssubu.vx v8, v8, a0
 ; RV64-NEXT: ret
   %cmp = icmp ugt <2 x i16> %a0, <i16 32766, i16 32766>
   %v1 = add <2 x i16> %a0, <i16 -32767, i16 -32767>
@@ -290,12 +243,9 @@
 define <2 x i16> @vselect_xor_const_signbit_v2i16(<2 x i16> %a0) {
 ; CHECK-LABEL: vselect_xor_const_signbit_v2i16:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 8
 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
-; CHECK-NEXT: vmsle.vi v0, v8, -1
-; CHECK-NEXT: vmv.v.i v25, 0
-; CHECK-NEXT: lui a0, 1048568
-; CHECK-NEXT: vxor.vx v26, v8, a0
-; CHECK-NEXT: vmerge.vvm v8, v25, v26, v0
+; CHECK-NEXT: vssubu.vx v8, v8, a0
 ; CHECK-NEXT: ret
   %cmp = icmp slt <2 x i16> %a0, zeroinitializer
   %v1 = xor <2 x i16> %a0, <i16 -32768, i16 -32768>
@@ -330,10 +280,7 @@
 ; CHECK-LABEL: vselect_add_v2i64:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
-; CHECK-NEXT: vadd.vv v25, v8, v9
-; CHECK-NEXT: vmsleu.vv v0, v8, v25
-; CHECK-NEXT: vmv.v.i v26, -1
-; CHECK-NEXT: vmerge.vvm v8, v26, v25, v0
+; CHECK-NEXT: vsaddu.vv v8, v8, v9
 ; CHECK-NEXT: ret
   %v1 = add <2 x i64> %a0, %a1
   %cmp = icmp ule <2 x i64> %a0, %v1
@@ -365,10 +312,7 @@
 ; CHECK-LABEL: vselect_add_const_2_v2i64:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
-; CHECK-NEXT: vadd.vi v25, v8, 6
-; CHECK-NEXT: vmsleu.vi v0, v8, -7
-; CHECK-NEXT: vmv.v.i v26, -1
-; CHECK-NEXT: vmerge.vvm v8, v26, v25, v0
+; CHECK-NEXT: vsaddu.vi v8, v8, 6
 ; CHECK-NEXT: ret
   %v1 = add <2 x i64> %a0, <i64 6, i64 6>
   %cmp = icmp ule <2 x i64> %a0, <i64 -7, i64 -7>
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd.ll
@@ -0,0 +1,633 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8>, <2 x i8>)
+
+define <2 x i8> @sadd_v2i8_vv(<2 x i8> %va, <2 x i8> %b) {
+; CHECK-LABEL: sadd_v2i8_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+  %v = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %va, <2 x i8> %b)
+  ret <2 x i8> %v
+}
+
+define <2 x i8> @sadd_v2i8_vx(<2 x i8> %va, i8 %b) {
+; CHECK-LABEL: sadd_v2i8_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e8,
mf8, ta, mu +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb) + ret <2 x i8> %v +} + +define <2 x i8> @sadd_v2i8_vi(<2 x i8> %va) { +; CHECK-LABEL: sadd_v2i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu +; CHECK-NEXT: vsadd.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> undef, i8 5, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb) + ret <2 x i8> %v +} + +declare <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8>, <4 x i8>) + +define <4 x i8> @sadd_v4i8_vv(<4 x i8> %va, <4 x i8> %b) { +; CHECK-LABEL: sadd_v4i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vsadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> %va, <4 x i8> %b) + ret <4 x i8> %v +} + +define <4 x i8> @sadd_v4i8_vx(<4 x i8> %va, i8 %b) { +; CHECK-LABEL: sadd_v4i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb) + ret <4 x i8> %v +} + +define <4 x i8> @sadd_v4i8_vi(<4 x i8> %va) { +; CHECK-LABEL: sadd_v4i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vsadd.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> undef, i8 5, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb) + ret <4 x i8> %v +} + +declare <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8>, <8 x i8>) + +define <8 x i8> @sadd_v8i8_vv(<8 x i8> %va, <8 x i8> %b) { +; CHECK-LABEL: sadd_v8i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vsadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> %b) + ret <8 x i8> %v +} + +define <8 x i8> @sadd_v8i8_vx(<8 x i8> %va, i8 %b) { +; CHECK-LABEL: sadd_v8i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb) + ret <8 x i8> %v +} + +define <8 x i8> @sadd_v8i8_vi(<8 x i8> %va) { +; CHECK-LABEL: sadd_v8i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vsadd.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> undef, i8 5, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb) + ret <8 x i8> %v +} + +declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>) + +define <16 x i8> @sadd_v16i8_vv(<16 x i8> %va, <16 x i8> %b) { +; CHECK-LABEL: sadd_v16i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vsadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %va, <16 x i8> %b) + ret 
<16 x i8> %v +} + +define <16 x i8> @sadd_v16i8_vx(<16 x i8> %va, i8 %b) { +; CHECK-LABEL: sadd_v16i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %v +} + +define <16 x i8> @sadd_v16i8_vi(<16 x i8> %va) { +; CHECK-LABEL: sadd_v16i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vsadd.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> undef, i8 5, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %v +} + +declare <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16>, <2 x i16>) + +define <2 x i16> @sadd_v2i16_vv(<2 x i16> %va, <2 x i16> %b) { +; CHECK-LABEL: sadd_v2i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vsadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> %va, <2 x i16> %b) + ret <2 x i16> %v +} + +define <2 x i16> @sadd_v2i16_vx(<2 x i16> %va, i16 %b) { +; CHECK-LABEL: sadd_v2i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb) + ret <2 x i16> %v +} + +define <2 x i16> @sadd_v2i16_vi(<2 x i16> %va) { +; CHECK-LABEL: sadd_v2i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vsadd.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> undef, i16 5, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb) + ret <2 x i16> %v +} + +declare <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16>, <4 x i16>) + +define <4 x i16> @sadd_v4i16_vv(<4 x i16> %va, <4 x i16> %b) { +; CHECK-LABEL: sadd_v4i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vsadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %va, <4 x i16> %b) + ret <4 x i16> %v +} + +define <4 x i16> @sadd_v4i16_vx(<4 x i16> %va, i16 %b) { +; CHECK-LABEL: sadd_v4i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb) + ret <4 x i16> %v +} + +define <4 x i16> @sadd_v4i16_vi(<4 x i16> %va) { +; CHECK-LABEL: sadd_v4i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vsadd.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> undef, i16 5, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb) + ret <4 x i16> %v +} + +declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>) + +define <8 x i16> 
@sadd_v8i16_vv(<8 x i16> %va, <8 x i16> %b) { +; CHECK-LABEL: sadd_v8i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vsadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %va, <8 x i16> %b) + ret <8 x i16> %v +} + +define <8 x i16> @sadd_v8i16_vx(<8 x i16> %va, i16 %b) { +; CHECK-LABEL: sadd_v8i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %v +} + +define <8 x i16> @sadd_v8i16_vi(<8 x i16> %va) { +; CHECK-LABEL: sadd_v8i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vsadd.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> undef, i16 5, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %v +} + +declare <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16>, <16 x i16>) + +define <16 x i16> @sadd_v16i16_vv(<16 x i16> %va, <16 x i16> %b) { +; CHECK-LABEL: sadd_v16i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vsadd.vv v8, v8, v10 +; CHECK-NEXT: ret + %v = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %va, <16 x i16> %b) + ret <16 x i16> %v +} + +define <16 x i16> @sadd_v16i16_vx(<16 x i16> %va, i16 %b) { +; CHECK-LABEL: sadd_v16i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %v +} + +define <16 x i16> @sadd_v16i16_vi(<16 x i16> %va) { +; CHECK-LABEL: sadd_v16i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vsadd.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> undef, i16 5, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %v +} + +declare <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32>, <2 x i32>) + +define <2 x i32> @sadd_v2i32_vv(<2 x i32> %va, <2 x i32> %b) { +; CHECK-LABEL: sadd_v2i32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vsadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %va, <2 x i32> %b) + ret <2 x i32> %v +} + +define <2 x i32> @sadd_v2i32_vx(<2 x i32> %va, i32 %b) { +; CHECK-LABEL: sadd_v2i32_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb) + ret <2 x i32> %v +} + +define <2 x i32> @sadd_v2i32_vi(<2 x i32> %va) { +; CHECK-LABEL: sadd_v2i32_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vsadd.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head 
= insertelement <2 x i32> undef, i32 5, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb) + ret <2 x i32> %v +} + +declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>) + +define <4 x i32> @sadd_v4i32_vv(<4 x i32> %va, <4 x i32> %b) { +; CHECK-LABEL: sadd_v4i32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vsadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %va, <4 x i32> %b) + ret <4 x i32> %v +} + +define <4 x i32> @sadd_v4i32_vx(<4 x i32> %va, i32 %b) { +; CHECK-LABEL: sadd_v4i32_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %v +} + +define <4 x i32> @sadd_v4i32_vi(<4 x i32> %va) { +; CHECK-LABEL: sadd_v4i32_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vsadd.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> undef, i32 5, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %v +} + +declare <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32>, <8 x i32>) + +define <8 x i32> @sadd_v8i32_vv(<8 x i32> %va, <8 x i32> %b) { +; CHECK-LABEL: sadd_v8i32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vsadd.vv v8, v8, v10 +; CHECK-NEXT: ret + %v = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> %va, <8 x i32> %b) + ret <8 x i32> %v +} + +define <8 x i32> @sadd_v8i32_vx(<8 x i32> %va, i32 %b) { +; CHECK-LABEL: sadd_v8i32_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %v +} + +define <8 x i32> @sadd_v8i32_vi(<8 x i32> %va) { +; CHECK-LABEL: sadd_v8i32_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vsadd.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> undef, i32 5, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %v +} + +declare <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32>, <16 x i32>) + +define <16 x i32> @sadd_v16i32_vv(<16 x i32> %va, <16 x i32> %b) { +; CHECK-LABEL: sadd_v16i32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vsadd.vv v8, v8, v12 +; CHECK-NEXT: ret + %v = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> %va, <16 x i32> %b) + ret <16 x i32> %v +} + +define <16 x i32> @sadd_v16i32_vx(<16 x i32> %va, i32 %b) { +; CHECK-LABEL: sadd_v16i32_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vsadd.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer + %v = call <16 x 
i32> @llvm.sadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb) + ret <16 x i32> %v +} + +define <16 x i32> @sadd_v16i32_vi(<16 x i32> %va) { +; CHECK-LABEL: sadd_v16i32_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vsadd.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> undef, i32 5, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb) + ret <16 x i32> %v +} + +declare <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64>, <2 x i64>) + +define <2 x i64> @sadd_v2i64_vv(<2 x i64> %va, <2 x i64> %b) { +; CHECK-LABEL: sadd_v2i64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; CHECK-NEXT: vsadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %va, <2 x i64> %b) + ret <2 x i64> %v +} + +define <2 x i64> @sadd_v2i64_vx(<2 x i64> %va, i64 %b) { +; RV32-LABEL: sadd_v2i64_vx: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: vsadd.vv v8, v8, v25 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: sadd_v2i64_vx: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV64-NEXT: vsadd.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %v +} + +define <2 x i64> @sadd_v2i64_vi(<2 x i64> %va) { +; CHECK-LABEL: sadd_v2i64_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; CHECK-NEXT: vsadd.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i64> undef, i64 5, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %v +} + +declare <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64>, <4 x i64>) + +define <4 x i64> @sadd_v4i64_vv(<4 x i64> %va, <4 x i64> %b) { +; CHECK-LABEL: sadd_v4i64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vsadd.vv v8, v8, v10 +; CHECK-NEXT: ret + %v = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> %va, <4 x i64> %b) + ret <4 x i64> %v +} + +define <4 x i64> @sadd_v4i64_vx(<4 x i64> %va, i64 %b) { +; RV32-LABEL: sadd_v4i64_vx: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vsadd.vv v8, v8, v26 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: sadd_v4i64_vx: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV64-NEXT: vsadd.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %v +} + +define <4 x i64> @sadd_v4i64_vi(<4 x i64> %va) { +; CHECK-LABEL: sadd_v4i64_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vsadd.vi 
v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i64> undef, i64 5, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %v +} + +declare <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64>, <8 x i64>) + +define <8 x i64> @sadd_v8i64_vv(<8 x i64> %va, <8 x i64> %b) { +; CHECK-LABEL: sadd_v8i64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; CHECK-NEXT: vsadd.vv v8, v8, v12 +; CHECK-NEXT: ret + %v = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> %va, <8 x i64> %b) + ret <8 x i64> %v +} + +define <8 x i64> @sadd_v8i64_vx(<8 x i64> %va, i64 %b) { +; RV32-LABEL: sadd_v8i64_vx: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vsadd.vv v8, v8, v28 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: sadd_v8i64_vx: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vsadd.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb) + ret <8 x i64> %v +} + +define <8 x i64> @sadd_v8i64_vi(<8 x i64> %va) { +; CHECK-LABEL: sadd_v8i64_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; CHECK-NEXT: vsadd.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i64> undef, i64 5, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb) + ret <8 x i64> %v +} + +declare <16 x i64> @llvm.sadd.sat.v16i64(<16 x i64>, <16 x i64>) + +define <16 x i64> @sadd_v16i64_vv(<16 x i64> %va, <16 x i64> %b) { +; CHECK-LABEL: sadd_v16i64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; CHECK-NEXT: vsadd.vv v8, v8, v16 +; CHECK-NEXT: ret + %v = call <16 x i64> @llvm.sadd.sat.v16i64(<16 x i64> %va, <16 x i64> %b) + ret <16 x i64> %v +} + +define <16 x i64> @sadd_v16i64_vx(<16 x i64> %va, i64 %b) { +; RV32-LABEL: sadd_v16i64_vx: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsadd.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: sadd_v16i64_vx: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV64-NEXT: vsadd.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <16 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.sadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb) + ret <16 x i64> %v +} + +define <16 x i64> @sadd_v16i64_vi(<16 x i64> %va) { +; CHECK-LABEL: sadd_v16i64_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; CHECK-NEXT: vsadd.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i64> undef, i64 5, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.sadd.sat.v16i64(<16 x i64> 
%va, <16 x i64> %vb) + ret <16 x i64> %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu.ll @@ -0,0 +1,633 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 + +declare <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8>, <2 x i8>) + +define <2 x i8> @uadd_v2i8_vv(<2 x i8> %va, <2 x i8> %b) { +; CHECK-LABEL: uadd_v2i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %va, <2 x i8> %b) + ret <2 x i8> %v +} + +define <2 x i8> @uadd_v2i8_vx(<2 x i8> %va, i8 %b) { +; CHECK-LABEL: uadd_v2i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb) + ret <2 x i8> %v +} + +define <2 x i8> @uadd_v2i8_vi(<2 x i8> %va) { +; CHECK-LABEL: uadd_v2i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> undef, i8 8, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb) + ret <2 x i8> %v +} + +declare <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8>, <4 x i8>) + +define <4 x i8> @uadd_v4i8_vv(<4 x i8> %va, <4 x i8> %b) { +; CHECK-LABEL: uadd_v4i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> %va, <4 x i8> %b) + ret <4 x i8> %v +} + +define <4 x i8> @uadd_v4i8_vx(<4 x i8> %va, i8 %b) { +; CHECK-LABEL: uadd_v4i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb) + ret <4 x i8> %v +} + +define <4 x i8> @uadd_v4i8_vi(<4 x i8> %va) { +; CHECK-LABEL: uadd_v4i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> undef, i8 8, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb) + ret <4 x i8> %v +} + +declare <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8>, <8 x i8>) + +define <8 x i8> @uadd_v8i8_vv(<8 x i8> %va, <8 x i8> %b) { +; CHECK-LABEL: uadd_v8i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> %va, <8 x i8> %b) + ret <8 x i8> %v +} + +define <8 x i8> @uadd_v8i8_vx(<8 x i8> 
%va, i8 %b) { +; CHECK-LABEL: uadd_v8i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb) + ret <8 x i8> %v +} + +define <8 x i8> @uadd_v8i8_vi(<8 x i8> %va) { +; CHECK-LABEL: uadd_v8i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> undef, i8 8, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb) + ret <8 x i8> %v +} + +declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>) + +define <16 x i8> @uadd_v16i8_vv(<16 x i8> %va, <16 x i8> %b) { +; CHECK-LABEL: uadd_v16i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %va, <16 x i8> %b) + ret <16 x i8> %v +} + +define <16 x i8> @uadd_v16i8_vx(<16 x i8> %va, i8 %b) { +; CHECK-LABEL: uadd_v16i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %v +} + +define <16 x i8> @uadd_v16i8_vi(<16 x i8> %va) { +; CHECK-LABEL: uadd_v16i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> undef, i8 8, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %v +} + +declare <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16>, <2 x i16>) + +define <2 x i16> @uadd_v2i16_vv(<2 x i16> %va, <2 x i16> %b) { +; CHECK-LABEL: uadd_v2i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> %va, <2 x i16> %b) + ret <2 x i16> %v +} + +define <2 x i16> @uadd_v2i16_vx(<2 x i16> %va, i16 %b) { +; CHECK-LABEL: uadd_v2i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb) + ret <2 x i16> %v +} + +define <2 x i16> @uadd_v2i16_vi(<2 x i16> %va) { +; CHECK-LABEL: uadd_v2i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> undef, i16 8, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb) + ret <2 x i16> %v +} + +declare <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16>, <4 x i16>) + +define <4 x i16> @uadd_v4i16_vv(<4 x i16> %va, <4 x i16> %b) { +; CHECK-LABEL: uadd_v4i16_vv: +; CHECK: 
# %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> %va, <4 x i16> %b) + ret <4 x i16> %v +} + +define <4 x i16> @uadd_v4i16_vx(<4 x i16> %va, i16 %b) { +; CHECK-LABEL: uadd_v4i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb) + ret <4 x i16> %v +} + +define <4 x i16> @uadd_v4i16_vi(<4 x i16> %va) { +; CHECK-LABEL: uadd_v4i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> undef, i16 8, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb) + ret <4 x i16> %v +} + +declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>) + +define <8 x i16> @uadd_v8i16_vv(<8 x i16> %va, <8 x i16> %b) { +; CHECK-LABEL: uadd_v8i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %va, <8 x i16> %b) + ret <8 x i16> %v +} + +define <8 x i16> @uadd_v8i16_vx(<8 x i16> %va, i16 %b) { +; CHECK-LABEL: uadd_v8i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %v +} + +define <8 x i16> @uadd_v8i16_vi(<8 x i16> %va) { +; CHECK-LABEL: uadd_v8i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> undef, i16 8, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %v +} + +declare <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16>, <16 x i16>) + +define <16 x i16> @uadd_v16i16_vv(<16 x i16> %va, <16 x i16> %b) { +; CHECK-LABEL: uadd_v16i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v10 +; CHECK-NEXT: ret + %v = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %va, <16 x i16> %b) + ret <16 x i16> %v +} + +define <16 x i16> @uadd_v16i16_vx(<16 x i16> %va, i16 %b) { +; CHECK-LABEL: uadd_v16i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %v +} + +define <16 x i16> @uadd_v16i16_vi(<16 x i16> %va) { +; CHECK-LABEL: uadd_v16i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> undef, i16 8, i32 0 + %vb = shufflevector <16 x i16> 
%elt.head, <16 x i16> undef, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %v +} + +declare <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32>, <2 x i32>) + +define <2 x i32> @uadd_v2i32_vv(<2 x i32> %va, <2 x i32> %b) { +; CHECK-LABEL: uadd_v2i32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %va, <2 x i32> %b) + ret <2 x i32> %v +} + +define <2 x i32> @uadd_v2i32_vx(<2 x i32> %va, i32 %b) { +; CHECK-LABEL: uadd_v2i32_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb) + ret <2 x i32> %v +} + +define <2 x i32> @uadd_v2i32_vi(<2 x i32> %va) { +; CHECK-LABEL: uadd_v2i32_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> undef, i32 8, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb) + ret <2 x i32> %v +} + +declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>) + +define <4 x i32> @uadd_v4i32_vv(<4 x i32> %va, <4 x i32> %b) { +; CHECK-LABEL: uadd_v4i32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %va, <4 x i32> %b) + ret <4 x i32> %v +} + +define <4 x i32> @uadd_v4i32_vx(<4 x i32> %va, i32 %b) { +; CHECK-LABEL: uadd_v4i32_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %v +} + +define <4 x i32> @uadd_v4i32_vi(<4 x i32> %va) { +; CHECK-LABEL: uadd_v4i32_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> undef, i32 8, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %v +} + +declare <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32>, <8 x i32>) + +define <8 x i32> @uadd_v8i32_vv(<8 x i32> %va, <8 x i32> %b) { +; CHECK-LABEL: uadd_v8i32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v10 +; CHECK-NEXT: ret + %v = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> %va, <8 x i32> %b) + ret <8 x i32> %v +} + +define <8 x i32> @uadd_v8i32_vx(<8 x i32> %va, i32 %b) { +; CHECK-LABEL: uadd_v8i32_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %v +} + +define 
<8 x i32> @uadd_v8i32_vi(<8 x i32> %va) { +; CHECK-LABEL: uadd_v8i32_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> undef, i32 8, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %v +} + +declare <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32>, <16 x i32>) + +define <16 x i32> @uadd_v16i32_vv(<16 x i32> %va, <16 x i32> %b) { +; CHECK-LABEL: uadd_v16i32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v12 +; CHECK-NEXT: ret + %v = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> %va, <16 x i32> %b) + ret <16 x i32> %v +} + +define <16 x i32> @uadd_v16i32_vx(<16 x i32> %va, i32 %b) { +; CHECK-LABEL: uadd_v16i32_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb) + ret <16 x i32> %v +} + +define <16 x i32> @uadd_v16i32_vi(<16 x i32> %va) { +; CHECK-LABEL: uadd_v16i32_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> undef, i32 8, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb) + ret <16 x i32> %v +} + +declare <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64>, <2 x i64>) + +define <2 x i64> @uadd_v2i64_vv(<2 x i64> %va, <2 x i64> %b) { +; CHECK-LABEL: uadd_v2i64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %va, <2 x i64> %b) + ret <2 x i64> %v +} + +define <2 x i64> @uadd_v2i64_vx(<2 x i64> %va, i64 %b) { +; RV32-LABEL: uadd_v2i64_vx: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: vsaddu.vv v8, v8, v25 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: uadd_v2i64_vx: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV64-NEXT: vsaddu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %v +} + +define <2 x i64> @uadd_v2i64_vi(<2 x i64> %va) { +; CHECK-LABEL: uadd_v2i64_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i64> undef, i64 8, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %v +} + +declare <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64>, <4 x i64>) + +define <4 x i64> @uadd_v4i64_vv(<4 x i64> %va, <4 x i64> %b) { +; CHECK-LABEL: 
uadd_v4i64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v10 +; CHECK-NEXT: ret + %v = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> %va, <4 x i64> %b) + ret <4 x i64> %v +} + +define <4 x i64> @uadd_v4i64_vx(<4 x i64> %va, i64 %b) { +; RV32-LABEL: uadd_v4i64_vx: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vsaddu.vv v8, v8, v26 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: uadd_v4i64_vx: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV64-NEXT: vsaddu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %v +} + +define <4 x i64> @uadd_v4i64_vi(<4 x i64> %va) { +; CHECK-LABEL: uadd_v4i64_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i64> undef, i64 8, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %v +} + +declare <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64>, <8 x i64>) + +define <8 x i64> @uadd_v8i64_vv(<8 x i64> %va, <8 x i64> %b) { +; CHECK-LABEL: uadd_v8i64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v12 +; CHECK-NEXT: ret + %v = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> %va, <8 x i64> %b) + ret <8 x i64> %v +} + +define <8 x i64> @uadd_v8i64_vx(<8 x i64> %va, i64 %b) { +; RV32-LABEL: uadd_v8i64_vx: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vsaddu.vv v8, v8, v28 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: uadd_v8i64_vx: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vsaddu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb) + ret <8 x i64> %v +} + +define <8 x i64> @uadd_v8i64_vi(<8 x i64> %va) { +; CHECK-LABEL: uadd_v8i64_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i64> undef, i64 8, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb) + ret <8 x i64> %v +} + +declare <16 x i64> @llvm.uadd.sat.v16i64(<16 x i64>, <16 x i64>) + +define <16 x i64> @uadd_v16i64_vv(<16 x i64> %va, <16 x i64> %b) { +; CHECK-LABEL: uadd_v16i64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v16 +; CHECK-NEXT: ret + %v = call <16 x i64> @llvm.uadd.sat.v16i64(<16 x i64> %va, <16 x i64> %b) + ret <16 x i64> %v +} + +define <16 
x i64> @uadd_v16i64_vx(<16 x i64> %va, i64 %b) { +; RV32-LABEL: uadd_v16i64_vx: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsaddu.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: uadd_v16i64_vx: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV64-NEXT: vsaddu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <16 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.uadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb) + ret <16 x i64> %v +} + +define <16 x i64> @uadd_v16i64_vi(<16 x i64> %va) { +; CHECK-LABEL: uadd_v16i64_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i64> undef, i64 8, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.uadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb) + ret <16 x i64> %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub.ll @@ -0,0 +1,649 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 + +declare <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8>, <2 x i8>) + +define <2 x i8> @ssub_v2i8_vv(<2 x i8> %va, <2 x i8> %b) { +; CHECK-LABEL: ssub_v2i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %va, <2 x i8> %b) + ret <2 x i8> %v +} + +define <2 x i8> @ssub_v2i8_vx(<2 x i8> %va, i8 %b) { +; CHECK-LABEL: ssub_v2i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb) + ret <2 x i8> %v +} + +define <2 x i8> @ssub_v2i8_vi(<2 x i8> %va) { +; CHECK-LABEL: ssub_v2i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> undef, i8 1, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb) + ret <2 x i8> %v +} + +declare <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8>, <4 x i8>) + +define <4 x i8> @ssub_v4i8_vv(<4 x i8> %va, <4 x i8> %b) { +; CHECK-LABEL: ssub_v4i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> %va, <4 x i8> %b) + ret <4 x i8> %v +} + +define <4 x 
i8> @ssub_v4i8_vx(<4 x i8> %va, i8 %b) { +; CHECK-LABEL: ssub_v4i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb) + ret <4 x i8> %v +} + +define <4 x i8> @ssub_v4i8_vi(<4 x i8> %va) { +; CHECK-LABEL: ssub_v4i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> undef, i8 1, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb) + ret <4 x i8> %v +} + +declare <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8>, <8 x i8>) + +define <8 x i8> @ssub_v8i8_vv(<8 x i8> %va, <8 x i8> %b) { +; CHECK-LABEL: ssub_v8i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> %va, <8 x i8> %b) + ret <8 x i8> %v +} + +define <8 x i8> @ssub_v8i8_vx(<8 x i8> %va, i8 %b) { +; CHECK-LABEL: ssub_v8i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb) + ret <8 x i8> %v +} + +define <8 x i8> @ssub_v8i8_vi(<8 x i8> %va) { +; CHECK-LABEL: ssub_v8i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> undef, i8 1, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb) + ret <8 x i8> %v +} + +declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>) + +define <16 x i8> @ssub_v16i8_vv(<16 x i8> %va, <16 x i8> %b) { +; CHECK-LABEL: ssub_v16i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %va, <16 x i8> %b) + ret <16 x i8> %v +} + +define <16 x i8> @ssub_v16i8_vx(<16 x i8> %va, i8 %b) { +; CHECK-LABEL: ssub_v16i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %v +} + +define <16 x i8> @ssub_v16i8_vi(<16 x i8> %va) { +; CHECK-LABEL: ssub_v16i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> undef, i8 1, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %v +} + +declare <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16>, <2 x i16>) + +define <2 x i16> 
@ssub_v2i16_vv(<2 x i16> %va, <2 x i16> %b) { +; CHECK-LABEL: ssub_v2i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> %va, <2 x i16> %b) + ret <2 x i16> %v +} + +define <2 x i16> @ssub_v2i16_vx(<2 x i16> %va, i16 %b) { +; CHECK-LABEL: ssub_v2i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb) + ret <2 x i16> %v +} + +define <2 x i16> @ssub_v2i16_vi(<2 x i16> %va) { +; CHECK-LABEL: ssub_v2i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> undef, i16 1, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb) + ret <2 x i16> %v +} + +declare <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16>, <4 x i16>) + +define <4 x i16> @ssub_v4i16_vv(<4 x i16> %va, <4 x i16> %b) { +; CHECK-LABEL: ssub_v4i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %va, <4 x i16> %b) + ret <4 x i16> %v +} + +define <4 x i16> @ssub_v4i16_vx(<4 x i16> %va, i16 %b) { +; CHECK-LABEL: ssub_v4i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb) + ret <4 x i16> %v +} + +define <4 x i16> @ssub_v4i16_vi(<4 x i16> %va) { +; CHECK-LABEL: ssub_v4i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> undef, i16 1, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb) + ret <4 x i16> %v +} + +declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>) + +define <8 x i16> @ssub_v8i16_vv(<8 x i16> %va, <8 x i16> %b) { +; CHECK-LABEL: ssub_v8i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %va, <8 x i16> %b) + ret <8 x i16> %v +} + +define <8 x i16> @ssub_v8i16_vx(<8 x i16> %va, i16 %b) { +; CHECK-LABEL: ssub_v8i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %v +} + +define <8 x i16> @ssub_v8i16_vi(<8 x i16> %va) { +; CHECK-LABEL: ssub_v8i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; 
CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> undef, i16 1, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %v +} + +declare <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16>, <16 x i16>) + +define <16 x i16> @ssub_v16i16_vv(<16 x i16> %va, <16 x i16> %b) { +; CHECK-LABEL: ssub_v16i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v10 +; CHECK-NEXT: ret + %v = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %va, <16 x i16> %b) + ret <16 x i16> %v +} + +define <16 x i16> @ssub_v16i16_vx(<16 x i16> %va, i16 %b) { +; CHECK-LABEL: ssub_v16i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %v +} + +define <16 x i16> @ssub_v16i16_vi(<16 x i16> %va) { +; CHECK-LABEL: ssub_v16i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> undef, i16 1, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %v +} + +declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>) + +define <2 x i32> @ssub_v2i32_vv(<2 x i32> %va, <2 x i32> %b) { +; CHECK-LABEL: ssub_v2i32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %va, <2 x i32> %b) + ret <2 x i32> %v +} + +define <2 x i32> @ssub_v2i32_vx(<2 x i32> %va, i32 %b) { +; CHECK-LABEL: ssub_v2i32_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb) + ret <2 x i32> %v +} + +define <2 x i32> @ssub_v2i32_vi(<2 x i32> %va) { +; CHECK-LABEL: ssub_v2i32_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> undef, i32 1, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb) + ret <2 x i32> %v +} + +declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>) + +define <4 x i32> @ssub_v4i32_vv(<4 x i32> %va, <4 x i32> %b) { +; CHECK-LABEL: ssub_v4i32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %va, <4 x i32> %b) + ret <4 x i32> %v +} + +define <4 x i32> @ssub_v4i32_vx(<4 x i32> %va, i32 %b) { +; CHECK-LABEL: ssub_v4i32_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = 
insertelement <4 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %v +} + +define <4 x i32> @ssub_v4i32_vi(<4 x i32> %va) { +; CHECK-LABEL: ssub_v4i32_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> undef, i32 1, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %v +} + +declare <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32>, <8 x i32>) + +define <8 x i32> @ssub_v8i32_vv(<8 x i32> %va, <8 x i32> %b) { +; CHECK-LABEL: ssub_v8i32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v10 +; CHECK-NEXT: ret + %v = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> %va, <8 x i32> %b) + ret <8 x i32> %v +} + +define <8 x i32> @ssub_v8i32_vx(<8 x i32> %va, i32 %b) { +; CHECK-LABEL: ssub_v8i32_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %v +} + +define <8 x i32> @ssub_v8i32_vi(<8 x i32> %va) { +; CHECK-LABEL: ssub_v8i32_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> undef, i32 1, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %v +} + +declare <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32>, <16 x i32>) + +define <16 x i32> @ssub_v16i32_vv(<16 x i32> %va, <16 x i32> %b) { +; CHECK-LABEL: ssub_v16i32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v12 +; CHECK-NEXT: ret + %v = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> %va, <16 x i32> %b) + ret <16 x i32> %v +} + +define <16 x i32> @ssub_v16i32_vx(<16 x i32> %va, i32 %b) { +; CHECK-LABEL: ssub_v16i32_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb) + ret <16 x i32> %v +} + +define <16 x i32> @ssub_v16i32_vi(<16 x i32> %va) { +; CHECK-LABEL: ssub_v16i32_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> undef, i32 1, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb) + ret <16 x i32> %v +} + +declare <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64>, <2 x i64>) + +define <2 x i64> @ssub_v2i64_vv(<2 x i64> %va, <2 x i64> %b) { +; CHECK-LABEL: ssub_v2i64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: 
vsetivli zero, 2, e64, m1, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %va, <2 x i64> %b) + ret <2 x i64> %v +} + +define <2 x i64> @ssub_v2i64_vx(<2 x i64> %va, i64 %b) { +; RV32-LABEL: ssub_v2i64_vx: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: vssub.vv v8, v8, v25 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: ssub_v2i64_vx: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV64-NEXT: vssub.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %v +} + +define <2 x i64> @ssub_v2i64_vi(<2 x i64> %va) { +; CHECK-LABEL: ssub_v2i64_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i64> undef, i64 1, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %v +} + +declare <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64>, <4 x i64>) + +define <4 x i64> @ssub_v4i64_vv(<4 x i64> %va, <4 x i64> %b) { +; CHECK-LABEL: ssub_v4i64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v10 +; CHECK-NEXT: ret + %v = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> %va, <4 x i64> %b) + ret <4 x i64> %v +} + +define <4 x i64> @ssub_v4i64_vx(<4 x i64> %va, i64 %b) { +; RV32-LABEL: ssub_v4i64_vx: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vssub.vv v8, v8, v26 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: ssub_v4i64_vx: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV64-NEXT: vssub.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %v +} + +define <4 x i64> @ssub_v4i64_vi(<4 x i64> %va) { +; CHECK-LABEL: ssub_v4i64_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i64> undef, i64 1, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %v +} + +declare <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64>, <8 x i64>) + +define <8 x i64> @ssub_v8i64_vv(<8 x i64> %va, <8 x i64> %b) { +; CHECK-LABEL: ssub_v8i64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v12 +; CHECK-NEXT: ret + %v = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> %va, <8 x i64> %b) + ret <8 x i64> %v +} + +define <8 x i64> 
@ssub_v8i64_vx(<8 x i64> %va, i64 %b) { +; RV32-LABEL: ssub_v8i64_vx: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vssub.vv v8, v8, v28 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: ssub_v8i64_vx: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vssub.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb) + ret <8 x i64> %v +} + +define <8 x i64> @ssub_v8i64_vi(<8 x i64> %va) { +; CHECK-LABEL: ssub_v8i64_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i64> undef, i64 1, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb) + ret <8 x i64> %v +} + +declare <16 x i64> @llvm.ssub.sat.v16i64(<16 x i64>, <16 x i64>) + +define <16 x i64> @ssub_v16i64_vv(<16 x i64> %va, <16 x i64> %b) { +; CHECK-LABEL: ssub_v16i64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v16 +; CHECK-NEXT: ret + %v = call <16 x i64> @llvm.ssub.sat.v16i64(<16 x i64> %va, <16 x i64> %b) + ret <16 x i64> %v +} + +define <16 x i64> @ssub_v16i64_vx(<16 x i64> %va, i64 %b) { +; RV32-LABEL: ssub_v16i64_vx: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vssub.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: ssub_v16i64_vx: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV64-NEXT: vssub.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <16 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.ssub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb) + ret <16 x i64> %v +} + +define <16 x i64> @ssub_v16i64_vi(<16 x i64> %va) { +; CHECK-LABEL: ssub_v16i64_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i64> undef, i64 1, i32 0 + %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer + %v = call <16 x i64> @llvm.ssub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb) + ret <16 x i64> %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu.ll @@ -0,0 +1,649 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 
-verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 + +declare <2 x i8> @llvm.usub.sat.v2i8(<2 x i8>, <2 x i8>) + +define <2 x i8> @usub_v2i8_vv(<2 x i8> %va, <2 x i8> %b) { +; CHECK-LABEL: usub_v2i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %va, <2 x i8> %b) + ret <2 x i8> %v +} + +define <2 x i8> @usub_v2i8_vx(<2 x i8> %va, i8 %b) { +; CHECK-LABEL: usub_v2i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb) + ret <2 x i8> %v +} + +define <2 x i8> @usub_v2i8_vi(<2 x i8> %va) { +; CHECK-LABEL: usub_v2i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> undef, i8 2, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer + %v = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb) + ret <2 x i8> %v +} + +declare <4 x i8> @llvm.usub.sat.v4i8(<4 x i8>, <4 x i8>) + +define <4 x i8> @usub_v4i8_vv(<4 x i8> %va, <4 x i8> %b) { +; CHECK-LABEL: usub_v4i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> %va, <4 x i8> %b) + ret <4 x i8> %v +} + +define <4 x i8> @usub_v4i8_vx(<4 x i8> %va, i8 %b) { +; CHECK-LABEL: usub_v4i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb) + ret <4 x i8> %v +} + +define <4 x i8> @usub_v4i8_vi(<4 x i8> %va) { +; CHECK-LABEL: usub_v4i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> undef, i8 2, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer + %v = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb) + ret <4 x i8> %v +} + +declare <8 x i8> @llvm.usub.sat.v8i8(<8 x i8>, <8 x i8>) + +define <8 x i8> @usub_v8i8_vv(<8 x i8> %va, <8 x i8> %b) { +; CHECK-LABEL: usub_v8i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> %va, <8 x i8> %b) + ret <8 x i8> %v +} + +define <8 x i8> @usub_v8i8_vx(<8 x i8> %va, i8 %b) { +; CHECK-LABEL: usub_v8i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb) + ret <8 x i8> %v +} + +define <8 x i8> @usub_v8i8_vi(<8 x i8> %va) { +; CHECK-LABEL: usub_v8i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; 
CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> undef, i8 2, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> undef, <8 x i32> zeroinitializer + %v = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb) + ret <8 x i8> %v +} + +declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>) + +define <16 x i8> @usub_v16i8_vv(<16 x i8> %va, <16 x i8> %b) { +; CHECK-LABEL: usub_v16i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %va, <16 x i8> %b) + ret <16 x i8> %v +} + +define <16 x i8> @usub_v16i8_vx(<16 x i8> %va, i8 %b) { +; CHECK-LABEL: usub_v16i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> undef, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %v +} + +define <16 x i8> @usub_v16i8_vi(<16 x i8> %va) { +; CHECK-LABEL: usub_v16i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> undef, i8 2, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> undef, <16 x i32> zeroinitializer + %v = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %v +} + +declare <2 x i16> @llvm.usub.sat.v2i16(<2 x i16>, <2 x i16>) + +define <2 x i16> @usub_v2i16_vv(<2 x i16> %va, <2 x i16> %b) { +; CHECK-LABEL: usub_v2i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %va, <2 x i16> %b) + ret <2 x i16> %v +} + +define <2 x i16> @usub_v2i16_vx(<2 x i16> %va, i16 %b) { +; CHECK-LABEL: usub_v2i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb) + ret <2 x i16> %v +} + +define <2 x i16> @usub_v2i16_vi(<2 x i16> %va) { +; CHECK-LABEL: usub_v2i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> undef, i16 2, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> undef, <2 x i32> zeroinitializer + %v = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb) + ret <2 x i16> %v +} + +declare <4 x i16> @llvm.usub.sat.v4i16(<4 x i16>, <4 x i16>) + +define <4 x i16> @usub_v4i16_vv(<4 x i16> %va, <4 x i16> %b) { +; CHECK-LABEL: usub_v4i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> %va, <4 x i16> %b) + ret <4 x i16> %v +} + +define <4 x i16> @usub_v4i16_vx(<4 x i16> %va, i16 %b) { +; CHECK-LABEL: usub_v4i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + 
%elt.head = insertelement <4 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb) + ret <4 x i16> %v +} + +define <4 x i16> @usub_v4i16_vi(<4 x i16> %va) { +; CHECK-LABEL: usub_v4i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> undef, i16 2, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> undef, <4 x i32> zeroinitializer + %v = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb) + ret <4 x i16> %v +} + +declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>) + +define <8 x i16> @usub_v8i16_vv(<8 x i16> %va, <8 x i16> %b) { +; CHECK-LABEL: usub_v8i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %va, <8 x i16> %b) + ret <8 x i16> %v +} + +define <8 x i16> @usub_v8i16_vx(<8 x i16> %va, i16 %b) { +; CHECK-LABEL: usub_v8i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %v +} + +define <8 x i16> @usub_v8i16_vi(<8 x i16> %va) { +; CHECK-LABEL: usub_v8i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> undef, i16 2, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer + %v = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %v +} + +declare <16 x i16> @llvm.usub.sat.v16i16(<16 x i16>, <16 x i16>) + +define <16 x i16> @usub_v16i16_vv(<16 x i16> %va, <16 x i16> %b) { +; CHECK-LABEL: usub_v16i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v10 +; CHECK-NEXT: ret + %v = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %va, <16 x i16> %b) + ret <16 x i16> %v +} + +define <16 x i16> @usub_v16i16_vx(<16 x i16> %va, i16 %b) { +; CHECK-LABEL: usub_v16i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> undef, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %v +} + +define <16 x i16> @usub_v16i16_vi(<16 x i16> %va) { +; CHECK-LABEL: usub_v16i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> undef, i16 2, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> undef, <16 x i32> zeroinitializer + %v = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %v +} + +declare <2 x i32> @llvm.usub.sat.v2i32(<2 x i32>, <2 x i32>) + +define <2 x i32> @usub_v2i32_vv(<2 x i32> %va, <2 x i32> %b) { +; CHECK-LABEL: usub_v2i32_vv: +; CHECK: # 
%bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %va, <2 x i32> %b) + ret <2 x i32> %v +} + +define <2 x i32> @usub_v2i32_vx(<2 x i32> %va, i32 %b) { +; CHECK-LABEL: usub_v2i32_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb) + ret <2 x i32> %v +} + +define <2 x i32> @usub_v2i32_vi(<2 x i32> %va) { +; CHECK-LABEL: usub_v2i32_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> undef, i32 2, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> undef, <2 x i32> zeroinitializer + %v = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb) + ret <2 x i32> %v +} + +declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>) + +define <4 x i32> @usub_v4i32_vv(<4 x i32> %va, <4 x i32> %b) { +; CHECK-LABEL: usub_v4i32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %va, <4 x i32> %b) + ret <4 x i32> %v +} + +define <4 x i32> @usub_v4i32_vx(<4 x i32> %va, i32 %b) { +; CHECK-LABEL: usub_v4i32_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %v +} + +define <4 x i32> @usub_v4i32_vi(<4 x i32> %va) { +; CHECK-LABEL: usub_v4i32_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> undef, i32 2, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> undef, <4 x i32> zeroinitializer + %v = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %v +} + +declare <8 x i32> @llvm.usub.sat.v8i32(<8 x i32>, <8 x i32>) + +define <8 x i32> @usub_v8i32_vv(<8 x i32> %va, <8 x i32> %b) { +; CHECK-LABEL: usub_v8i32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v10 +; CHECK-NEXT: ret + %v = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> %va, <8 x i32> %b) + ret <8 x i32> %v +} + +define <8 x i32> @usub_v8i32_vx(<8 x i32> %va, i32 %b) { +; CHECK-LABEL: usub_v8i32_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %v +} + +define <8 x i32> @usub_v8i32_vi(<8 x i32> %va) { +; CHECK-LABEL: usub_v8i32_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x 
i32> undef, i32 2, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> undef, <8 x i32> zeroinitializer + %v = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %v +} + +declare <16 x i32> @llvm.usub.sat.v16i32(<16 x i32>, <16 x i32>) + +define <16 x i32> @usub_v16i32_vv(<16 x i32> %va, <16 x i32> %b) { +; CHECK-LABEL: usub_v16i32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v12 +; CHECK-NEXT: ret + %v = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> %va, <16 x i32> %b) + ret <16 x i32> %v +} + +define <16 x i32> @usub_v16i32_vx(<16 x i32> %va, i32 %b) { +; CHECK-LABEL: usub_v16i32_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> undef, i32 %b, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb) + ret <16 x i32> %v +} + +define <16 x i32> @usub_v16i32_vi(<16 x i32> %va) { +; CHECK-LABEL: usub_v16i32_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> undef, i32 2, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> undef, <16 x i32> zeroinitializer + %v = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb) + ret <16 x i32> %v +} + +declare <2 x i64> @llvm.usub.sat.v2i64(<2 x i64>, <2 x i64>) + +define <2 x i64> @usub_v2i64_vv(<2 x i64> %va, <2 x i64> %b) { +; CHECK-LABEL: usub_v2i64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %va, <2 x i64> %b) + ret <2 x i64> %v +} + +define <2 x i64> @usub_v2i64_vx(<2 x i64> %va, i64 %b) { +; RV32-LABEL: usub_v2i64_vx: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: vssubu.vv v8, v8, v25 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: usub_v2i64_vx: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV64-NEXT: vssubu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %v +} + +define <2 x i64> @usub_v2i64_vi(<2 x i64> %va) { +; CHECK-LABEL: usub_v2i64_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i64> undef, i64 2, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> undef, <2 x i32> zeroinitializer + %v = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %v +} + +declare <4 x i64> @llvm.usub.sat.v4i64(<4 x i64>, <4 x i64>) + +define <4 x i64> @usub_v4i64_vv(<4 x i64> %va, <4 x i64> %b) { +; CHECK-LABEL: usub_v4i64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v10 +; CHECK-NEXT: ret + %v = call <4 x i64> 
@llvm.usub.sat.v4i64(<4 x i64> %va, <4 x i64> %b) + ret <4 x i64> %v +} + +define <4 x i64> @usub_v4i64_vx(<4 x i64> %va, i64 %b) { +; RV32-LABEL: usub_v4i64_vx: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vssubu.vv v8, v8, v26 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: usub_v4i64_vx: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV64-NEXT: vssubu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %v +} + +define <4 x i64> @usub_v4i64_vi(<4 x i64> %va) { +; CHECK-LABEL: usub_v4i64_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i64> undef, i64 2, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> undef, <4 x i32> zeroinitializer + %v = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %v +} + +declare <8 x i64> @llvm.usub.sat.v8i64(<8 x i64>, <8 x i64>) + +define <8 x i64> @usub_v8i64_vv(<8 x i64> %va, <8 x i64> %b) { +; CHECK-LABEL: usub_v8i64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v12 +; CHECK-NEXT: ret + %v = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> %va, <8 x i64> %b) + ret <8 x i64> %v +} + +define <8 x i64> @usub_v8i64_vx(<8 x i64> %va, i64 %b) { +; RV32-LABEL: usub_v8i64_vx: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vssubu.vv v8, v8, v28 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: usub_v8i64_vx: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vssubu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> undef, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb) + ret <8 x i64> %v +} + +define <8 x i64> @usub_v8i64_vi(<8 x i64> %va) { +; CHECK-LABEL: usub_v8i64_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i64> undef, i64 2, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> undef, <8 x i32> zeroinitializer + %v = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb) + ret <8 x i64> %v +} + +declare <16 x i64> @llvm.usub.sat.v16i64(<16 x i64>, <16 x i64>) + +define <16 x i64> @usub_v16i64_vv(<16 x i64> %va, <16 x i64> %b) { +; CHECK-LABEL: usub_v16i64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v16 +; CHECK-NEXT: ret + %v = call <16 x i64> @llvm.usub.sat.v16i64(<16 x i64> %va, <16 x i64> %b) + ret <16 x i64> %v +} + +define <16 x i64> @usub_v16i64_vx(<16 x i64> %va, i64 %b) { +; RV32-LABEL: usub_v16i64_vx: +; RV32: # 
%bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v16, (a0), zero
+; RV32-NEXT: vssubu.vv v8, v8, v16
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: usub_v16i64_vx:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT: vssubu.vx v8, v8, a0
+; RV64-NEXT: ret
+ %elt.head = insertelement <16 x i64> undef, i64 %b, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.usub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb)
+ ret <16 x i64> %v
+}
+
+define <16 x i64> @usub_v16i64_vi(<16 x i64> %va) {
+; CHECK-LABEL: usub_v16i64_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a0, zero, 2
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, mu
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <16 x i64> undef, i64 2, i32 0
+ %vb = shufflevector <16 x i64> %elt.head, <16 x i64> undef, <16 x i32> zeroinitializer
+ %v = call <16 x i64> @llvm.usub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb)
+ ret <16 x i64> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/saddo-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/saddo-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/saddo-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/saddo-sdnode.ll
@@ -7,11 +7,10 @@
 ; CHECK-LABEL: saddo_nvx2i32:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
-; CHECK-NEXT: vadd.vv v25, v8, v9
-; CHECK-NEXT: vmslt.vv v26, v25, v8
-; CHECK-NEXT: vmsle.vi v27, v9, -1
-; CHECK-NEXT: vmxor.mm v0, v27, v26
-; CHECK-NEXT: vmerge.vim v8, v25, 0, v0
+; CHECK-NEXT: vsadd.vv v25, v8, v9
+; CHECK-NEXT: vadd.vv v26, v8, v9
+; CHECK-NEXT: vmsne.vv v0, v26, v25
+; CHECK-NEXT: vmerge.vim v8, v26, 0, v0
 ; CHECK-NEXT: ret
  %a = call { <vscale x 2 x i32>, <vscale x 2 x i1> } @llvm.sadd.with.overflow.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y)
  %b = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i1> } %a, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsadd-sdnode.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vsadd-sdnode.ll
@@ -0,0 +1,849 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <vscale x 1 x i8> @llvm.sadd.sat.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>)
+
+define <vscale x 1 x i8> @sadd_nxv1i8_vv(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b) {
+; CHECK-LABEL: sadd_nxv1i8_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, mu
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i8> @llvm.sadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @sadd_nxv1i8_vx(<vscale x 1 x i8> %va, i8 %b) {
+; CHECK-LABEL: sadd_nxv1i8_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, mu
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.sadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb)
+ ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @sadd_nxv1i8_vi(<vscale x 1 x i8> %va) {
+; CHECK-LABEL: sadd_nxv1i8_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, mu
+; CHECK-NEXT: vsadd.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i8> undef, i8 5, i32 0
+ %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i8> @llvm.sadd.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb)
+ ret <vscale x 1 x i8> %v
+}
+
+declare <vscale x 2 x i8> @llvm.sadd.sat.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
+
+define <vscale x 2 x i8> @sadd_nxv2i8_vv(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b) {
+; CHECK-LABEL: sadd_nxv2i8_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i8> @llvm.sadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @sadd_nxv2i8_vx(<vscale x 2 x i8> %va, i8 %b) {
+; CHECK-LABEL: sadd_nxv2i8_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, mu
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.sadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb)
+ ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @sadd_nxv2i8_vi(<vscale x 2 x i8> %va) {
+; CHECK-LABEL: sadd_nxv2i8_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
+; CHECK-NEXT: vsadd.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i8> undef, i8 5, i32 0
+ %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i8> @llvm.sadd.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb)
+ ret <vscale x 2 x i8> %v
+}
+
+declare <vscale x 4 x i8> @llvm.sadd.sat.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
+
+define <vscale x 4 x i8> @sadd_nxv4i8_vv(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b) {
+; CHECK-LABEL: sadd_nxv4i8_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, mu
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i8> @llvm.sadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @sadd_nxv4i8_vx(<vscale x 4 x i8> %va, i8 %b) {
+; CHECK-LABEL: sadd_nxv4i8_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.sadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb)
+ ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @sadd_nxv4i8_vi(<vscale x 4 x i8> %va) {
+; CHECK-LABEL: sadd_nxv4i8_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, mu
+; CHECK-NEXT: vsadd.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i8> undef, i8 5, i32 0
+ %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i8> @llvm.sadd.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb)
+ ret <vscale x 4 x i8> %v
+}
+
+declare <vscale x 8 x i8> @llvm.sadd.sat.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
+
+define <vscale x 8 x i8> @sadd_nxv8i8_vv(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b) {
+; CHECK-LABEL: sadd_nxv8i8_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, mu
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i8> @llvm.sadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @sadd_nxv8i8_vx(<vscale x 8 x i8> %va, i8 %b) {
+; CHECK-LABEL: sadd_nxv8i8_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, mu
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.sadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb)
+ ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @sadd_nxv8i8_vi(<vscale x 8 x i8> %va) {
+; CHECK-LABEL: sadd_nxv8i8_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, mu
+; CHECK-NEXT: vsadd.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i8> undef, i8 5, i32 0
+ %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i8> @llvm.sadd.sat.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb)
+ ret <vscale x 8 x i8> %v
+}
+
+declare <vscale x 16 x i8> @llvm.sadd.sat.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+
+define <vscale x 16 x i8> @sadd_nxv16i8_vv(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: sadd_nxv16i8_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, mu
+; CHECK-NEXT: vsadd.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i8> @llvm.sadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @sadd_nxv16i8_vx(<vscale x 16 x i8> %va, i8 %b) {
+; CHECK-LABEL: sadd_nxv16i8_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, mu
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.sadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb)
+ ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @sadd_nxv16i8_vi(<vscale x 16 x i8> %va) {
+; CHECK-LABEL: sadd_nxv16i8_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, mu
+; CHECK-NEXT: vsadd.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i8> undef, i8 5, i32 0
+ %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i8> @llvm.sadd.sat.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb)
+ ret <vscale x 16 x i8> %v
+}
+
+declare <vscale x 32 x i8> @llvm.sadd.sat.nxv32i8(<vscale x 32 x i8>, <vscale x 32 x i8>)
+
+define <vscale x 32 x i8> @sadd_nxv32i8_vv(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b) {
+; CHECK-LABEL: sadd_nxv32i8_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, mu
+; CHECK-NEXT: vsadd.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x i8> @llvm.sadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @sadd_nxv32i8_vx(<vscale x 32 x i8> %va, i8 %b) {
+; CHECK-LABEL: sadd_nxv32i8_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, mu
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.sadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb)
+ ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @sadd_nxv32i8_vi(<vscale x 32 x i8> %va) {
+; CHECK-LABEL: sadd_nxv32i8_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, mu
+; CHECK-NEXT: vsadd.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i8> undef, i8 5, i32 0
+ %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i8> @llvm.sadd.sat.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb)
+ ret <vscale x 32 x i8> %v
+}
+
+declare <vscale x 64 x i8> @llvm.sadd.sat.nxv64i8(<vscale x 64 x i8>, <vscale x 64 x i8>)
+
+define <vscale x 64 x i8> @sadd_nxv64i8_vv(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b) {
+; CHECK-LABEL: sadd_nxv64i8_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, mu
+; CHECK-NEXT: vsadd.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 64 x i8> @llvm.sadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @sadd_nxv64i8_vx(<vscale x 64 x i8> %va, i8 %b) {
+; CHECK-LABEL: sadd_nxv64i8_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, mu
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> undef, i8 %b, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> undef, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.sadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb)
+ ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @sadd_nxv64i8_vi(<vscale x 64 x i8> %va) {
+; CHECK-LABEL: sadd_nxv64i8_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, mu
+; CHECK-NEXT: vsadd.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 64 x i8> undef, i8 5, i32 0
+ %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> undef, <vscale x 64 x i32> zeroinitializer
+ %v = call <vscale x 64 x i8> @llvm.sadd.sat.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb)
+ ret <vscale x 64 x i8> %v
+}
+
+declare <vscale x 1 x i16> @llvm.sadd.sat.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i16>)
+
+define <vscale x 1 x i16> @sadd_nxv1i16_vv(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b) {
+; CHECK-LABEL: sadd_nxv1i16_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i16> @llvm.sadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @sadd_nxv1i16_vx(<vscale x 1 x i16> %va, i16 %b) {
+; CHECK-LABEL: sadd_nxv1i16_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, mu
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.sadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb)
+ ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @sadd_nxv1i16_vi(<vscale x 1 x i16> %va) {
+; CHECK-LABEL: sadd_nxv1i16_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu
+; CHECK-NEXT: vsadd.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i16> undef, i16 5, i32 0
+ %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i16> @llvm.sadd.sat.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb)
+ ret <vscale x 1 x i16> %v
+}
+
+declare <vscale x 2 x i16> @llvm.sadd.sat.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
+
+define <vscale x 2 x i16> @sadd_nxv2i16_vv(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b) {
+; CHECK-LABEL: sadd_nxv2i16_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i16> @llvm.sadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @sadd_nxv2i16_vx(<vscale x 2 x i16> %va, i16 %b) {
+; CHECK-LABEL: sadd_nxv2i16_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, mu
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.sadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb)
+ ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @sadd_nxv2i16_vi(<vscale x 2 x i16> %va) {
+; CHECK-LABEL: sadd_nxv2i16_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu
+; CHECK-NEXT: vsadd.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i16> undef, i16 5, i32 0
+ %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i16> @llvm.sadd.sat.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb)
+ ret <vscale x 2 x i16> %v
+}
+
+declare <vscale x 4 x i16> @llvm.sadd.sat.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
+
+define <vscale x 4 x i16> @sadd_nxv4i16_vv(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b) {
+; CHECK-LABEL: sadd_nxv4i16_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i16> @llvm.sadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @sadd_nxv4i16_vx(<vscale x 4 x i16> %va, i16 %b) {
+; CHECK-LABEL: sadd_nxv4i16_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.sadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb)
+ ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @sadd_nxv4i16_vi(<vscale x 4 x i16> %va) {
+; CHECK-LABEL: sadd_nxv4i16_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu
+; CHECK-NEXT: vsadd.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i16> undef, i16 5, i32 0
+ %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i16> @llvm.sadd.sat.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb)
+ ret <vscale x 4 x i16> %v
+}
+
+declare <vscale x 8 x i16> @llvm.sadd.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+
+define <vscale x 8 x i16> @sadd_nxv8i16_vv(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sadd_nxv8i16_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu
+; CHECK-NEXT: vsadd.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i16> @llvm.sadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @sadd_nxv8i16_vx(<vscale x 8 x i16> %va, i16 %b) {
+; CHECK-LABEL: sadd_nxv8i16_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.sadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb)
+ ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @sadd_nxv8i16_vi(<vscale x 8 x i16> %va) {
+; CHECK-LABEL: sadd_nxv8i16_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu
+; CHECK-NEXT: vsadd.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i16> undef, i16 5, i32 0
+ %vb = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i16> @llvm.sadd.sat.nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb)
+ ret <vscale x 8 x i16> %v
+}
+
+declare <vscale x 16 x i16> @llvm.sadd.sat.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x i16>)
+
+define <vscale x 16 x i16> @sadd_nxv16i16_vv(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b) {
+; CHECK-LABEL: sadd_nxv16i16_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu
+; CHECK-NEXT: vsadd.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i16> @llvm.sadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %b)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @sadd_nxv16i16_vx(<vscale x 16 x i16> %va, i16 %b) {
+; CHECK-LABEL: sadd_nxv16i16_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, mu
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.sadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb)
+ ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @sadd_nxv16i16_vi(<vscale x 16 x i16> %va) {
+; CHECK-LABEL: sadd_nxv16i16_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu
+; CHECK-NEXT: vsadd.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i16> undef, i16 5, i32 0
+ %vb = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i16> @llvm.sadd.sat.nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb)
+ ret <vscale x 16 x i16> %v
+}
+
+declare <vscale x 32 x i16> @llvm.sadd.sat.nxv32i16(<vscale x 32 x i16>, <vscale x 32 x i16>)
+
+define <vscale x 32 x i16> @sadd_nxv32i16_vv(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b) {
+; CHECK-LABEL: sadd_nxv32i16_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu
+; CHECK-NEXT: vsadd.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 32 x i16> @llvm.sadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %b)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @sadd_nxv32i16_vx(<vscale x 32 x i16> %va, i16 %b) {
+; CHECK-LABEL: sadd_nxv32i16_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> undef, i16 %b, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.sadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb)
+ ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @sadd_nxv32i16_vi(<vscale x 32 x i16> %va) {
+; CHECK-LABEL: sadd_nxv32i16_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu
+; CHECK-NEXT: vsadd.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x i16> undef, i16 5, i32 0
+ %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> undef, <vscale x 32 x i32> zeroinitializer
+ %v = call <vscale x 32 x i16> @llvm.sadd.sat.nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb)
+ ret <vscale x 32 x i16> %v
+}
+
+declare <vscale x 1 x i32> @llvm.sadd.sat.nxv1i32(<vscale x 1 x i32>, <vscale x 1 x i32>)
+
+define <vscale x 1 x i32> @sadd_nxv1i32_vv(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b) {
+; CHECK-LABEL: sadd_nxv1i32_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i32> @llvm.sadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %b)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @sadd_nxv1i32_vx(<vscale x 1 x i32> %va, i32 %b) {
+; CHECK-LABEL: sadd_nxv1i32_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.sadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb)
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @sadd_nxv1i32_vi(<vscale x 1 x i32> %va) {
+; CHECK-LABEL: sadd_nxv1i32_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vsadd.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 1 x i32> undef, i32 5, i32 0
+ %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer
+ %v = call <vscale x 1 x i32> @llvm.sadd.sat.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb)
+ ret <vscale x 1 x i32> %v
+}
+
+declare <vscale x 2 x i32> @llvm.sadd.sat.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
+
+define <vscale x 2 x i32> @sadd_nxv2i32_vv(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: sadd_nxv2i32_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x i32> @llvm.sadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %b)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @sadd_nxv2i32_vx(<vscale x 2 x i32> %va, i32 %b) {
+; CHECK-LABEL: sadd_nxv2i32_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.sadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb)
+ ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @sadd_nxv2i32_vi(<vscale x 2 x i32> %va) {
+; CHECK-LABEL: sadd_nxv2i32_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
+; CHECK-NEXT: vsadd.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 2 x i32> undef, i32 5, i32 0
+ %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
+ %v = call <vscale x 2 x i32> @llvm.sadd.sat.nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb)
+ ret <vscale x 2 x i32> %v
+}
+
+declare <vscale x 4 x i32> @llvm.sadd.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+
+define <vscale x 4 x i32> @sadd_nxv4i32_vv(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sadd_nxv4i32_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-NEXT: vsadd.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 4 x i32> @llvm.sadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @sadd_nxv4i32_vx(<vscale x 4 x i32> %va, i32 %b) {
+; CHECK-LABEL: sadd_nxv4i32_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.sadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb)
+ ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @sadd_nxv4i32_vi(<vscale x 4 x i32> %va) {
+; CHECK-LABEL: sadd_nxv4i32_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-NEXT: vsadd.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 4 x i32> undef, i32 5, i32 0
+ %vb = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+ %v = call <vscale x 4 x i32> @llvm.sadd.sat.nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb)
+ ret <vscale x 4 x i32> %v
+}
+
+declare <vscale x 8 x i32> @llvm.sadd.sat.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
+
+define <vscale x 8 x i32> @sadd_nxv8i32_vv(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b) {
+; CHECK-LABEL: sadd_nxv8i32_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu
+; CHECK-NEXT: vsadd.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %v = call <vscale x 8 x i32> @llvm.sadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %b)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @sadd_nxv8i32_vx(<vscale x 8 x i32> %va, i32 %b) {
+; CHECK-LABEL: sadd_nxv8i32_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.sadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb)
+ ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @sadd_nxv8i32_vi(<vscale x 8 x i32> %va) {
+; CHECK-LABEL: sadd_nxv8i32_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu
+; CHECK-NEXT: vsadd.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 8 x i32> undef, i32 5, i32 0
+ %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
+ %v = call <vscale x 8 x i32> @llvm.sadd.sat.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb)
+ ret <vscale x 8 x i32> %v
+}
+
+declare <vscale x 16 x i32> @llvm.sadd.sat.nxv16i32(<vscale x 16 x i32>, <vscale x 16 x i32>)
+
+define <vscale x 16 x i32> @sadd_nxv16i32_vv(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b) {
+; CHECK-LABEL: sadd_nxv16i32_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu
+; CHECK-NEXT: vsadd.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %v = call <vscale x 16 x i32> @llvm.sadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %b)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @sadd_nxv16i32_vx(<vscale x 16 x i32> %va, i32 %b) {
+; CHECK-LABEL: sadd_nxv16i32_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, mu
+; CHECK-NEXT: vsadd.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> undef, i32 %b, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.sadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb)
+ ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @sadd_nxv16i32_vi(<vscale x 16 x i32> %va) {
+; CHECK-LABEL: sadd_nxv16i32_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu
+; CHECK-NEXT: vsadd.vi v8, v8, 5
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 16 x i32> undef, i32 5, i32 0
+ %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> undef, <vscale x 16 x i32> zeroinitializer
+ %v = call <vscale x 16 x i32> @llvm.sadd.sat.nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb)
+ ret <vscale x 16 x i32> %v
+}
+
+declare <vscale x 1 x i64> @llvm.sadd.sat.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>)
+
+define <vscale x 1 x i64> @sadd_nxv1i64_vv(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b) {
+; CHECK-LABEL: sadd_nxv1i64_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu
+; CHECK-NEXT: vsadd.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 1 x i64> @llvm.sadd.sat.nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %b)
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @sadd_nxv1i64_vx(<vscale x 1 x i64> %va, i64 %b) {
+; RV32-LABEL: sadd_nxv1i64_vx:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
+; RV32-NEXT: vsadd.vv v8, v8, v25
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: sadd_nxv1i64_vx:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1,
zero, e64, m1, ta, mu +; RV64-NEXT: vsadd.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.sadd.sat.nxv1i64( %va, %vb) + ret %v +} + +define @sadd_nxv1i64_vi( %va) { +; CHECK-LABEL: sadd_nxv1i64_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; CHECK-NEXT: vsadd.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.sadd.sat.nxv1i64( %va, %vb) + ret %v +} + +declare @llvm.sadd.sat.nxv2i64(, ) + +define @sadd_nxv2i64_vv( %va, %b) { +; CHECK-LABEL: sadd_nxv2i64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK-NEXT: vsadd.vv v8, v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.sadd.sat.nxv2i64( %va, %b) + ret %v +} + +define @sadd_nxv2i64_vx( %va, i64 %b) { +; RV32-LABEL: sadd_nxv2i64_vx: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vsadd.vv v8, v8, v26 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: sadd_nxv2i64_vx: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, mu +; RV64-NEXT: vsadd.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.sadd.sat.nxv2i64( %va, %vb) + ret %v +} + +define @sadd_nxv2i64_vi( %va) { +; CHECK-LABEL: sadd_nxv2i64_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK-NEXT: vsadd.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.sadd.sat.nxv2i64( %va, %vb) + ret %v +} + +declare @llvm.sadd.sat.nxv4i64(, ) + +define @sadd_nxv4i64_vv( %va, %b) { +; CHECK-LABEL: sadd_nxv4i64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK-NEXT: vsadd.vv v8, v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.sadd.sat.nxv4i64( %va, %b) + ret %v +} + +define @sadd_nxv4i64_vx( %va, i64 %b) { +; RV32-LABEL: sadd_nxv4i64_vx: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vsadd.vv v8, v8, v28 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: sadd_nxv4i64_vx: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, mu +; RV64-NEXT: vsadd.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.sadd.sat.nxv4i64( %va, %vb) + ret %v +} + +define @sadd_nxv4i64_vi( %va) { +; CHECK-LABEL: sadd_nxv4i64_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK-NEXT: vsadd.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.sadd.sat.nxv4i64( %va, %vb) + ret %v +} + +declare @llvm.sadd.sat.nxv8i64(, ) + +define @sadd_nxv8i64_vv( %va, %b) { +; CHECK-LABEL: sadd_nxv8i64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK-NEXT: vsadd.vv v8, v8, v16 +; 
CHECK-NEXT: ret + %v = call @llvm.sadd.sat.nxv8i64( %va, %b) + ret %v +} + +define @sadd_nxv8i64_vx( %va, i64 %b) { +; RV32-LABEL: sadd_nxv8i64_vx: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsadd.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: sadd_nxv8i64_vx: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; RV64-NEXT: vsadd.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.sadd.sat.nxv8i64( %va, %vb) + ret %v +} + +define @sadd_nxv8i64_vi( %va) { +; CHECK-LABEL: sadd_nxv8i64_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK-NEXT: vsadd.vi v8, v8, 5 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 5, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.sadd.sat.nxv8i64( %va, %vb) + ret %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vsaddu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsaddu-sdnode.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vsaddu-sdnode.ll @@ -0,0 +1,849 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 + +declare @llvm.uadd.sat.nxv1i8(, ) + +define @uadd_nxv1i8_vv( %va, %b) { +; CHECK-LABEL: uadd_nxv1i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.uadd.sat.nxv1i8( %va, %b) + ret %v +} + +define @uadd_nxv1i8_vx( %va, i8 %b) { +; CHECK-LABEL: uadd_nxv1i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, mu +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv1i8( %va, %vb) + ret %v +} + +define @uadd_nxv1i8_vi( %va) { +; CHECK-LABEL: uadd_nxv1i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 8, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv1i8( %va, %vb) + ret %v +} + +declare @llvm.uadd.sat.nxv2i8(, ) + +define @uadd_nxv2i8_vv( %va, %b) { +; CHECK-LABEL: uadd_nxv2i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.uadd.sat.nxv2i8( %va, %b) + ret %v +} + +define @uadd_nxv2i8_vx( %va, i8 %b) { +; CHECK-LABEL: uadd_nxv2i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, mu +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv2i8( %va, %vb) + ret %v +} + +define @uadd_nxv2i8_vi( %va) { +; CHECK-LABEL: uadd_nxv2i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = 
insertelement undef, i8 8, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv2i8( %va, %vb) + ret %v +} + +declare @llvm.uadd.sat.nxv4i8(, ) + +define @uadd_nxv4i8_vv( %va, %b) { +; CHECK-LABEL: uadd_nxv4i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.uadd.sat.nxv4i8( %va, %b) + ret %v +} + +define @uadd_nxv4i8_vx( %va, i8 %b) { +; CHECK-LABEL: uadd_nxv4i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv4i8( %va, %vb) + ret %v +} + +define @uadd_nxv4i8_vi( %va) { +; CHECK-LABEL: uadd_nxv4i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 8, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv4i8( %va, %vb) + ret %v +} + +declare @llvm.uadd.sat.nxv8i8(, ) + +define @uadd_nxv8i8_vv( %va, %b) { +; CHECK-LABEL: uadd_nxv8i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.uadd.sat.nxv8i8( %va, %b) + ret %v +} + +define @uadd_nxv8i8_vx( %va, i8 %b) { +; CHECK-LABEL: uadd_nxv8i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, mu +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv8i8( %va, %vb) + ret %v +} + +define @uadd_nxv8i8_vi( %va) { +; CHECK-LABEL: uadd_nxv8i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 8, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv8i8( %va, %vb) + ret %v +} + +declare @llvm.uadd.sat.nxv16i8(, ) + +define @uadd_nxv16i8_vv( %va, %b) { +; CHECK-LABEL: uadd_nxv16i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.uadd.sat.nxv16i8( %va, %b) + ret %v +} + +define @uadd_nxv16i8_vx( %va, i8 %b) { +; CHECK-LABEL: uadd_nxv16i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, mu +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv16i8( %va, %vb) + ret %v +} + +define @uadd_nxv16i8_vi( %va) { +; CHECK-LABEL: uadd_nxv16i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 8, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv16i8( %va, %vb) + ret %v +} + +declare @llvm.uadd.sat.nxv32i8(, ) + +define @uadd_nxv32i8_vv( %va, %b) { +; CHECK-LABEL: uadd_nxv32i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.uadd.sat.nxv32i8( %va, %b) + ret %v +} + +define @uadd_nxv32i8_vx( %va, i8 %b) { +; CHECK-LABEL: uadd_nxv32i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, 
mu +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv32i8( %va, %vb) + ret %v +} + +define @uadd_nxv32i8_vi( %va) { +; CHECK-LABEL: uadd_nxv32i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 8, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv32i8( %va, %vb) + ret %v +} + +declare @llvm.uadd.sat.nxv64i8(, ) + +define @uadd_nxv64i8_vv( %va, %b) { +; CHECK-LABEL: uadd_nxv64i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.uadd.sat.nxv64i8( %va, %b) + ret %v +} + +define @uadd_nxv64i8_vx( %va, i8 %b) { +; CHECK-LABEL: uadd_nxv64i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, mu +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv64i8( %va, %vb) + ret %v +} + +define @uadd_nxv64i8_vi( %va) { +; CHECK-LABEL: uadd_nxv64i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 8, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv64i8( %va, %vb) + ret %v +} + +declare @llvm.uadd.sat.nxv1i16(, ) + +define @uadd_nxv1i16_vv( %va, %b) { +; CHECK-LABEL: uadd_nxv1i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.uadd.sat.nxv1i16( %va, %b) + ret %v +} + +define @uadd_nxv1i16_vx( %va, i16 %b) { +; CHECK-LABEL: uadd_nxv1i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, mu +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv1i16( %va, %vb) + ret %v +} + +define @uadd_nxv1i16_vi( %va) { +; CHECK-LABEL: uadd_nxv1i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 8, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv1i16( %va, %vb) + ret %v +} + +declare @llvm.uadd.sat.nxv2i16(, ) + +define @uadd_nxv2i16_vv( %va, %b) { +; CHECK-LABEL: uadd_nxv2i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.uadd.sat.nxv2i16( %va, %b) + ret %v +} + +define @uadd_nxv2i16_vx( %va, i16 %b) { +; CHECK-LABEL: uadd_nxv2i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, mu +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv2i16( %va, %vb) + ret %v +} + +define @uadd_nxv2i16_vi( %va) { +; CHECK-LABEL: uadd_nxv2i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 8, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call 
@llvm.uadd.sat.nxv2i16( %va, %vb) + ret %v +} + +declare @llvm.uadd.sat.nxv4i16(, ) + +define @uadd_nxv4i16_vv( %va, %b) { +; CHECK-LABEL: uadd_nxv4i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.uadd.sat.nxv4i16( %va, %b) + ret %v +} + +define @uadd_nxv4i16_vx( %va, i16 %b) { +; CHECK-LABEL: uadd_nxv4i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv4i16( %va, %vb) + ret %v +} + +define @uadd_nxv4i16_vi( %va) { +; CHECK-LABEL: uadd_nxv4i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 8, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv4i16( %va, %vb) + ret %v +} + +declare @llvm.uadd.sat.nxv8i16(, ) + +define @uadd_nxv8i16_vv( %va, %b) { +; CHECK-LABEL: uadd_nxv8i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.uadd.sat.nxv8i16( %va, %b) + ret %v +} + +define @uadd_nxv8i16_vx( %va, i16 %b) { +; CHECK-LABEL: uadd_nxv8i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv8i16( %va, %vb) + ret %v +} + +define @uadd_nxv8i16_vi( %va) { +; CHECK-LABEL: uadd_nxv8i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 8, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv8i16( %va, %vb) + ret %v +} + +declare @llvm.uadd.sat.nxv16i16(, ) + +define @uadd_nxv16i16_vv( %va, %b) { +; CHECK-LABEL: uadd_nxv16i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.uadd.sat.nxv16i16( %va, %b) + ret %v +} + +define @uadd_nxv16i16_vx( %va, i16 %b) { +; CHECK-LABEL: uadd_nxv16i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, mu +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv16i16( %va, %vb) + ret %v +} + +define @uadd_nxv16i16_vi( %va) { +; CHECK-LABEL: uadd_nxv16i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 8, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv16i16( %va, %vb) + ret %v +} + +declare @llvm.uadd.sat.nxv32i16(, ) + +define @uadd_nxv32i16_vv( %va, %b) { +; CHECK-LABEL: uadd_nxv32i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.uadd.sat.nxv32i16( %va, %b) + ret %v +} + +define @uadd_nxv32i16_vx( %va, i16 %b) { +; CHECK-LABEL: uadd_nxv32i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; 
CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv32i16( %va, %vb) + ret %v +} + +define @uadd_nxv32i16_vi( %va) { +; CHECK-LABEL: uadd_nxv32i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 8, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv32i16( %va, %vb) + ret %v +} + +declare @llvm.uadd.sat.nxv1i32(, ) + +define @uadd_nxv1i32_vv( %va, %b) { +; CHECK-LABEL: uadd_nxv1i32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.uadd.sat.nxv1i32( %va, %b) + ret %v +} + +define @uadd_nxv1i32_vx( %va, i32 %b) { +; CHECK-LABEL: uadd_nxv1i32_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, mu +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv1i32( %va, %vb) + ret %v +} + +define @uadd_nxv1i32_vi( %va) { +; CHECK-LABEL: uadd_nxv1i32_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 8, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv1i32( %va, %vb) + ret %v +} + +declare @llvm.uadd.sat.nxv2i32(, ) + +define @uadd_nxv2i32_vv( %va, %b) { +; CHECK-LABEL: uadd_nxv2i32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.uadd.sat.nxv2i32( %va, %b) + ret %v +} + +define @uadd_nxv2i32_vx( %va, i32 %b) { +; CHECK-LABEL: uadd_nxv2i32_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv2i32( %va, %vb) + ret %v +} + +define @uadd_nxv2i32_vi( %va) { +; CHECK-LABEL: uadd_nxv2i32_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 8, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv2i32( %va, %vb) + ret %v +} + +declare @llvm.uadd.sat.nxv4i32(, ) + +define @uadd_nxv4i32_vv( %va, %b) { +; CHECK-LABEL: uadd_nxv4i32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.uadd.sat.nxv4i32( %va, %b) + ret %v +} + +define @uadd_nxv4i32_vx( %va, i32 %b) { +; CHECK-LABEL: uadd_nxv4i32_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv4i32( %va, %vb) + ret %v +} + +define @uadd_nxv4i32_vi( %va) { +; CHECK-LABEL: uadd_nxv4i32_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 8, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv4i32( %va, %vb) + ret %v +} + 
+declare @llvm.uadd.sat.nxv8i32(, ) + +define @uadd_nxv8i32_vv( %va, %b) { +; CHECK-LABEL: uadd_nxv8i32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.uadd.sat.nxv8i32( %va, %b) + ret %v +} + +define @uadd_nxv8i32_vx( %va, i32 %b) { +; CHECK-LABEL: uadd_nxv8i32_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv8i32( %va, %vb) + ret %v +} + +define @uadd_nxv8i32_vi( %va) { +; CHECK-LABEL: uadd_nxv8i32_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 8, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv8i32( %va, %vb) + ret %v +} + +declare @llvm.uadd.sat.nxv16i32(, ) + +define @uadd_nxv16i32_vv( %va, %b) { +; CHECK-LABEL: uadd_nxv16i32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.uadd.sat.nxv16i32( %va, %b) + ret %v +} + +define @uadd_nxv16i32_vx( %va, i32 %b) { +; CHECK-LABEL: uadd_nxv16i32_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, mu +; CHECK-NEXT: vsaddu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv16i32( %va, %vb) + ret %v +} + +define @uadd_nxv16i32_vi( %va) { +; CHECK-LABEL: uadd_nxv16i32_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 8, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv16i32( %va, %vb) + ret %v +} + +declare @llvm.uadd.sat.nxv1i64(, ) + +define @uadd_nxv1i64_vv( %va, %b) { +; CHECK-LABEL: uadd_nxv1i64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.uadd.sat.nxv1i64( %va, %b) + ret %v +} + +define @uadd_nxv1i64_vx( %va, i64 %b) { +; RV32-LABEL: uadd_nxv1i64_vx: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: vsaddu.vv v8, v8, v25 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: uadd_nxv1i64_vx: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; RV64-NEXT: vsaddu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv1i64( %va, %vb) + ret %v +} + +define @uadd_nxv1i64_vi( %va) { +; CHECK-LABEL: uadd_nxv1i64_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 8, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv1i64( %va, %vb) + ret %v +} + +declare @llvm.uadd.sat.nxv2i64(, ) + +define @uadd_nxv2i64_vv( %va, %b) { +; CHECK-LABEL: uadd_nxv2i64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, 
e64, m2, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.uadd.sat.nxv2i64( %va, %b) + ret %v +} + +define @uadd_nxv2i64_vx( %va, i64 %b) { +; RV32-LABEL: uadd_nxv2i64_vx: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vsaddu.vv v8, v8, v26 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: uadd_nxv2i64_vx: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, mu +; RV64-NEXT: vsaddu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv2i64( %va, %vb) + ret %v +} + +define @uadd_nxv2i64_vi( %va) { +; CHECK-LABEL: uadd_nxv2i64_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 8, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv2i64( %va, %vb) + ret %v +} + +declare @llvm.uadd.sat.nxv4i64(, ) + +define @uadd_nxv4i64_vv( %va, %b) { +; CHECK-LABEL: uadd_nxv4i64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.uadd.sat.nxv4i64( %va, %b) + ret %v +} + +define @uadd_nxv4i64_vx( %va, i64 %b) { +; RV32-LABEL: uadd_nxv4i64_vx: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vsaddu.vv v8, v8, v28 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: uadd_nxv4i64_vx: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, mu +; RV64-NEXT: vsaddu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv4i64( %va, %vb) + ret %v +} + +define @uadd_nxv4i64_vi( %va) { +; CHECK-LABEL: uadd_nxv4i64_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 8, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv4i64( %va, %vb) + ret %v +} + +declare @llvm.uadd.sat.nxv8i64(, ) + +define @uadd_nxv8i64_vv( %va, %b) { +; CHECK-LABEL: uadd_nxv8i64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK-NEXT: vsaddu.vv v8, v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.uadd.sat.nxv8i64( %va, %b) + ret %v +} + +define @uadd_nxv8i64_vx( %va, i64 %b) { +; RV32-LABEL: uadd_nxv8i64_vx: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsaddu.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: uadd_nxv8i64_vx: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; RV64-NEXT: vsaddu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, 
zeroinitializer + %v = call @llvm.uadd.sat.nxv8i64( %va, %vb) + ret %v +} + +define @uadd_nxv8i64_vi( %va) { +; CHECK-LABEL: uadd_nxv8i64_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK-NEXT: vsaddu.vi v8, v8, 8 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 8, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.uadd.sat.nxv8i64( %va, %vb) + ret %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vssub-sdnode.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vssub-sdnode.ll @@ -0,0 +1,871 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 + +declare @llvm.ssub.sat.nxv1i8(, ) + +define @ssub_nxv1i8_vv( %va, %b) { +; CHECK-LABEL: ssub_nxv1i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.ssub.sat.nxv1i8( %va, %b) + ret %v +} + +define @ssub_nxv1i8_vx( %va, i8 %b) { +; CHECK-LABEL: ssub_nxv1i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv1i8( %va, %vb) + ret %v +} + +define @ssub_nxv1i8_vi( %va) { +; CHECK-LABEL: ssub_nxv1i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv1i8( %va, %vb) + ret %v +} + +declare @llvm.ssub.sat.nxv2i8(, ) + +define @ssub_nxv2i8_vv( %va, %b) { +; CHECK-LABEL: ssub_nxv2i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.ssub.sat.nxv2i8( %va, %b) + ret %v +} + +define @ssub_nxv2i8_vx( %va, i8 %b) { +; CHECK-LABEL: ssub_nxv2i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv2i8( %va, %vb) + ret %v +} + +define @ssub_nxv2i8_vi( %va) { +; CHECK-LABEL: ssub_nxv2i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv2i8( %va, %vb) + ret %v +} + +declare @llvm.ssub.sat.nxv4i8(, ) + +define @ssub_nxv4i8_vv( %va, %b) { +; CHECK-LABEL: ssub_nxv4i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.ssub.sat.nxv4i8( %va, %b) + ret %v +} + +define @ssub_nxv4i8_vx( %va, i8 %b) { +; CHECK-LABEL: ssub_nxv4i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = 
shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv4i8( %va, %vb) + ret %v +} + +define @ssub_nxv4i8_vi( %va) { +; CHECK-LABEL: ssub_nxv4i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv4i8( %va, %vb) + ret %v +} + +declare @llvm.ssub.sat.nxv8i8(, ) + +define @ssub_nxv8i8_vv( %va, %b) { +; CHECK-LABEL: ssub_nxv8i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.ssub.sat.nxv8i8( %va, %b) + ret %v +} + +define @ssub_nxv8i8_vx( %va, i8 %b) { +; CHECK-LABEL: ssub_nxv8i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv8i8( %va, %vb) + ret %v +} + +define @ssub_nxv8i8_vi( %va) { +; CHECK-LABEL: ssub_nxv8i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv8i8( %va, %vb) + ret %v +} + +declare @llvm.ssub.sat.nxv16i8(, ) + +define @ssub_nxv16i8_vv( %va, %b) { +; CHECK-LABEL: ssub_nxv16i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.ssub.sat.nxv16i8( %va, %b) + ret %v +} + +define @ssub_nxv16i8_vx( %va, i8 %b) { +; CHECK-LABEL: ssub_nxv16i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv16i8( %va, %vb) + ret %v +} + +define @ssub_nxv16i8_vi( %va) { +; CHECK-LABEL: ssub_nxv16i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv16i8( %va, %vb) + ret %v +} + +declare @llvm.ssub.sat.nxv32i8(, ) + +define @ssub_nxv32i8_vv( %va, %b) { +; CHECK-LABEL: ssub_nxv32i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.ssub.sat.nxv32i8( %va, %b) + ret %v +} + +define @ssub_nxv32i8_vx( %va, i8 %b) { +; CHECK-LABEL: ssub_nxv32i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv32i8( %va, %vb) + ret %v +} + +define @ssub_nxv32i8_vi( %va) { +; CHECK-LABEL: ssub_nxv32i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv32i8( %va, %vb) + ret 
%v +} + +declare @llvm.ssub.sat.nxv64i8(, ) + +define @ssub_nxv64i8_vv( %va, %b) { +; CHECK-LABEL: ssub_nxv64i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.ssub.sat.nxv64i8( %va, %b) + ret %v +} + +define @ssub_nxv64i8_vx( %va, i8 %b) { +; CHECK-LABEL: ssub_nxv64i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv64i8( %va, %vb) + ret %v +} + +define @ssub_nxv64i8_vi( %va) { +; CHECK-LABEL: ssub_nxv64i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv64i8( %va, %vb) + ret %v +} + +declare @llvm.ssub.sat.nxv1i16(, ) + +define @ssub_nxv1i16_vv( %va, %b) { +; CHECK-LABEL: ssub_nxv1i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.ssub.sat.nxv1i16( %va, %b) + ret %v +} + +define @ssub_nxv1i16_vx( %va, i16 %b) { +; CHECK-LABEL: ssub_nxv1i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv1i16( %va, %vb) + ret %v +} + +define @ssub_nxv1i16_vi( %va) { +; CHECK-LABEL: ssub_nxv1i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv1i16( %va, %vb) + ret %v +} + +declare @llvm.ssub.sat.nxv2i16(, ) + +define @ssub_nxv2i16_vv( %va, %b) { +; CHECK-LABEL: ssub_nxv2i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.ssub.sat.nxv2i16( %va, %b) + ret %v +} + +define @ssub_nxv2i16_vx( %va, i16 %b) { +; CHECK-LABEL: ssub_nxv2i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv2i16( %va, %vb) + ret %v +} + +define @ssub_nxv2i16_vi( %va) { +; CHECK-LABEL: ssub_nxv2i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv2i16( %va, %vb) + ret %v +} + +declare @llvm.ssub.sat.nxv4i16(, ) + +define @ssub_nxv4i16_vv( %va, %b) { +; CHECK-LABEL: ssub_nxv4i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.ssub.sat.nxv4i16( %va, %b) + ret %v +} + +define @ssub_nxv4i16_vx( %va, i16 %b) { +; CHECK-LABEL: ssub_nxv4i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; 
CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv4i16( %va, %vb) + ret %v +} + +define @ssub_nxv4i16_vi( %va) { +; CHECK-LABEL: ssub_nxv4i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv4i16( %va, %vb) + ret %v +} + +declare @llvm.ssub.sat.nxv8i16(, ) + +define @ssub_nxv8i16_vv( %va, %b) { +; CHECK-LABEL: ssub_nxv8i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.ssub.sat.nxv8i16( %va, %b) + ret %v +} + +define @ssub_nxv8i16_vx( %va, i16 %b) { +; CHECK-LABEL: ssub_nxv8i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv8i16( %va, %vb) + ret %v +} + +define @ssub_nxv8i16_vi( %va) { +; CHECK-LABEL: ssub_nxv8i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv8i16( %va, %vb) + ret %v +} + +declare @llvm.ssub.sat.nxv16i16(, ) + +define @ssub_nxv16i16_vv( %va, %b) { +; CHECK-LABEL: ssub_nxv16i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.ssub.sat.nxv16i16( %va, %b) + ret %v +} + +define @ssub_nxv16i16_vx( %va, i16 %b) { +; CHECK-LABEL: ssub_nxv16i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv16i16( %va, %vb) + ret %v +} + +define @ssub_nxv16i16_vi( %va) { +; CHECK-LABEL: ssub_nxv16i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv16i16( %va, %vb) + ret %v +} + +declare @llvm.ssub.sat.nxv32i16(, ) + +define @ssub_nxv32i16_vv( %va, %b) { +; CHECK-LABEL: ssub_nxv32i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.ssub.sat.nxv32i16( %va, %b) + ret %v +} + +define @ssub_nxv32i16_vx( %va, i16 %b) { +; CHECK-LABEL: ssub_nxv32i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv32i16( %va, %vb) + ret %v +} + +define @ssub_nxv32i16_vi( %va) { +; CHECK-LABEL: ssub_nxv32i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; 
CHECK-NEXT: ret + %elt.head = insertelement undef, i16 1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv32i16( %va, %vb) + ret %v +} + +declare @llvm.ssub.sat.nxv1i32(, ) + +define @ssub_nxv1i32_vv( %va, %b) { +; CHECK-LABEL: ssub_nxv1i32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.ssub.sat.nxv1i32( %va, %b) + ret %v +} + +define @ssub_nxv1i32_vx( %va, i32 %b) { +; CHECK-LABEL: ssub_nxv1i32_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv1i32( %va, %vb) + ret %v +} + +define @ssub_nxv1i32_vi( %va) { +; CHECK-LABEL: ssub_nxv1i32_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv1i32( %va, %vb) + ret %v +} + +declare @llvm.ssub.sat.nxv2i32(, ) + +define @ssub_nxv2i32_vv( %va, %b) { +; CHECK-LABEL: ssub_nxv2i32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.ssub.sat.nxv2i32( %va, %b) + ret %v +} + +define @ssub_nxv2i32_vx( %va, i32 %b) { +; CHECK-LABEL: ssub_nxv2i32_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv2i32( %va, %vb) + ret %v +} + +define @ssub_nxv2i32_vi( %va) { +; CHECK-LABEL: ssub_nxv2i32_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv2i32( %va, %vb) + ret %v +} + +declare @llvm.ssub.sat.nxv4i32(, ) + +define @ssub_nxv4i32_vv( %va, %b) { +; CHECK-LABEL: ssub_nxv4i32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.ssub.sat.nxv4i32( %va, %b) + ret %v +} + +define @ssub_nxv4i32_vx( %va, i32 %b) { +; CHECK-LABEL: ssub_nxv4i32_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv4i32( %va, %vb) + ret %v +} + +define @ssub_nxv4i32_vi( %va) { +; CHECK-LABEL: ssub_nxv4i32_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv4i32( %va, %vb) + ret %v +} + +declare @llvm.ssub.sat.nxv8i32(, ) + +define @ssub_nxv8i32_vv( %va, %b) { +; CHECK-LABEL: ssub_nxv8i32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.ssub.sat.nxv8i32( 
%va, %b) + ret %v +} + +define @ssub_nxv8i32_vx( %va, i32 %b) { +; CHECK-LABEL: ssub_nxv8i32_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv8i32( %va, %vb) + ret %v +} + +define @ssub_nxv8i32_vi( %va) { +; CHECK-LABEL: ssub_nxv8i32_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv8i32( %va, %vb) + ret %v +} + +declare @llvm.ssub.sat.nxv16i32(, ) + +define @ssub_nxv16i32_vv( %va, %b) { +; CHECK-LABEL: ssub_nxv16i32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.ssub.sat.nxv16i32( %va, %b) + ret %v +} + +define @ssub_nxv16i32_vx( %va, i32 %b) { +; CHECK-LABEL: ssub_nxv16i32_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv16i32( %va, %vb) + ret %v +} + +define @ssub_nxv16i32_vi( %va) { +; CHECK-LABEL: ssub_nxv16i32_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv16i32( %va, %vb) + ret %v +} + +declare @llvm.ssub.sat.nxv1i64(, ) + +define @ssub_nxv1i64_vv( %va, %b) { +; CHECK-LABEL: ssub_nxv1i64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.ssub.sat.nxv1i64( %va, %b) + ret %v +} + +define @ssub_nxv1i64_vx( %va, i64 %b) { +; RV32-LABEL: ssub_nxv1i64_vx: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: vssub.vv v8, v8, v25 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: ssub_nxv1i64_vx: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; RV64-NEXT: vssub.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv1i64( %va, %vb) + ret %v +} + +define @ssub_nxv1i64_vi( %va) { +; CHECK-LABEL: ssub_nxv1i64_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv1i64( %va, %vb) + ret %v +} + +declare @llvm.ssub.sat.nxv2i64(, ) + +define @ssub_nxv2i64_vv( %va, %b) { +; CHECK-LABEL: ssub_nxv2i64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.ssub.sat.nxv2i64( %va, %b) + ret %v +} + +define @ssub_nxv2i64_vx( %va, i64 %b) { +; 
RV32-LABEL: ssub_nxv2i64_vx: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vssub.vv v8, v8, v26 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: ssub_nxv2i64_vx: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, mu +; RV64-NEXT: vssub.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv2i64( %va, %vb) + ret %v +} + +define @ssub_nxv2i64_vi( %va) { +; CHECK-LABEL: ssub_nxv2i64_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv2i64( %va, %vb) + ret %v +} + +declare @llvm.ssub.sat.nxv4i64(, ) + +define @ssub_nxv4i64_vv( %va, %b) { +; CHECK-LABEL: ssub_nxv4i64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.ssub.sat.nxv4i64( %va, %b) + ret %v +} + +define @ssub_nxv4i64_vx( %va, i64 %b) { +; RV32-LABEL: ssub_nxv4i64_vx: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vssub.vv v8, v8, v28 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: ssub_nxv4i64_vx: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, mu +; RV64-NEXT: vssub.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv4i64( %va, %vb) + ret %v +} + +define @ssub_nxv4i64_vi( %va) { +; CHECK-LABEL: ssub_nxv4i64_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 1 +; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, mu +; CHECK-NEXT: vssub.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 1, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv4i64( %va, %vb) + ret %v +} + +declare @llvm.ssub.sat.nxv8i64(, ) + +define @ssub_nxv8i64_vv( %va, %b) { +; CHECK-LABEL: ssub_nxv8i64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK-NEXT: vssub.vv v8, v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.ssub.sat.nxv8i64( %va, %b) + ret %v +} + +define @ssub_nxv8i64_vx( %va, i64 %b) { +; RV32-LABEL: ssub_nxv8i64_vx: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vssub.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: ssub_nxv8i64_vx: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; RV64-NEXT: vssub.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.ssub.sat.nxv8i64( %va, %vb) + ret %v +} + +define @ssub_nxv8i64_vi( %va) { +; CHECK-LABEL: 
ssub_nxv8i64_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a0, zero, 1
+; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, mu
+; CHECK-NEXT: vssub.vx v8, v8, a0
+; CHECK-NEXT: ret
+  %elt.head = insertelement <vscale x 8 x i64> undef, i64 1, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.ssub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb)
+  ret <vscale x 8 x i64> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu-sdnode.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vssubu-sdnode.ll
@@ -0,0 +1,871 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <vscale x 1 x i8> @llvm.usub.sat.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>)
+
+define <vscale x 1 x i8> @usub_nxv1i8_vv(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b) {
+; CHECK-LABEL: usub_nxv1i8_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, mu
+; CHECK-NEXT: vssubu.vv v8, v8, v9
+; CHECK-NEXT: ret
+  %v = call <vscale x 1 x i8> @llvm.usub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %b)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @usub_nxv1i8_vx(<vscale x 1 x i8> %va, i8 %b) {
+; CHECK-LABEL: usub_nxv1i8_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, mu
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+  %elt.head = insertelement <vscale x 1 x i8> undef, i8 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.usub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @usub_nxv1i8_vi(<vscale x 1 x i8> %va) {
+; CHECK-LABEL: usub_nxv1i8_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a0, zero, 2
+; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, mu
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+  %elt.head = insertelement <vscale x 1 x i8> undef, i8 2, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.usub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb)
+  ret <vscale x 1 x i8> %v
+}
+
+declare <vscale x 2 x i8> @llvm.usub.sat.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>)
+
+define <vscale x 2 x i8> @usub_nxv2i8_vv(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b) {
+; CHECK-LABEL: usub_nxv2i8_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu
+; CHECK-NEXT: vssubu.vv v8, v8, v9
+; CHECK-NEXT: ret
+  %v = call <vscale x 2 x i8> @llvm.usub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %b)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @usub_nxv2i8_vx(<vscale x 2 x i8> %va, i8 %b) {
+; CHECK-LABEL: usub_nxv2i8_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, mu
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+  %elt.head = insertelement <vscale x 2 x i8> undef, i8 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.usub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @usub_nxv2i8_vi(<vscale x 2 x i8> %va) {
+; CHECK-LABEL: usub_nxv2i8_vi:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a0, zero, 2
+; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, mu
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+  %elt.head = insertelement <vscale x 2 x i8> undef, i8 2, i32 0
+  %vb = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.usub.sat.nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb)
+  ret <vscale x 2 x i8> %v
+}
+
+declare <vscale x 4 x i8> @llvm.usub.sat.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
+
+define <vscale x 4 x i8> @usub_nxv4i8_vv(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b) {
+; CHECK-LABEL: usub_nxv4i8_vv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, mu
+; CHECK-NEXT: vssubu.vv v8, v8, v9
+; CHECK-NEXT: ret
+  %v = call <vscale x 4 x i8> @llvm.usub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b)
+  ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @usub_nxv4i8_vx(<vscale x 4 x i8> %va, i8 %b) {
+; CHECK-LABEL: usub_nxv4i8_vx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu
+; CHECK-NEXT: vssubu.vx v8, v8, a0
+; CHECK-NEXT: ret
+  %elt.head = insertelement <vscale x 4 x i8> undef, i8 %b, i32 0
+  %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i8> @llvm.usub.sat.nxv4i8(
%va, %vb) + ret %v +} + +define @usub_nxv4i8_vi( %va) { +; CHECK-LABEL: usub_nxv4i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv4i8( %va, %vb) + ret %v +} + +declare @llvm.usub.sat.nxv8i8(, ) + +define @usub_nxv8i8_vv( %va, %b) { +; CHECK-LABEL: usub_nxv8i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.usub.sat.nxv8i8( %va, %b) + ret %v +} + +define @usub_nxv8i8_vx( %va, i8 %b) { +; CHECK-LABEL: usub_nxv8i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv8i8( %va, %vb) + ret %v +} + +define @usub_nxv8i8_vi( %va) { +; CHECK-LABEL: usub_nxv8i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv8i8( %va, %vb) + ret %v +} + +declare @llvm.usub.sat.nxv16i8(, ) + +define @usub_nxv16i8_vv( %va, %b) { +; CHECK-LABEL: usub_nxv16i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.usub.sat.nxv16i8( %va, %b) + ret %v +} + +define @usub_nxv16i8_vx( %va, i8 %b) { +; CHECK-LABEL: usub_nxv16i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv16i8( %va, %vb) + ret %v +} + +define @usub_nxv16i8_vi( %va) { +; CHECK-LABEL: usub_nxv16i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv16i8( %va, %vb) + ret %v +} + +declare @llvm.usub.sat.nxv32i8(, ) + +define @usub_nxv32i8_vv( %va, %b) { +; CHECK-LABEL: usub_nxv32i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.usub.sat.nxv32i8( %va, %b) + ret %v +} + +define @usub_nxv32i8_vx( %va, i8 %b) { +; CHECK-LABEL: usub_nxv32i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv32i8( %va, %vb) + ret %v +} + +define @usub_nxv32i8_vi( %va) { +; CHECK-LABEL: usub_nxv32i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv32i8( %va, %vb) + ret %v +} + +declare @llvm.usub.sat.nxv64i8(, ) + +define @usub_nxv64i8_vv( 
%va, %b) { +; CHECK-LABEL: usub_nxv64i8_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.usub.sat.nxv64i8( %va, %b) + ret %v +} + +define @usub_nxv64i8_vx( %va, i8 %b) { +; CHECK-LABEL: usub_nxv64i8_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv64i8( %va, %vb) + ret %v +} + +define @usub_nxv64i8_vi( %va) { +; CHECK-LABEL: usub_nxv64i8_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i8 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv64i8( %va, %vb) + ret %v +} + +declare @llvm.usub.sat.nxv1i16(, ) + +define @usub_nxv1i16_vv( %va, %b) { +; CHECK-LABEL: usub_nxv1i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.usub.sat.nxv1i16( %va, %b) + ret %v +} + +define @usub_nxv1i16_vx( %va, i16 %b) { +; CHECK-LABEL: usub_nxv1i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv1i16( %va, %vb) + ret %v +} + +define @usub_nxv1i16_vi( %va) { +; CHECK-LABEL: usub_nxv1i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv1i16( %va, %vb) + ret %v +} + +declare @llvm.usub.sat.nxv2i16(, ) + +define @usub_nxv2i16_vv( %va, %b) { +; CHECK-LABEL: usub_nxv2i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.usub.sat.nxv2i16( %va, %b) + ret %v +} + +define @usub_nxv2i16_vx( %va, i16 %b) { +; CHECK-LABEL: usub_nxv2i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv2i16( %va, %vb) + ret %v +} + +define @usub_nxv2i16_vi( %va) { +; CHECK-LABEL: usub_nxv2i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv2i16( %va, %vb) + ret %v +} + +declare @llvm.usub.sat.nxv4i16(, ) + +define @usub_nxv4i16_vv( %va, %b) { +; CHECK-LABEL: usub_nxv4i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.usub.sat.nxv4i16( %va, %b) + ret %v +} + +define @usub_nxv4i16_vx( %va, i16 %b) { +; CHECK-LABEL: usub_nxv4i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = 
insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv4i16( %va, %vb) + ret %v +} + +define @usub_nxv4i16_vi( %va) { +; CHECK-LABEL: usub_nxv4i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv4i16( %va, %vb) + ret %v +} + +declare @llvm.usub.sat.nxv8i16(, ) + +define @usub_nxv8i16_vv( %va, %b) { +; CHECK-LABEL: usub_nxv8i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.usub.sat.nxv8i16( %va, %b) + ret %v +} + +define @usub_nxv8i16_vx( %va, i16 %b) { +; CHECK-LABEL: usub_nxv8i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv8i16( %va, %vb) + ret %v +} + +define @usub_nxv8i16_vi( %va) { +; CHECK-LABEL: usub_nxv8i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv8i16( %va, %vb) + ret %v +} + +declare @llvm.usub.sat.nxv16i16(, ) + +define @usub_nxv16i16_vv( %va, %b) { +; CHECK-LABEL: usub_nxv16i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.usub.sat.nxv16i16( %va, %b) + ret %v +} + +define @usub_nxv16i16_vx( %va, i16 %b) { +; CHECK-LABEL: usub_nxv16i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv16i16( %va, %vb) + ret %v +} + +define @usub_nxv16i16_vi( %va) { +; CHECK-LABEL: usub_nxv16i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv16i16( %va, %vb) + ret %v +} + +declare @llvm.usub.sat.nxv32i16(, ) + +define @usub_nxv32i16_vv( %va, %b) { +; CHECK-LABEL: usub_nxv32i16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.usub.sat.nxv32i16( %va, %b) + ret %v +} + +define @usub_nxv32i16_vx( %va, i16 %b) { +; CHECK-LABEL: usub_nxv32i16_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv32i16( %va, %vb) + ret %v +} + +define @usub_nxv32i16_vi( %va) { +; CHECK-LABEL: usub_nxv32i16_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i16 2, i32 0 + 
%vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv32i16( %va, %vb) + ret %v +} + +declare @llvm.usub.sat.nxv1i32(, ) + +define @usub_nxv1i32_vv( %va, %b) { +; CHECK-LABEL: usub_nxv1i32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.usub.sat.nxv1i32( %va, %b) + ret %v +} + +define @usub_nxv1i32_vx( %va, i32 %b) { +; CHECK-LABEL: usub_nxv1i32_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv1i32( %va, %vb) + ret %v +} + +define @usub_nxv1i32_vi( %va) { +; CHECK-LABEL: usub_nxv1i32_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv1i32( %va, %vb) + ret %v +} + +declare @llvm.usub.sat.nxv2i32(, ) + +define @usub_nxv2i32_vv( %va, %b) { +; CHECK-LABEL: usub_nxv2i32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.usub.sat.nxv2i32( %va, %b) + ret %v +} + +define @usub_nxv2i32_vx( %va, i32 %b) { +; CHECK-LABEL: usub_nxv2i32_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv2i32( %va, %vb) + ret %v +} + +define @usub_nxv2i32_vi( %va) { +; CHECK-LABEL: usub_nxv2i32_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv2i32( %va, %vb) + ret %v +} + +declare @llvm.usub.sat.nxv4i32(, ) + +define @usub_nxv4i32_vv( %va, %b) { +; CHECK-LABEL: usub_nxv4i32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.usub.sat.nxv4i32( %va, %b) + ret %v +} + +define @usub_nxv4i32_vx( %va, i32 %b) { +; CHECK-LABEL: usub_nxv4i32_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv4i32( %va, %vb) + ret %v +} + +define @usub_nxv4i32_vi( %va) { +; CHECK-LABEL: usub_nxv4i32_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv4i32( %va, %vb) + ret %v +} + +declare @llvm.usub.sat.nxv8i32(, ) + +define @usub_nxv8i32_vv( %va, %b) { +; CHECK-LABEL: usub_nxv8i32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.usub.sat.nxv8i32( %va, %b) + ret %v +} + +define @usub_nxv8i32_vx( %va, 
i32 %b) { +; CHECK-LABEL: usub_nxv8i32_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv8i32( %va, %vb) + ret %v +} + +define @usub_nxv8i32_vi( %va) { +; CHECK-LABEL: usub_nxv8i32_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv8i32( %va, %vb) + ret %v +} + +declare @llvm.usub.sat.nxv16i32(, ) + +define @usub_nxv16i32_vv( %va, %b) { +; CHECK-LABEL: usub_nxv16i32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.usub.sat.nxv16i32( %va, %b) + ret %v +} + +define @usub_nxv16i32_vx( %va, i32 %b) { +; CHECK-LABEL: usub_nxv16i32_vx: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv16i32( %va, %vb) + ret %v +} + +define @usub_nxv16i32_vi( %va) { +; CHECK-LABEL: usub_nxv16i32_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i32 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv16i32( %va, %vb) + ret %v +} + +declare @llvm.usub.sat.nxv1i64(, ) + +define @usub_nxv1i64_vv( %va, %b) { +; CHECK-LABEL: usub_nxv1i64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call @llvm.usub.sat.nxv1i64( %va, %b) + ret %v +} + +define @usub_nxv1i64_vx( %va, i64 %b) { +; RV32-LABEL: usub_nxv1i64_vx: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: vssubu.vv v8, v8, v25 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: usub_nxv1i64_vx: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; RV64-NEXT: vssubu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv1i64( %va, %vb) + ret %v +} + +define @usub_nxv1i64_vi( %va) { +; CHECK-LABEL: usub_nxv1i64_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv1i64( %va, %vb) + ret %v +} + +declare @llvm.usub.sat.nxv2i64(, ) + +define @usub_nxv2i64_vv( %va, %b) { +; CHECK-LABEL: usub_nxv2i64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v10 +; CHECK-NEXT: ret + %v = call @llvm.usub.sat.nxv2i64( %va, %b) + ret %v +} + +define @usub_nxv2i64_vx( %va, i64 %b) { +; RV32-LABEL: usub_nxv2i64_vx: +; RV32: # %bb.0: 
+; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vssubu.vv v8, v8, v26 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: usub_nxv2i64_vx: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, mu +; RV64-NEXT: vssubu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv2i64( %va, %vb) + ret %v +} + +define @usub_nxv2i64_vi( %va) { +; CHECK-LABEL: usub_nxv2i64_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv2i64( %va, %vb) + ret %v +} + +declare @llvm.usub.sat.nxv4i64(, ) + +define @usub_nxv4i64_vv( %va, %b) { +; CHECK-LABEL: usub_nxv4i64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v12 +; CHECK-NEXT: ret + %v = call @llvm.usub.sat.nxv4i64( %va, %b) + ret %v +} + +define @usub_nxv4i64_vx( %va, i64 %b) { +; RV32-LABEL: usub_nxv4i64_vx: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vssubu.vv v8, v8, v28 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: usub_nxv4i64_vx: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, mu +; RV64-NEXT: vssubu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv4i64( %va, %vb) + ret %v +} + +define @usub_nxv4i64_vi( %va) { +; CHECK-LABEL: usub_nxv4i64_vi: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 2 +; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, mu +; CHECK-NEXT: vssubu.vx v8, v8, a0 +; CHECK-NEXT: ret + %elt.head = insertelement undef, i64 2, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv4i64( %va, %vb) + ret %v +} + +declare @llvm.usub.sat.nxv8i64(, ) + +define @usub_nxv8i64_vv( %va, %b) { +; CHECK-LABEL: usub_nxv8i64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK-NEXT: vssubu.vv v8, v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.usub.sat.nxv8i64( %va, %b) + ret %v +} + +define @usub_nxv8i64_vx( %va, i64 %b) { +; RV32-LABEL: usub_nxv8i64_vx: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vssubu.vv v8, v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: usub_nxv8i64_vx: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; RV64-NEXT: vssubu.vx v8, v8, a0 +; RV64-NEXT: ret + %elt.head = insertelement undef, i64 %b, i32 0 + %vb = shufflevector %elt.head, undef, zeroinitializer + %v = call @llvm.usub.sat.nxv8i64( %va, %vb) + ret %v +} + +define @usub_nxv8i64_vi( %va) { +; CHECK-LABEL: usub_nxv8i64_vi: +; CHECK: # %bb.0: +; 
CHECK-NEXT:    addi a0, zero, 2
+; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; CHECK-NEXT:    vssubu.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> undef, i64 2, i32 0
+  %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.usub.sat.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb)
+  ret <vscale x 8 x i64> %v
+}
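The tests above all start from a hand-written @llvm.usub.sat call. As an illustrative sketch only (not part of this patch), the C snippet below shows the source-level idiom that the middle end typically canonicalizes to @llvm.usub.sat; once that intrinsic reaches the RISC-V backend, the new patterns can select vssubu instead of a compare/subtract/select sequence. The function name and the exact compile flags needed to vectorize it are assumptions for illustration, not taken from the patch.

#include <stddef.h>
#include <stdint.h>

/* Unsigned saturating subtract written as the classic select idiom.
 * The optimizer typically recognizes this as @llvm.usub.sat, and a
 * vectorized build targeting RVV can then emit vssubu per vector op. */
void usubsat_u8(uint8_t *dst, const uint8_t *a, const uint8_t *b, size_t n) {
  for (size_t i = 0; i < n; ++i)
    dst[i] = (a[i] > b[i]) ? (uint8_t)(a[i] - b[i]) : 0;
}

The same idiom with the operands swapped under the comparison, or the equivalent min/max form, is also commonly folded to the saturating intrinsics, which is why both the vv and vx/vi forms are covered by the tests above.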