diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -284,6 +284,20 @@ SDTCVecEltisVT<4, i1>, SDTCisSameNumEltsAs<2, 4>, SDTCisVT<5, XLenVT> ]>; +def riscv_add_vl_oneuse : PatFrag<(ops node:$A, node:$B, node:$C, node:$D, + node:$E), + (riscv_add_vl node:$A, node:$B, node:$C, + node:$D, node:$E), [{ + return N->hasOneUse(); +}]>; + +def riscv_sub_vl_oneuse : PatFrag<(ops node:$A, node:$B, node:$C, node:$D, + node:$E), + (riscv_sub_vl node:$A, node:$B, node:$C, + node:$D, node:$E), [{ + return N->hasOneUse(); +}]>; + def riscv_mul_vl_oneuse : PatFrag<(ops node:$A, node:$B, node:$C, node:$D, node:$E), (riscv_mul_vl node:$A, node:$B, node:$C, @@ -1010,6 +1024,66 @@ } } +multiclass VPatMultiplyAccVL_VV_VX<PatFrag op, string instruction_name> { + foreach vti = AllIntegerVectors in { + defvar suffix = vti.LMul.MX; + def : Pat<(riscv_vp_merge_vl (vti.Mask true_mask), + (vti.Vector (op vti.RegClass:$rd, + (riscv_mul_vl_oneuse vti.RegClass:$rs1, vti.RegClass:$rs2, + srcvalue, (vti.Mask true_mask), VLOpFrag), + srcvalue, (vti.Mask true_mask), VLOpFrag)), + vti.RegClass:$rd, VLOpFrag), + (!cast<Instruction>(instruction_name#"_VV_"# suffix) + vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, + GPR:$vl, vti.Log2SEW, TAIL_UNDISTURBED_MASK_UNDISTURBED)>; + def : Pat<(riscv_vp_merge_vl (vti.Mask V0), + (vti.Vector (op vti.RegClass:$rd, + (riscv_mul_vl_oneuse vti.RegClass:$rs1, vti.RegClass:$rs2, + srcvalue, (vti.Mask true_mask), VLOpFrag), + srcvalue, (vti.Mask true_mask), VLOpFrag)), + vti.RegClass:$rd, VLOpFrag), + (!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK") + vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, + (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_UNDISTURBED_MASK_UNDISTURBED)>; + def : Pat<(riscv_vp_merge_vl (vti.Mask true_mask), + (vti.Vector (op vti.RegClass:$rd, + (riscv_mul_vl_oneuse (SplatPat XLenVT:$rs1), 
vti.RegClass:$rs2, + srcvalue, (vti.Mask true_mask), VLOpFrag), + srcvalue, (vti.Mask true_mask), VLOpFrag)), + vti.RegClass:$rd, VLOpFrag), + (!cast<Instruction>(instruction_name#"_VX_"# suffix) + vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, + GPR:$vl, vti.Log2SEW, TAIL_UNDISTURBED_MASK_UNDISTURBED)>; + def : Pat<(riscv_vp_merge_vl (vti.Mask V0), + (vti.Vector (op vti.RegClass:$rd, + (riscv_mul_vl_oneuse (SplatPat XLenVT:$rs1), vti.RegClass:$rs2, + srcvalue, (vti.Mask true_mask), VLOpFrag), + srcvalue, (vti.Mask true_mask), VLOpFrag)), + vti.RegClass:$rd, VLOpFrag), + (!cast<Instruction>(instruction_name#"_VX_"# suffix #"_MASK") + vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, + (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_UNDISTURBED_MASK_UNDISTURBED)>; + def : Pat<(riscv_vselect_vl (vti.Mask V0), + (vti.Vector (op vti.RegClass:$rd, + (riscv_mul_vl_oneuse vti.RegClass:$rs1, vti.RegClass:$rs2, + srcvalue, (vti.Mask true_mask), VLOpFrag), + srcvalue, (vti.Mask true_mask), VLOpFrag)), + vti.RegClass:$rd, VLOpFrag), + (!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK") + vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, + (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(riscv_vselect_vl (vti.Mask V0), + (vti.Vector (op vti.RegClass:$rd, + (riscv_mul_vl_oneuse (SplatPat XLenVT:$rs1), vti.RegClass:$rs2, + srcvalue, (vti.Mask true_mask), VLOpFrag), + srcvalue, (vti.Mask true_mask), VLOpFrag)), + vti.RegClass:$rd, VLOpFrag), + (!cast<Instruction>(instruction_name#"_VX_"# suffix #"_MASK") + vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, + (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + } +} + multiclass VPatWidenMultiplyAddVL_VV_VX { foreach vtiTowti = AllWidenableIntVectors in { defvar vti = vtiTowti.Vti; @@ -1315,6 +1389,8 @@ // 12.13 Vector Single-Width Integer Multiply-Add Instructions defm : VPatMultiplyAddVL_VV_VX; defm : VPatMultiplyAddVL_VV_VX; +defm : VPatMultiplyAccVL_VV_VX<riscv_add_vl_oneuse, "PseudoVMACC">; +defm : VPatMultiplyAccVL_VV_VX<riscv_sub_vl_oneuse, "PseudoVNMSAC">; // 12.14. 
Vector Widening Integer Multiply-Add Instructions defm : VPatWidenMultiplyAddVL_VV_VX; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmacc-vp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmacc-vp.ll @@ -0,0 +1,1958 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 + +declare <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32) +declare <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32) +declare <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1>, <2 x i8>, <2 x i8>, i32) +declare <2 x i8> @llvm.vp.select.nxv2i8(<2 x i1>, <2 x i8>, <2 x i8>, i32) + +define <2 x i8> @vmacc_vv_nxv2i8(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %b, <2 x i1> %allones, i32 %evl) + %y = call <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8> %x, <2 x i8> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1> %m, <2 x i8> %y, <2 x i8> %c, i32 %evl) + ret <2 x i8> %u +} + +define <2 x i8> @vmacc_vv_nxv2i8_unmasked(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, 
mf8, tu, ma +; CHECK-NEXT: vmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %b, <2 x i1> %allones, i32 %evl) + %y = call <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8> %x, <2 x i8> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1> %allones, <2 x i8> %y, <2 x i8> %c, i32 %evl) + ret <2 x i8> %u +} + +define <2 x i8> @vmacc_vx_nxv2i8(<2 x i8> %a, i8 %b, <2 x i8> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %vb, <2 x i1> %allones, i32 %evl) + %y = call <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8> %x, <2 x i8> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1> %m, <2 x i8> %y, <2 x i8> %c, i32 %evl) + ret <2 x i8> %u +} + +define <2 x i8> @vmacc_vx_nxv2i8_unmasked(<2 x i8> %a, i8 %b, <2 x i8> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma +; CHECK-NEXT: vmacc.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, 
<2 x i32> zeroinitializer + %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %vb, <2 x i1> %allones, i32 %evl) + %y = call <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8> %x, <2 x i8> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1> %allones, <2 x i8> %y, <2 x i8> %c, i32 %evl) + ret <2 x i8> %u +} + +define <2 x i8> @vmacc_vv_nxv2i8_ta(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv2i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %b, <2 x i1> %allones, i32 %evl) + %y = call <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8> %x, <2 x i8> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x i8> @llvm.vp.select.nxv2i8(<2 x i1> %m, <2 x i8> %y, <2 x i8> %c, i32 %evl) + ret <2 x i8> %u +} + +define <2 x i8> @vmacc_vx_nxv2i8_ta(<2 x i8> %a, i8 %b, <2 x i8> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv2i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %vb, <2 x i1> %allones, i32 %evl) + %y = call <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8> %x, <2 x i8> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x i8> @llvm.vp.select.nxv2i8(<2 x i1> %m, <2 x i8> %y, <2 x i8> %c, i32 %evl) + ret <2 x i8> %u +} + +declare <4 x 
i8> @llvm.vp.mul.nxv4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32) +declare <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32) +declare <4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1>, <4 x i8>, <4 x i8>, i32) +declare <4 x i8> @llvm.vp.select.nxv4i8(<4 x i1>, <4 x i8>, <4 x i8>, i32) + +define <4 x i8> @vmacc_vv_nxv4i8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %b, <4 x i1> %allones, i32 %evl) + %y = call <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8> %x, <4 x i8> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1> %m, <4 x i8> %y, <4 x i8> %c, i32 %evl) + ret <4 x i8> %u +} + +define <4 x i8> @vmacc_vv_nxv4i8_unmasked(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, ma +; CHECK-NEXT: vmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %b, <4 x i1> %allones, i32 %evl) + %y = call <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8> %x, <4 x i8> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1> %allones, <4 x i8> %y, <4 x i8> %c, i32 %evl) + ret <4 x i8> %u +} + +define <4 x i8> @vmacc_vx_nxv4i8(<4 x i8> %a, i8 %b, <4 x i8> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, mu 
+; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %vb, <4 x i1> %allones, i32 %evl) + %y = call <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8> %x, <4 x i8> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1> %m, <4 x i8> %y, <4 x i8> %c, i32 %evl) + ret <4 x i8> %u +} + +define <4 x i8> @vmacc_vx_nxv4i8_unmasked(<4 x i8> %a, i8 %b, <4 x i8> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma +; CHECK-NEXT: vmacc.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %vb, <4 x i1> %allones, i32 %evl) + %y = call <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8> %x, <4 x i8> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1> %allones, <4 x i8> %y, <4 x i8> %c, i32 %evl) + ret <4 x i8> %u +} + +define <4 x i8> @vmacc_vv_nxv4i8_ta(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv4i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> 
poison, <4 x i32> zeroinitializer + %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %b, <4 x i1> %allones, i32 %evl) + %y = call <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8> %x, <4 x i8> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x i8> @llvm.vp.select.nxv4i8(<4 x i1> %m, <4 x i8> %y, <4 x i8> %c, i32 %evl) + ret <4 x i8> %u +} + +define <4 x i8> @vmacc_vx_nxv4i8_ta(<4 x i8> %a, i8 %b, <4 x i8> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv4i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %vb, <4 x i1> %allones, i32 %evl) + %y = call <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8> %x, <4 x i8> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x i8> @llvm.vp.select.nxv4i8(<4 x i1> %m, <4 x i8> %y, <4 x i8> %c, i32 %evl) + ret <4 x i8> %u +} + +declare <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32) +declare <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32) +declare <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1>, <8 x i8>, <8 x i8>, i32) +declare <8 x i8> @llvm.vp.select.nxv8i8(<8 x i1>, <8 x i8>, <8 x i8>, i32) + +define <8 x i8> @vmacc_vv_nxv8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i8> 
@llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %b, <8 x i1> %allones, i32 %evl) + %y = call <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8> %x, <8 x i8> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1> %m, <8 x i8> %y, <8 x i8> %c, i32 %evl) + ret <8 x i8> %u +} + +define <8 x i8> @vmacc_vv_nxv8i8_unmasked(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma +; CHECK-NEXT: vmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %b, <8 x i1> %allones, i32 %evl) + %y = call <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8> %x, <8 x i8> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1> %allones, <8 x i8> %y, <8 x i8> %c, i32 %evl) + ret <8 x i8> %u +} + +define <8 x i8> @vmacc_vx_nxv8i8(<8 x i8> %a, i8 %b, <8 x i8> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %vb, <8 x i1> %allones, i32 %evl) + %y = call <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8> %x, <8 x i8> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1> %m, <8 x i8> %y, <8 x i8> %c, i32 %evl) + ret <8 x i8> %u +} + +define <8 x i8> @vmacc_vx_nxv8i8_unmasked(<8 x i8> %a, i8 %b, 
<8 x i8> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma +; CHECK-NEXT: vmacc.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %vb, <8 x i1> %allones, i32 %evl) + %y = call <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8> %x, <8 x i8> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1> %allones, <8 x i8> %y, <8 x i8> %c, i32 %evl) + ret <8 x i8> %u +} + +define <8 x i8> @vmacc_vv_nxv8i8_ta(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv8i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %b, <8 x i1> %allones, i32 %evl) + %y = call <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8> %x, <8 x i8> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x i8> @llvm.vp.select.nxv8i8(<8 x i1> %m, <8 x i8> %y, <8 x i8> %c, i32 %evl) + ret <8 x i8> %u +} + +define <8 x i8> @vmacc_vx_nxv8i8_ta(<8 x i8> %a, i8 %b, <8 x i8> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv8i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> 
poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %vb, <8 x i1> %allones, i32 %evl) + %y = call <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8> %x, <8 x i8> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x i8> @llvm.vp.select.nxv8i8(<8 x i1> %m, <8 x i8> %y, <8 x i8> %c, i32 %evl) + ret <8 x i8> %u +} + +declare <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32) +declare <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32) +declare <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1>, <16 x i8>, <16 x i8>, i32) +declare <16 x i8> @llvm.vp.select.nxv16i8(<16 x i1>, <16 x i8>, <16 x i8>, i32) + +define <16 x i8> @vmacc_vv_nxv16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %b, <16 x i1> %allones, i32 %evl) + %y = call <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8> %x, <16 x i8> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1> %m, <16 x i8> %y, <16 x i8> %c, i32 %evl) + ret <16 x i8> %u +} + +define <16 x i8> @vmacc_vv_nxv16i8_unmasked(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma +; CHECK-NEXT: vmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 
x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %b, <16 x i1> %allones, i32 %evl) + %y = call <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8> %x, <16 x i8> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1> %allones, <16 x i8> %y, <16 x i8> %c, i32 %evl) + ret <16 x i8> %u +} + +define <16 x i8> @vmacc_vx_nxv16i8(<16 x i8> %a, i8 %b, <16 x i8> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %vb, <16 x i1> %allones, i32 %evl) + %y = call <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8> %x, <16 x i8> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1> %m, <16 x i8> %y, <16 x i8> %c, i32 %evl) + ret <16 x i8> %u +} + +define <16 x i8> @vmacc_vx_nxv16i8_unmasked(<16 x i8> %a, i8 %b, <16 x i8> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, ma +; CHECK-NEXT: vmacc.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %vb, <16 x i1> %allones, i32 %evl) + %y = call 
<16 x i8> @llvm.vp.add.nxv16i8(<16 x i8> %x, <16 x i8> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1> %allones, <16 x i8> %y, <16 x i8> %c, i32 %evl) + ret <16 x i8> %u +} + +define <16 x i8> @vmacc_vv_nxv16i8_ta(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv16i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %b, <16 x i1> %allones, i32 %evl) + %y = call <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8> %x, <16 x i8> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x i8> @llvm.vp.select.nxv16i8(<16 x i1> %m, <16 x i8> %y, <16 x i8> %c, i32 %evl) + ret <16 x i8> %u +} + +define <16 x i8> @vmacc_vx_nxv16i8_ta(<16 x i8> %a, i8 %b, <16 x i8> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv16i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %vb, <16 x i1> %allones, i32 %evl) + %y = call <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8> %x, <16 x i8> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x i8> @llvm.vp.select.nxv16i8(<16 x i1> %m, <16 x i8> %y, <16 x i8> %c, i32 %evl) + ret <16 x i8> %u +} + +declare <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8>, <32 x i8>, <32 x i1>, i32) 
+declare <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8>, <32 x i8>, <32 x i1>, i32) +declare <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1>, <32 x i8>, <32 x i8>, i32) +declare <32 x i8> @llvm.vp.select.nxv32i8(<32 x i1>, <32 x i8>, <32 x i8>, i32) + +define <32 x i8> @vmacc_vv_nxv32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, mu +; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %b, <32 x i1> %allones, i32 %evl) + %y = call <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8> %x, <32 x i8> %c, <32 x i1> %allones, i32 %evl) + %u = call <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1> %m, <32 x i8> %y, <32 x i8> %c, i32 %evl) + ret <32 x i8> %u +} + +define <32 x i8> @vmacc_vv_nxv32i8_unmasked(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, ma +; CHECK-NEXT: vmacc.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %b, <32 x i1> %allones, i32 %evl) + %y = call <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8> %x, <32 x i8> %c, <32 x i1> %allones, i32 %evl) + %u = call <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1> %allones, <32 x i8> %y, <32 x i8> %c, i32 %evl) + ret <32 x i8> %u +} + +define <32 x i8> @vmacc_vx_nxv32i8(<32 x i8> %a, i8 %b, <32 x i8> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, 
e8, m2, tu, mu +; CHECK-NEXT: vmacc.vx v10, a0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <32 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <32 x i8> %elt.head, <32 x i8> poison, <32 x i32> zeroinitializer + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %vb, <32 x i1> %allones, i32 %evl) + %y = call <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8> %x, <32 x i8> %c, <32 x i1> %allones, i32 %evl) + %u = call <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1> %m, <32 x i8> %y, <32 x i8> %c, i32 %evl) + ret <32 x i8> %u +} + +define <32 x i8> @vmacc_vx_nxv32i8_unmasked(<32 x i8> %a, i8 %b, <32 x i8> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, tu, ma +; CHECK-NEXT: vmacc.vx v10, a0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <32 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <32 x i8> %elt.head, <32 x i8> poison, <32 x i32> zeroinitializer + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %vb, <32 x i1> %allones, i32 %evl) + %y = call <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8> %x, <32 x i8> %c, <32 x i1> %allones, i32 %evl) + %u = call <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1> %allones, <32 x i8> %y, <32 x i8> %c, i32 %evl) + ret <32 x i8> %u +} + +define <32 x i8> @vmacc_vv_nxv32i8_ta(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv32i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <32 x 
i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %b, <32 x i1> %allones, i32 %evl) + %y = call <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8> %x, <32 x i8> %c, <32 x i1> %allones, i32 %evl) + %u = call <32 x i8> @llvm.vp.select.nxv32i8(<32 x i1> %m, <32 x i8> %y, <32 x i8> %c, i32 %evl) + ret <32 x i8> %u +} + +define <32 x i8> @vmacc_vx_nxv32i8_ta(<32 x i8> %a, i8 %b, <32 x i8> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv32i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu +; CHECK-NEXT: vmacc.vx v10, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <32 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <32 x i8> %elt.head, <32 x i8> poison, <32 x i32> zeroinitializer + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %vb, <32 x i1> %allones, i32 %evl) + %y = call <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8> %x, <32 x i8> %c, <32 x i1> %allones, i32 %evl) + %u = call <32 x i8> @llvm.vp.select.nxv32i8(<32 x i1> %m, <32 x i8> %y, <32 x i8> %c, i32 %evl) + ret <32 x i8> %u +} + +declare <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8>, <64 x i8>, <64 x i1>, i32) +declare <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8>, <64 x i8>, <64 x i1>, i32) +declare <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1>, <64 x i8>, <64 x i8>, i32) +declare <64 x i8> @llvm.vp.select.nxv64i8(<64 x i1>, <64 x i8>, <64 x i8>, i32) + +define <64 x i8> @vmacc_vv_nxv64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, mu +; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = 
insertelement <64 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <64 x i1> %splat, <64 x i1> poison, <64 x i32> zeroinitializer + %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %b, <64 x i1> %allones, i32 %evl) + %y = call <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8> %x, <64 x i8> %c, <64 x i1> %allones, i32 %evl) + %u = call <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1> %m, <64 x i8> %y, <64 x i8> %c, i32 %evl) + ret <64 x i8> %u +} + +define <64 x i8> @vmacc_vv_nxv64i8_unmasked(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, ma +; CHECK-NEXT: vmacc.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <64 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <64 x i1> %splat, <64 x i1> poison, <64 x i32> zeroinitializer + %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %b, <64 x i1> %allones, i32 %evl) + %y = call <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8> %x, <64 x i8> %c, <64 x i1> %allones, i32 %evl) + %u = call <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1> %allones, <64 x i8> %y, <64 x i8> %c, i32 %evl) + ret <64 x i8> %u +} + +define <64 x i8> @vmacc_vx_nxv64i8(<64 x i8> %a, i8 %b, <64 x i8> %c, <64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, tu, mu +; CHECK-NEXT: vmacc.vx v12, a0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <64 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <64 x i8> %elt.head, <64 x i8> poison, <64 x i32> zeroinitializer + %splat = insertelement <64 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <64 x i1> %splat, <64 x i1> poison, <64 x i32> zeroinitializer + %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %vb, <64 x i1> %allones, i32 %evl) + %y = call <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8> 
%x, <64 x i8> %c, <64 x i1> %allones, i32 %evl) + %u = call <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1> %m, <64 x i8> %y, <64 x i8> %c, i32 %evl) + ret <64 x i8> %u +} + +define <64 x i8> @vmacc_vx_nxv64i8_unmasked(<64 x i8> %a, i8 %b, <64 x i8> %c, <64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, tu, ma +; CHECK-NEXT: vmacc.vx v12, a0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <64 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <64 x i8> %elt.head, <64 x i8> poison, <64 x i32> zeroinitializer + %splat = insertelement <64 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <64 x i1> %splat, <64 x i1> poison, <64 x i32> zeroinitializer + %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %vb, <64 x i1> %allones, i32 %evl) + %y = call <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8> %x, <64 x i8> %c, <64 x i1> %allones, i32 %evl) + %u = call <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1> %allones, <64 x i8> %y, <64 x i8> %c, i32 %evl) + ret <64 x i8> %u +} + +define <64 x i8> @vmacc_vv_nxv64i8_ta(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv64i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <64 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <64 x i1> %splat, <64 x i1> poison, <64 x i32> zeroinitializer + %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %b, <64 x i1> %allones, i32 %evl) + %y = call <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8> %x, <64 x i8> %c, <64 x i1> %allones, i32 %evl) + %u = call <64 x i8> @llvm.vp.select.nxv64i8(<64 x i1> %m, <64 x i8> %y, <64 x i8> %c, i32 %evl) + ret <64 x i8> %u +} + +define <64 x i8> @vmacc_vx_nxv64i8_ta(<64 x i8> %a, i8 %b, <64 x i8> %c, <64 x i1> %m, i32 zeroext %evl) { +; 
CHECK-LABEL: vmacc_vx_nxv64i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu +; CHECK-NEXT: vmacc.vx v12, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <64 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <64 x i8> %elt.head, <64 x i8> poison, <64 x i32> zeroinitializer + %splat = insertelement <64 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <64 x i1> %splat, <64 x i1> poison, <64 x i32> zeroinitializer + %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %vb, <64 x i1> %allones, i32 %evl) + %y = call <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8> %x, <64 x i8> %c, <64 x i1> %allones, i32 %evl) + %u = call <64 x i8> @llvm.vp.select.nxv64i8(<64 x i1> %m, <64 x i8> %y, <64 x i8> %c, i32 %evl) + ret <64 x i8> %u +} + +declare <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32) +declare <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32) +declare <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1>, <2 x i16>, <2 x i16>, i32) +declare <2 x i16> @llvm.vp.select.nxv2i16(<2 x i1>, <2 x i16>, <2 x i16>, i32) + +define <2 x i16> @vmacc_vv_nxv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i1> %allones, i32 %evl) + %y = call <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16> %x, <2 x i16> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1> %m, <2 x i16> %y, <2 x i16> %c, i32 %evl) + ret <2 x i16> %u +} + +define <2 x i16> @vmacc_vv_nxv2i16_unmasked(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x i1> %m, i32 
zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma +; CHECK-NEXT: vmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i1> %allones, i32 %evl) + %y = call <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16> %x, <2 x i16> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1> %allones, <2 x i16> %y, <2 x i16> %c, i32 %evl) + ret <2 x i16> %u +} + +define <2 x i16> @vmacc_vx_nxv2i16(<2 x i16> %a, i16 %b, <2 x i16> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %vb, <2 x i1> %allones, i32 %evl) + %y = call <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16> %x, <2 x i16> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1> %m, <2 x i16> %y, <2 x i16> %c, i32 %evl) + ret <2 x i16> %u +} + +define <2 x i16> @vmacc_vx_nxv2i16_unmasked(<2 x i16> %a, i16 %b, <2 x i16> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma +; CHECK-NEXT: vmacc.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <2 x i16> 
%elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %vb, <2 x i1> %allones, i32 %evl) + %y = call <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16> %x, <2 x i16> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1> %allones, <2 x i16> %y, <2 x i16> %c, i32 %evl) + ret <2 x i16> %u +} + +define <2 x i16> @vmacc_vv_nxv2i16_ta(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv2i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i1> %allones, i32 %evl) + %y = call <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16> %x, <2 x i16> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x i16> @llvm.vp.select.nxv2i16(<2 x i1> %m, <2 x i16> %y, <2 x i16> %c, i32 %evl) + ret <2 x i16> %u +} + +define <2 x i16> @vmacc_vx_nxv2i16_ta(<2 x i16> %a, i16 %b, <2 x i16> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv2i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %vb, <2 x i1> %allones, i32 %evl) + %y = 
call <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16> %x, <2 x i16> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x i16> @llvm.vp.select.nxv2i16(<2 x i1> %m, <2 x i16> %y, <2 x i16> %c, i32 %evl) + ret <2 x i16> %u +} + +declare <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32) +declare <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32) +declare <4 x i16> @llvm.vp.merge.nxv4i16(<4 x i1>, <4 x i16>, <4 x i16>, i32) +declare <4 x i16> @llvm.vp.select.nxv4i16(<4 x i1>, <4 x i16>, <4 x i16>, i32) + +define <4 x i16> @vmacc_vv_nxv4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %b, <4 x i1> %allones, i32 %evl) + %y = call <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16> %x, <4 x i16> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x i16> @llvm.vp.merge.nxv4i16(<4 x i1> %m, <4 x i16> %y, <4 x i16> %c, i32 %evl) + ret <4 x i16> %u +} + +define <4 x i16> @vmacc_vv_nxv4i16_unmasked(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma +; CHECK-NEXT: vmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %b, <4 x i1> %allones, i32 %evl) + %y = call <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16> %x, <4 x i16> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x i16> 
@llvm.vp.merge.nxv4i16(<4 x i1> %allones, <4 x i16> %y, <4 x i16> %c, i32 %evl) + ret <4 x i16> %u +} + +define <4 x i16> @vmacc_vx_nxv4i16(<4 x i16> %a, i16 %b, <4 x i16> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %vb, <4 x i1> %allones, i32 %evl) + %y = call <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16> %x, <4 x i16> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x i16> @llvm.vp.merge.nxv4i16(<4 x i1> %m, <4 x i16> %y, <4 x i16> %c, i32 %evl) + ret <4 x i16> %u +} + +define <4 x i16> @vmacc_vx_nxv4i16_unmasked(<4 x i16> %a, i16 %b, <4 x i16> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma +; CHECK-NEXT: vmacc.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %vb, <4 x i1> %allones, i32 %evl) + %y = call <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16> %x, <4 x i16> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x i16> @llvm.vp.merge.nxv4i16(<4 x i1> %allones, <4 x i16> %y, <4 x i16> %c, i32 %evl) + ret <4 x i16> %u +} + +define <4 x i16> @vmacc_vv_nxv4i16_ta(<4 x i16> %a, <4 x 
i16> %b, <4 x i16> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv4i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %b, <4 x i1> %allones, i32 %evl) + %y = call <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16> %x, <4 x i16> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x i16> @llvm.vp.select.nxv4i16(<4 x i1> %m, <4 x i16> %y, <4 x i16> %c, i32 %evl) + ret <4 x i16> %u +} + +define <4 x i16> @vmacc_vx_nxv4i16_ta(<4 x i16> %a, i16 %b, <4 x i16> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv4i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %vb, <4 x i1> %allones, i32 %evl) + %y = call <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16> %x, <4 x i16> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x i16> @llvm.vp.select.nxv4i16(<4 x i1> %m, <4 x i16> %y, <4 x i16> %c, i32 %evl) + ret <4 x i16> %u +} + +declare <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32) +declare <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32) +declare <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1>, <8 x i16>, <8 x i16>, i32) +declare <8 x i16> @llvm.vp.select.nxv8i16(<8 x i1>, <8 x i16>, <8 x i16>, i32) + +define <8 x i16> @vmacc_vv_nxv8i16(<8 x i16> %a, <8 
x i16> %b, <8 x i16> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %b, <8 x i1> %allones, i32 %evl) + %y = call <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16> %x, <8 x i16> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1> %m, <8 x i16> %y, <8 x i16> %c, i32 %evl) + ret <8 x i16> %u +} + +define <8 x i16> @vmacc_vv_nxv8i16_unmasked(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma +; CHECK-NEXT: vmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %b, <8 x i1> %allones, i32 %evl) + %y = call <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16> %x, <8 x i16> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1> %allones, <8 x i16> %y, <8 x i16> %c, i32 %evl) + ret <8 x i16> %u +} + +define <8 x i16> @vmacc_vx_nxv8i16(<8 x i16> %a, i16 %b, <8 x i16> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + 
%allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %vb, <8 x i1> %allones, i32 %evl) + %y = call <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16> %x, <8 x i16> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1> %m, <8 x i16> %y, <8 x i16> %c, i32 %evl) + ret <8 x i16> %u +} + +define <8 x i16> @vmacc_vx_nxv8i16_unmasked(<8 x i16> %a, i16 %b, <8 x i16> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma +; CHECK-NEXT: vmacc.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %vb, <8 x i1> %allones, i32 %evl) + %y = call <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16> %x, <8 x i16> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1> %allones, <8 x i16> %y, <8 x i16> %c, i32 %evl) + ret <8 x i16> %u +} + +define <8 x i16> @vmacc_vv_nxv8i16_ta(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv8i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %b, <8 x i1> %allones, i32 %evl) + %y = call <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16> %x, <8 x i16> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 
x i16> @llvm.vp.select.nxv8i16(<8 x i1> %m, <8 x i16> %y, <8 x i16> %c, i32 %evl) + ret <8 x i16> %u +} + +define <8 x i16> @vmacc_vx_nxv8i16_ta(<8 x i16> %a, i16 %b, <8 x i16> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv8i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %vb, <8 x i1> %allones, i32 %evl) + %y = call <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16> %x, <8 x i16> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x i16> @llvm.vp.select.nxv8i16(<8 x i1> %m, <8 x i16> %y, <8 x i16> %c, i32 %evl) + ret <8 x i16> %u +} + +declare <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16>, <16 x i16>, <16 x i1>, i32) +declare <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16>, <16 x i16>, <16 x i1>, i32) +declare <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1>, <16 x i16>, <16 x i16>, i32) +declare <16 x i16> @llvm.vp.select.nxv16i16(<16 x i1>, <16 x i16>, <16 x i16>, i32) + +define <16 x i16> @vmacc_vv_nxv16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %b, <16 x i1> %allones, i32 %evl) + %y = call <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16> %x, <16 x i16> %c, <16 x 
i1> %allones, i32 %evl) + %u = call <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1> %m, <16 x i16> %y, <16 x i16> %c, i32 %evl) + ret <16 x i16> %u +} + +define <16 x i16> @vmacc_vv_nxv16i16_unmasked(<16 x i16> %a, <16 x i16> %b, <16 x i16> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma +; CHECK-NEXT: vmacc.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %b, <16 x i1> %allones, i32 %evl) + %y = call <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16> %x, <16 x i16> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1> %allones, <16 x i16> %y, <16 x i16> %c, i32 %evl) + ret <16 x i16> %u +} + +define <16 x i16> @vmacc_vx_nxv16i16(<16 x i16> %a, i16 %b, <16 x i16> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu +; CHECK-NEXT: vmacc.vx v10, a0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %vb, <16 x i1> %allones, i32 %evl) + %y = call <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16> %x, <16 x i16> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1> %m, <16 x i16> %y, <16 x i16> %c, i32 %evl) + ret <16 x i16> %u +} + +define <16 x i16> @vmacc_vx_nxv16i16_unmasked(<16 x i16> %a, i16 %b, <16 x i16> %c, <16 x i1> 
%m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, ma +; CHECK-NEXT: vmacc.vx v10, a0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %vb, <16 x i1> %allones, i32 %evl) + %y = call <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16> %x, <16 x i16> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1> %allones, <16 x i16> %y, <16 x i16> %c, i32 %evl) + ret <16 x i16> %u +} + +define <16 x i16> @vmacc_vv_nxv16i16_ta(<16 x i16> %a, <16 x i16> %b, <16 x i16> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv16i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %b, <16 x i1> %allones, i32 %evl) + %y = call <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16> %x, <16 x i16> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x i16> @llvm.vp.select.nxv16i16(<16 x i1> %m, <16 x i16> %y, <16 x i16> %c, i32 %evl) + ret <16 x i16> %u +} + +define <16 x i16> @vmacc_vx_nxv16i16_ta(<16 x i16> %a, i16 %b, <16 x i16> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv16i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT: vmacc.vx v10, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = 
insertelement <16 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %vb, <16 x i1> %allones, i32 %evl) + %y = call <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16> %x, <16 x i16> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x i16> @llvm.vp.select.nxv16i16(<16 x i1> %m, <16 x i16> %y, <16 x i16> %c, i32 %evl) + ret <16 x i16> %u +} + +declare <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16>, <32 x i16>, <32 x i1>, i32) +declare <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16>, <32 x i16>, <32 x i1>, i32) +declare <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1>, <32 x i16>, <32 x i16>, i32) +declare <32 x i16> @llvm.vp.select.nxv32i16(<32 x i1>, <32 x i16>, <32 x i16>, i32) + +define <32 x i16> @vmacc_vv_nxv32i16(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %b, <32 x i1> %allones, i32 %evl) + %y = call <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16> %x, <32 x i16> %c, <32 x i1> %allones, i32 %evl) + %u = call <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1> %m, <32 x i16> %y, <32 x i16> %c, i32 %evl) + ret <32 x i16> %u +} + +define <32 x i16> @vmacc_vv_nxv32i16_unmasked(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, ma +; 
CHECK-NEXT: vmacc.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %b, <32 x i1> %allones, i32 %evl) + %y = call <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16> %x, <32 x i16> %c, <32 x i1> %allones, i32 %evl) + %u = call <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1> %allones, <32 x i16> %y, <32 x i16> %c, i32 %evl) + ret <32 x i16> %u +} + +define <32 x i16> @vmacc_vx_nxv32i16(<32 x i16> %a, i16 %b, <32 x i16> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu +; CHECK-NEXT: vmacc.vx v12, a0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <32 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <32 x i16> %elt.head, <32 x i16> poison, <32 x i32> zeroinitializer + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %vb, <32 x i1> %allones, i32 %evl) + %y = call <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16> %x, <32 x i16> %c, <32 x i1> %allones, i32 %evl) + %u = call <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1> %m, <32 x i16> %y, <32 x i16> %c, i32 %evl) + ret <32 x i16> %u +} + +define <32 x i16> @vmacc_vx_nxv32i16_unmasked(<32 x i16> %a, i16 %b, <32 x i16> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, ma +; CHECK-NEXT: vmacc.vx v12, a0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <32 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <32 x i16> %elt.head, <32 x i16> poison, <32 x i32> zeroinitializer + %splat = 
insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %vb, <32 x i1> %allones, i32 %evl) + %y = call <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16> %x, <32 x i16> %c, <32 x i1> %allones, i32 %evl) + %u = call <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1> %allones, <32 x i16> %y, <32 x i16> %c, i32 %evl) + ret <32 x i16> %u +} + +define <32 x i16> @vmacc_vv_nxv32i16_ta(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv32i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %b, <32 x i1> %allones, i32 %evl) + %y = call <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16> %x, <32 x i16> %c, <32 x i1> %allones, i32 %evl) + %u = call <32 x i16> @llvm.vp.select.nxv32i16(<32 x i1> %m, <32 x i16> %y, <32 x i16> %c, i32 %evl) + ret <32 x i16> %u +} + +define <32 x i16> @vmacc_vx_nxv32i16_ta(<32 x i16> %a, i16 %b, <32 x i16> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv32i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT: vmacc.vx v12, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <32 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <32 x i16> %elt.head, <32 x i16> poison, <32 x i32> zeroinitializer + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %vb, <32 x i1> %allones, i32 %evl) + %y 
= call <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16> %x, <32 x i16> %c, <32 x i1> %allones, i32 %evl) + %u = call <32 x i16> @llvm.vp.select.nxv32i16(<32 x i1> %m, <32 x i16> %y, <32 x i16> %c, i32 %evl) + ret <32 x i16> %u +} + +declare <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32) +declare <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32) +declare <2 x i32> @llvm.vp.merge.nxv2i32(<2 x i1>, <2 x i32>, <2 x i32>, i32) +declare <2 x i32> @llvm.vp.select.nxv2i32(<2 x i1>, <2 x i32>, <2 x i32>, i32) + +define <2 x i32> @vmacc_vv_nxv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i1> %allones, i32 %evl) + %y = call <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32> %x, <2 x i32> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x i32> @llvm.vp.merge.nxv2i32(<2 x i1> %m, <2 x i32> %y, <2 x i32> %c, i32 %evl) + ret <2 x i32> %u +} + +define <2 x i32> @vmacc_vv_nxv2i32_unmasked(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma +; CHECK-NEXT: vmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i1> %allones, i32 %evl) + %y = call <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32> %x, <2 x i32> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x i32> 
@llvm.vp.merge.nxv2i32(<2 x i1> %allones, <2 x i32> %y, <2 x i32> %c, i32 %evl) + ret <2 x i32> %u +} + +define <2 x i32> @vmacc_vx_nxv2i32(<2 x i32> %a, i32 %b, <2 x i32> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %vb, <2 x i1> %allones, i32 %evl) + %y = call <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32> %x, <2 x i32> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x i32> @llvm.vp.merge.nxv2i32(<2 x i1> %m, <2 x i32> %y, <2 x i32> %c, i32 %evl) + ret <2 x i32> %u +} + +define <2 x i32> @vmacc_vx_nxv2i32_unmasked(<2 x i32> %a, i32 %b, <2 x i32> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma +; CHECK-NEXT: vmacc.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %vb, <2 x i1> %allones, i32 %evl) + %y = call <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32> %x, <2 x i32> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x i32> @llvm.vp.merge.nxv2i32(<2 x i1> %allones, <2 x i32> %y, <2 x i32> %c, i32 %evl) + ret <2 x i32> %u +} + +define <2 x i32> @vmacc_vv_nxv2i32_ta(<2 x i32> %a, <2 x 
i32> %b, <2 x i32> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv2i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i1> %allones, i32 %evl) + %y = call <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32> %x, <2 x i32> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x i32> @llvm.vp.select.nxv2i32(<2 x i1> %m, <2 x i32> %y, <2 x i32> %c, i32 %evl) + ret <2 x i32> %u +} + +define <2 x i32> @vmacc_vx_nxv2i32_ta(<2 x i32> %a, i32 %b, <2 x i32> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv2i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %vb, <2 x i1> %allones, i32 %evl) + %y = call <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32> %x, <2 x i32> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x i32> @llvm.vp.select.nxv2i32(<2 x i1> %m, <2 x i32> %y, <2 x i32> %c, i32 %evl) + ret <2 x i32> %u +} + +declare <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32) +declare <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32) +declare <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1>, <4 x i32>, <4 x i32>, i32) +declare <4 x i32> @llvm.vp.select.nxv4i32(<4 x i1>, <4 x i32>, <4 x i32>, i32) + +define <4 x i32> @vmacc_vv_nxv4i32(<4 x i32> %a, <4 
x i32> %b, <4 x i32> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %allones, i32 %evl) + %y = call <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32> %x, <4 x i32> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1> %m, <4 x i32> %y, <4 x i32> %c, i32 %evl) + ret <4 x i32> %u +} + +define <4 x i32> @vmacc_vv_nxv4i32_unmasked(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma +; CHECK-NEXT: vmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %allones, i32 %evl) + %y = call <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32> %x, <4 x i32> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1> %allones, <4 x i32> %y, <4 x i32> %c, i32 %evl) + ret <4 x i32> %u +} + +define <4 x i32> @vmacc_vx_nxv4i32(<4 x i32> %a, i32 %b, <4 x i32> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + 
%allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %vb, <4 x i1> %allones, i32 %evl) + %y = call <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32> %x, <4 x i32> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1> %m, <4 x i32> %y, <4 x i32> %c, i32 %evl) + ret <4 x i32> %u +} + +define <4 x i32> @vmacc_vx_nxv4i32_unmasked(<4 x i32> %a, i32 %b, <4 x i32> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma +; CHECK-NEXT: vmacc.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %vb, <4 x i1> %allones, i32 %evl) + %y = call <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32> %x, <4 x i32> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1> %allones, <4 x i32> %y, <4 x i32> %c, i32 %evl) + ret <4 x i32> %u +} + +define <4 x i32> @vmacc_vv_nxv4i32_ta(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv4i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %allones, i32 %evl) + %y = call <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32> %x, <4 x i32> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 
x i32> @llvm.vp.select.nxv4i32(<4 x i1> %m, <4 x i32> %y, <4 x i32> %c, i32 %evl) + ret <4 x i32> %u +} + +define <4 x i32> @vmacc_vx_nxv4i32_ta(<4 x i32> %a, i32 %b, <4 x i32> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv4i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %vb, <4 x i1> %allones, i32 %evl) + %y = call <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32> %x, <4 x i32> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x i32> @llvm.vp.select.nxv4i32(<4 x i1> %m, <4 x i32> %y, <4 x i32> %c, i32 %evl) + ret <4 x i32> %u +} + +declare <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.merge.nxv8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32) +declare <8 x i32> @llvm.vp.select.nxv8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32) + +define <8 x i32> @vmacc_vv_nxv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i1> %allones, i32 %evl) + %y = call <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32> %x, <8 x i32> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x 
i32> @llvm.vp.merge.nxv8i32(<8 x i1> %m, <8 x i32> %y, <8 x i32> %c, i32 %evl) + ret <8 x i32> %u +} + +define <8 x i32> @vmacc_vv_nxv8i32_unmasked(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma +; CHECK-NEXT: vmacc.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i1> %allones, i32 %evl) + %y = call <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32> %x, <8 x i32> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x i32> @llvm.vp.merge.nxv8i32(<8 x i1> %allones, <8 x i32> %y, <8 x i32> %c, i32 %evl) + ret <8 x i32> %u +} + +define <8 x i32> @vmacc_vx_nxv8i32(<8 x i32> %a, i32 %b, <8 x i32> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu +; CHECK-NEXT: vmacc.vx v10, a0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %vb, <8 x i1> %allones, i32 %evl) + %y = call <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32> %x, <8 x i32> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x i32> @llvm.vp.merge.nxv8i32(<8 x i1> %m, <8 x i32> %y, <8 x i32> %c, i32 %evl) + ret <8 x i32> %u +} + +define <8 x i32> @vmacc_vx_nxv8i32_unmasked(<8 x i32> %a, i32 %b, <8 x i32> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli 
zero, a1, e32, m2, tu, ma +; CHECK-NEXT: vmacc.vx v10, a0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %vb, <8 x i1> %allones, i32 %evl) + %y = call <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32> %x, <8 x i32> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x i32> @llvm.vp.merge.nxv8i32(<8 x i1> %allones, <8 x i32> %y, <8 x i32> %c, i32 %evl) + ret <8 x i32> %u +} + +define <8 x i32> @vmacc_vv_nxv8i32_ta(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv8i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i1> %allones, i32 %evl) + %y = call <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32> %x, <8 x i32> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x i32> @llvm.vp.select.nxv8i32(<8 x i1> %m, <8 x i32> %y, <8 x i32> %c, i32 %evl) + ret <8 x i32> %u +} + +define <8 x i32> @vmacc_vx_nxv8i32_ta(<8 x i32> %a, i32 %b, <8 x i32> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv8i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT: vmacc.vx v10, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 
-1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %vb, <8 x i1> %allones, i32 %evl) + %y = call <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32> %x, <8 x i32> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x i32> @llvm.vp.select.nxv8i32(<8 x i1> %m, <8 x i32> %y, <8 x i32> %c, i32 %evl) + ret <8 x i32> %u +} + +declare <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32) +declare <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32) +declare <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1>, <16 x i32>, <16 x i32>, i32) +declare <16 x i32> @llvm.vp.select.nxv16i32(<16 x i1>, <16 x i32>, <16 x i32>, i32) + +define <16 x i32> @vmacc_vv_nxv16i32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %b, <16 x i1> %allones, i32 %evl) + %y = call <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32> %x, <16 x i32> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1> %m, <16 x i32> %y, <16 x i32> %c, i32 %evl) + ret <16 x i32> %u +} + +define <16 x i32> @vmacc_vv_nxv16i32_unmasked(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, ma +; CHECK-NEXT: vmacc.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x 
i32> zeroinitializer + %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %b, <16 x i1> %allones, i32 %evl) + %y = call <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32> %x, <16 x i32> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1> %allones, <16 x i32> %y, <16 x i32> %c, i32 %evl) + ret <16 x i32> %u +} + +define <16 x i32> @vmacc_vx_nxv16i32(<16 x i32> %a, i32 %b, <16 x i32> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu +; CHECK-NEXT: vmacc.vx v12, a0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %vb, <16 x i1> %allones, i32 %evl) + %y = call <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32> %x, <16 x i32> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1> %m, <16 x i32> %y, <16 x i32> %c, i32 %evl) + ret <16 x i32> %u +} + +define <16 x i32> @vmacc_vx_nxv16i32_unmasked(<16 x i32> %a, i32 %b, <16 x i32> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, ma +; CHECK-NEXT: vmacc.vx v12, a0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %vb, <16 x 
i1> %allones, i32 %evl) + %y = call <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32> %x, <16 x i32> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1> %allones, <16 x i32> %y, <16 x i32> %c, i32 %evl) + ret <16 x i32> %u +} + +define <16 x i32> @vmacc_vv_nxv16i32_ta(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv16i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %b, <16 x i1> %allones, i32 %evl) + %y = call <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32> %x, <16 x i32> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x i32> @llvm.vp.select.nxv16i32(<16 x i1> %m, <16 x i32> %y, <16 x i32> %c, i32 %evl) + ret <16 x i32> %u +} + +define <16 x i32> @vmacc_vx_nxv16i32_ta(<16 x i32> %a, i32 %b, <16 x i32> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv16i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT: vmacc.vx v12, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %vb, <16 x i1> %allones, i32 %evl) + %y = call <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32> %x, <16 x i32> %c, <16 x i1> %allones, i32 %evl) + %u = call <16 x i32> @llvm.vp.select.nxv16i32(<16 x i1> %m, <16 x i32> %y, <16 x i32> %c, i32 %evl) + ret <16 x 
i32> %u +} + +declare <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32) +declare <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32) +declare <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1>, <2 x i64>, <2 x i64>, i32) +declare <2 x i64> @llvm.vp.select.nxv2i64(<2 x i1>, <2 x i64>, <2 x i64>, i32) + +define <2 x i64> @vmacc_vv_nxv2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %b, <2 x i1> %allones, i32 %evl) + %y = call <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64> %x, <2 x i64> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1> %m, <2 x i64> %y, <2 x i64> %c, i32 %evl) + ret <2 x i64> %u +} + +define <2 x i64> @vmacc_vv_nxv2i64_unmasked(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, ma +; CHECK-NEXT: vmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %b, <2 x i1> %allones, i32 %evl) + %y = call <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64> %x, <2 x i64> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1> %allones, <2 x i64> %y, <2 x i64> %c, i32 %evl) + ret <2 x i64> %u +} + +define <2 x i64> @vmacc_vx_nxv2i64(<2 x i64> %a, i64 %b, <2 x i64> %c, <2 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: 
vmacc_vx_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, mu +; RV32-NEXT: vmacc.vv v9, v8, v10, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmacc_vx_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, mu +; RV64-NEXT: vmacc.vx v9, a0, v8, v0.t +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %vb, <2 x i1> %allones, i32 %evl) + %y = call <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64> %x, <2 x i64> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1> %m, <2 x i64> %y, <2 x i64> %c, i32 %evl) + ret <2 x i64> %u +} + +define <2 x i64> @vmacc_vx_nxv2i64_unmasked(<2 x i64> %a, i64 %b, <2 x i64> %c, <2 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vmacc_vx_nxv2i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma +; RV32-NEXT: vmacc.vv v9, v8, v10 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmacc_vx_nxv2i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma +; RV64-NEXT: vmacc.vx v9, a0, v8 +; RV64-NEXT: vmv1r.v v8, v9 +; 
RV64-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %vb, <2 x i1> %allones, i32 %evl) + %y = call <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64> %x, <2 x i64> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1> %allones, <2 x i64> %y, <2 x i64> %c, i32 %evl) + ret <2 x i64> %u +} + +define <2 x i64> @vmacc_vv_nxv2i64_ta(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv2i64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %b, <2 x i1> %allones, i32 %evl) + %y = call <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64> %x, <2 x i64> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x i64> @llvm.vp.select.nxv2i64(<2 x i1> %m, <2 x i64> %y, <2 x i64> %c, i32 %evl) + ret <2 x i64> %u +} + +define <2 x i64> @vmacc_vx_nxv2i64_ta(<2 x i64> %a, i64 %b, <2 x i64> %c, <2 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vmacc_vx_nxv2i64_ta: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: vmacc.vv v9, v8, v10, v0.t +; RV32-NEXT: vmv.v.v v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: 
vmacc_vx_nxv2i64_ta: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; RV64-NEXT: vmacc.vx v9, a0, v8, v0.t +; RV64-NEXT: vmv.v.v v8, v9 +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %vb, <2 x i1> %allones, i32 %evl) + %y = call <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64> %x, <2 x i64> %c, <2 x i1> %allones, i32 %evl) + %u = call <2 x i64> @llvm.vp.select.nxv2i64(<2 x i1> %m, <2 x i64> %y, <2 x i64> %c, i32 %evl) + ret <2 x i64> %u +} + +declare <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32) +declare <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32) +declare <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1>, <4 x i64>, <4 x i64>, i32) +declare <4 x i64> @llvm.vp.select.nxv4i64(<4 x i1>, <4 x i64>, <4 x i64>, i32) + +define <4 x i64> @vmacc_vv_nxv4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %b, <4 x i1> %allones, i32 %evl) + %y = call <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64> %x, <4 x i64> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1> %m, <4 x i64> %y, <4 x i64> %c, i32 %evl) + ret <4 x i64> %u +} + +define <4 x i64> @vmacc_vv_nxv4i64_unmasked(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i1> %m, i32 zeroext %evl) { +; 
CHECK-LABEL: vmacc_vv_nxv4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma +; CHECK-NEXT: vmacc.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %b, <4 x i1> %allones, i32 %evl) + %y = call <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64> %x, <4 x i64> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1> %allones, <4 x i64> %y, <4 x i64> %c, i32 %evl) + ret <4 x i64> %u +} + +define <4 x i64> @vmacc_vx_nxv4i64(<4 x i64> %a, i64 %b, <4 x i64> %c, <4 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vmacc_vx_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, mu +; RV32-NEXT: vmacc.vv v10, v8, v12, v0.t +; RV32-NEXT: vmv2r.v v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmacc_vx_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, mu +; RV64-NEXT: vmacc.vx v10, a0, v8, v0.t +; RV64-NEXT: vmv2r.v v8, v10 +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %vb, <4 x i1> %allones, i32 %evl) + %y = call <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64> %x, <4 x i64> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1> %m, <4 x i64> %y, <4 x i64> %c, i32 
%evl) + ret <4 x i64> %u +} + +define <4 x i64> @vmacc_vx_nxv4i64_unmasked(<4 x i64> %a, i64 %b, <4 x i64> %c, <4 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vmacc_vx_nxv4i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma +; RV32-NEXT: vmacc.vv v10, v8, v12 +; RV32-NEXT: vmv2r.v v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmacc_vx_nxv4i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma +; RV64-NEXT: vmacc.vx v10, a0, v8 +; RV64-NEXT: vmv2r.v v8, v10 +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %vb, <4 x i1> %allones, i32 %evl) + %y = call <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64> %x, <4 x i64> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1> %allones, <4 x i64> %y, <4 x i64> %c, i32 %evl) + ret <4 x i64> %u +} + +define <4 x i64> @vmacc_vv_nxv4i64_ta(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv4i64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %b, <4 x i1> %allones, i32 %evl) + %y = call <4 x i64> 
@llvm.vp.add.nxv4i64(<4 x i64> %x, <4 x i64> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x i64> @llvm.vp.select.nxv4i64(<4 x i1> %m, <4 x i64> %y, <4 x i64> %c, i32 %evl) + ret <4 x i64> %u +} + +define <4 x i64> @vmacc_vx_nxv4i64_ta(<4 x i64> %a, i64 %b, <4 x i64> %c, <4 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vmacc_vx_nxv4i64_ta: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu +; RV32-NEXT: vmacc.vv v10, v8, v12, v0.t +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmacc_vx_nxv4i64_ta: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu +; RV64-NEXT: vmacc.vx v10, a0, v8, v0.t +; RV64-NEXT: vmv.v.v v8, v10 +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %vb, <4 x i1> %allones, i32 %evl) + %y = call <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64> %x, <4 x i64> %c, <4 x i1> %allones, i32 %evl) + %u = call <4 x i64> @llvm.vp.select.nxv4i64(<4 x i1> %m, <4 x i64> %y, <4 x i64> %c, i32 %evl) + ret <4 x i64> %u +} + +declare <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32) +declare <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32) +declare <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1>, <8 x i64>, <8 x i64>, i32) +declare <8 x i64> @llvm.vp.select.nxv8i64(<8 x i1>, <8 x i64>, <8 x i64>, i32) + +define <8 x i64> @vmacc_vv_nxv8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i1> %m, i32 zeroext %evl) { 
+; CHECK-LABEL: vmacc_vv_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %b, <8 x i1> %allones, i32 %evl) + %y = call <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64> %x, <8 x i64> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1> %m, <8 x i64> %y, <8 x i64> %c, i32 %evl) + ret <8 x i64> %u +} + +define <8 x i64> @vmacc_vv_nxv8i64_unmasked(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma +; CHECK-NEXT: vmacc.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %b, <8 x i1> %allones, i32 %evl) + %y = call <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64> %x, <8 x i64> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1> %allones, <8 x i64> %y, <8 x i64> %c, i32 %evl) + ret <8 x i64> %u +} + +define <8 x i64> @vmacc_vx_nxv8i64(<8 x i64> %a, i64 %b, <8 x i64> %c, <8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vmacc_vx_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, mu +; RV32-NEXT: vmacc.vv v12, v8, v16, v0.t +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; 
RV32-NEXT: ret +; +; RV64-LABEL: vmacc_vx_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, mu +; RV64-NEXT: vmacc.vx v12, a0, v8, v0.t +; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %vb, <8 x i1> %allones, i32 %evl) + %y = call <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64> %x, <8 x i64> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1> %m, <8 x i64> %y, <8 x i64> %c, i32 %evl) + ret <8 x i64> %u +} + +define <8 x i64> @vmacc_vx_nxv8i64_unmasked(<8 x i64> %a, i64 %b, <8 x i64> %c, <8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vmacc_vx_nxv8i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, ma +; RV32-NEXT: vmacc.vv v12, v8, v16 +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmacc_vx_nxv8i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, ma +; RV64-NEXT: vmacc.vx v12, a0, v8 +; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %vb, <8 x i1> %allones, i32 %evl) + %y = call <8 x i64> 
@llvm.vp.add.nxv8i64(<8 x i64> %x, <8 x i64> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1> %allones, <8 x i64> %y, <8 x i64> %c, i32 %evl) + ret <8 x i64> %u +} + +define <8 x i64> @vmacc_vv_nxv8i64_ta(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv8i64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %b, <8 x i1> %allones, i32 %evl) + %y = call <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64> %x, <8 x i64> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x i64> @llvm.vp.select.nxv8i64(<8 x i1> %m, <8 x i64> %y, <8 x i64> %c, i32 %evl) + ret <8 x i64> %u +} + +define <8 x i64> @vmacc_vx_nxv8i64_ta(<8 x i64> %a, i64 %b, <8 x i64> %c, <8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vmacc_vx_nxv8i64_ta: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu +; RV32-NEXT: vmacc.vv v12, v8, v16, v0.t +; RV32-NEXT: vmv.v.v v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmacc_vx_nxv8i64_ta: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; RV64-NEXT: vmacc.vx v12, a0, v8, v0.t +; RV64-NEXT: vmv.v.v v8, v12 +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> 
poison, <8 x i32> zeroinitializer + %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %vb, <8 x i1> %allones, i32 %evl) + %y = call <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64> %x, <8 x i64> %c, <8 x i1> %allones, i32 %evl) + %u = call <8 x i64> @llvm.vp.select.nxv8i64(<8 x i1> %m, <8 x i64> %y, <8 x i64> %c, i32 %evl) + ret <8 x i64> %u +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnmsac-vp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnmsac-vp.ll @@ -0,0 +1,1958 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 + +declare <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32) +declare <2 x i8> @llvm.vp.sub.nxv2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32) +declare <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1>, <2 x i8>, <2 x i8>, i32) +declare <2 x i8> @llvm.vp.select.nxv2i8(<2 x i1>, <2 x i8>, <2 x i8>, i32) + +define <2 x i8> @vnmsac_vv_nxv2i8(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %b, <2 x i1> %allones, i32 %evl) + %y = call <2 x i8> @llvm.vp.sub.nxv2i8(<2 x i8> %c, <2 x i8> %x, <2 x i1> %allones, i32 %evl) + %u = call <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1> 
%m, <2 x i8> %y, <2 x i8> %c, i32 %evl) + ret <2 x i8> %u +} + +define <2 x i8> @vnmsac_vv_nxv2i8_unmasked(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma +; CHECK-NEXT: vnmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %b, <2 x i1> %allones, i32 %evl) + %y = call <2 x i8> @llvm.vp.sub.nxv2i8(<2 x i8> %c, <2 x i8> %x, <2 x i1> %allones, i32 %evl) + %u = call <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1> %allones, <2 x i8> %y, <2 x i8> %c, i32 %evl) + ret <2 x i8> %u +} + +define <2 x i8> @vnmsac_vx_nxv2i8(<2 x i8> %a, i8 %b, <2 x i8> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %vb, <2 x i1> %allones, i32 %evl) + %y = call <2 x i8> @llvm.vp.sub.nxv2i8(<2 x i8> %c, <2 x i8> %x, <2 x i1> %allones, i32 %evl) + %u = call <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1> %m, <2 x i8> %y, <2 x i8> %c, i32 %evl) + ret <2 x i8> %u +} + +define <2 x i8> @vnmsac_vx_nxv2i8_unmasked(<2 x i8> %a, i8 %b, <2 x i8> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma +; CHECK-NEXT: vnmsac.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v 
v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %vb, <2 x i1> %allones, i32 %evl) + %y = call <2 x i8> @llvm.vp.sub.nxv2i8(<2 x i8> %c, <2 x i8> %x, <2 x i1> %allones, i32 %evl) + %u = call <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1> %allones, <2 x i8> %y, <2 x i8> %c, i32 %evl) + ret <2 x i8> %u +} + +define <2 x i8> @vnmsac_vv_nxv2i8_ta(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv2i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %b, <2 x i1> %allones, i32 %evl) + %y = call <2 x i8> @llvm.vp.sub.nxv2i8(<2 x i8> %c, <2 x i8> %x, <2 x i1> %allones, i32 %evl) + %u = call <2 x i8> @llvm.vp.select.nxv2i8(<2 x i1> %m, <2 x i8> %y, <2 x i8> %c, i32 %evl) + ret <2 x i8> %u +} + +define <2 x i8> @vnmsac_vx_nxv2i8_ta(<2 x i8> %a, i8 %b, <2 x i8> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv2i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i8> 
@llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %vb, <2 x i1> %allones, i32 %evl) + %y = call <2 x i8> @llvm.vp.sub.nxv2i8(<2 x i8> %c, <2 x i8> %x, <2 x i1> %allones, i32 %evl) + %u = call <2 x i8> @llvm.vp.select.nxv2i8(<2 x i1> %m, <2 x i8> %y, <2 x i8> %c, i32 %evl) + ret <2 x i8> %u +} + +declare <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32) +declare <4 x i8> @llvm.vp.sub.nxv4i8(<4 x i8>, <4 x i8>, <4 x i1>, i32) +declare <4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1>, <4 x i8>, <4 x i8>, i32) +declare <4 x i8> @llvm.vp.select.nxv4i8(<4 x i1>, <4 x i8>, <4 x i8>, i32) + +define <4 x i8> @vnmsac_vv_nxv4i8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %b, <4 x i1> %allones, i32 %evl) + %y = call <4 x i8> @llvm.vp.sub.nxv4i8(<4 x i8> %c, <4 x i8> %x, <4 x i1> %allones, i32 %evl) + %u = call <4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1> %m, <4 x i8> %y, <4 x i8> %c, i32 %evl) + ret <4 x i8> %u +} + +define <4 x i8> @vnmsac_vv_nxv4i8_unmasked(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, ma +; CHECK-NEXT: vnmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %b, <4 x i1> %allones, i32 %evl) + %y = call <4 x i8> @llvm.vp.sub.nxv4i8(<4 x i8> %c, <4 x i8> %x, <4 x i1> %allones, i32 %evl) + %u = call 
<4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1> %allones, <4 x i8> %y, <4 x i8> %c, i32 %evl) + ret <4 x i8> %u +} + +define <4 x i8> @vnmsac_vx_nxv4i8(<4 x i8> %a, i8 %b, <4 x i8> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %vb, <4 x i1> %allones, i32 %evl) + %y = call <4 x i8> @llvm.vp.sub.nxv4i8(<4 x i8> %c, <4 x i8> %x, <4 x i1> %allones, i32 %evl) + %u = call <4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1> %m, <4 x i8> %y, <4 x i8> %c, i32 %evl) + ret <4 x i8> %u +} + +define <4 x i8> @vnmsac_vx_nxv4i8_unmasked(<4 x i8> %a, i8 %b, <4 x i8> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma +; CHECK-NEXT: vnmsac.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %vb, <4 x i1> %allones, i32 %evl) + %y = call <4 x i8> @llvm.vp.sub.nxv4i8(<4 x i8> %c, <4 x i8> %x, <4 x i1> %allones, i32 %evl) + %u = call <4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1> %allones, <4 x i8> %y, <4 x i8> %c, i32 %evl) + ret <4 x i8> %u +} + +define <4 x i8> @vnmsac_vv_nxv4i8_ta(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i1> %m, i32 
zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv4i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %b, <4 x i1> %allones, i32 %evl) + %y = call <4 x i8> @llvm.vp.sub.nxv4i8(<4 x i8> %c, <4 x i8> %x, <4 x i1> %allones, i32 %evl) + %u = call <4 x i8> @llvm.vp.select.nxv4i8(<4 x i1> %m, <4 x i8> %y, <4 x i8> %c, i32 %evl) + ret <4 x i8> %u +} + +define <4 x i8> @vnmsac_vx_nxv4i8_ta(<4 x i8> %a, i8 %b, <4 x i8> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv4i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %vb, <4 x i1> %allones, i32 %evl) + %y = call <4 x i8> @llvm.vp.sub.nxv4i8(<4 x i8> %c, <4 x i8> %x, <4 x i1> %allones, i32 %evl) + %u = call <4 x i8> @llvm.vp.select.nxv4i8(<4 x i1> %m, <4 x i8> %y, <4 x i8> %c, i32 %evl) + ret <4 x i8> %u +} + +declare <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32) +declare <8 x i8> @llvm.vp.sub.nxv8i8(<8 x i8>, <8 x i8>, <8 x i1>, i32) +declare <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1>, <8 x i8>, <8 x i8>, i32) +declare <8 x i8> @llvm.vp.select.nxv8i8(<8 x i1>, <8 x i8>, <8 x i8>, i32) + +define <8 x i8> @vnmsac_vv_nxv8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv8i8: 
+; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %b, <8 x i1> %allones, i32 %evl) + %y = call <8 x i8> @llvm.vp.sub.nxv8i8(<8 x i8> %c, <8 x i8> %x, <8 x i1> %allones, i32 %evl) + %u = call <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1> %m, <8 x i8> %y, <8 x i8> %c, i32 %evl) + ret <8 x i8> %u +} + +define <8 x i8> @vnmsac_vv_nxv8i8_unmasked(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma +; CHECK-NEXT: vnmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %b, <8 x i1> %allones, i32 %evl) + %y = call <8 x i8> @llvm.vp.sub.nxv8i8(<8 x i8> %c, <8 x i8> %x, <8 x i1> %allones, i32 %evl) + %u = call <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1> %allones, <8 x i8> %y, <8 x i8> %c, i32 %evl) + ret <8 x i8> %u +} + +define <8 x i8> @vnmsac_vx_nxv8i8(<8 x i8> %a, i8 %b, <8 x i8> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i8> 
@llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %vb, <8 x i1> %allones, i32 %evl) + %y = call <8 x i8> @llvm.vp.sub.nxv8i8(<8 x i8> %c, <8 x i8> %x, <8 x i1> %allones, i32 %evl) + %u = call <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1> %m, <8 x i8> %y, <8 x i8> %c, i32 %evl) + ret <8 x i8> %u +} + +define <8 x i8> @vnmsac_vx_nxv8i8_unmasked(<8 x i8> %a, i8 %b, <8 x i8> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma +; CHECK-NEXT: vnmsac.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %vb, <8 x i1> %allones, i32 %evl) + %y = call <8 x i8> @llvm.vp.sub.nxv8i8(<8 x i8> %c, <8 x i8> %x, <8 x i1> %allones, i32 %evl) + %u = call <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1> %allones, <8 x i8> %y, <8 x i8> %c, i32 %evl) + ret <8 x i8> %u +} + +define <8 x i8> @vnmsac_vv_nxv8i8_ta(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv8i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %b, <8 x i1> %allones, i32 %evl) + %y = call <8 x i8> @llvm.vp.sub.nxv8i8(<8 x i8> %c, <8 x i8> %x, <8 x i1> %allones, i32 %evl) + %u = call <8 x i8> @llvm.vp.select.nxv8i8(<8 x i1> %m, <8 x i8> %y, <8 x i8> %c, i32 %evl) + ret <8 x i8> %u +} + +define <8 x i8> @vnmsac_vx_nxv8i8_ta(<8 x i8> 
%a, i8 %b, <8 x i8> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv8i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %vb, <8 x i1> %allones, i32 %evl) + %y = call <8 x i8> @llvm.vp.sub.nxv8i8(<8 x i8> %c, <8 x i8> %x, <8 x i1> %allones, i32 %evl) + %u = call <8 x i8> @llvm.vp.select.nxv8i8(<8 x i1> %m, <8 x i8> %y, <8 x i8> %c, i32 %evl) + ret <8 x i8> %u +} + +declare <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32) +declare <16 x i8> @llvm.vp.sub.nxv16i8(<16 x i8>, <16 x i8>, <16 x i1>, i32) +declare <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1>, <16 x i8>, <16 x i8>, i32) +declare <16 x i8> @llvm.vp.select.nxv16i8(<16 x i1>, <16 x i8>, <16 x i8>, i32) + +define <16 x i8> @vnmsac_vv_nxv16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %b, <16 x i1> %allones, i32 %evl) + %y = call <16 x i8> @llvm.vp.sub.nxv16i8(<16 x i8> %c, <16 x i8> %x, <16 x i1> %allones, i32 %evl) + %u = call <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1> %m, <16 x i8> %y, <16 x i8> %c, i32 %evl) + ret <16 x i8> %u +} + +define <16 x i8> @vnmsac_vv_nxv16i8_unmasked(<16 x i8> 
%a, <16 x i8> %b, <16 x i8> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma +; CHECK-NEXT: vnmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %b, <16 x i1> %allones, i32 %evl) + %y = call <16 x i8> @llvm.vp.sub.nxv16i8(<16 x i8> %c, <16 x i8> %x, <16 x i1> %allones, i32 %evl) + %u = call <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1> %allones, <16 x i8> %y, <16 x i8> %c, i32 %evl) + ret <16 x i8> %u +} + +define <16 x i8> @vnmsac_vx_nxv16i8(<16 x i8> %a, i8 %b, <16 x i8> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %vb, <16 x i1> %allones, i32 %evl) + %y = call <16 x i8> @llvm.vp.sub.nxv16i8(<16 x i8> %c, <16 x i8> %x, <16 x i1> %allones, i32 %evl) + %u = call <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1> %m, <16 x i8> %y, <16 x i8> %c, i32 %evl) + ret <16 x i8> %u +} + +define <16 x i8> @vnmsac_vx_nxv16i8_unmasked(<16 x i8> %a, i8 %b, <16 x i8> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, ma +; CHECK-NEXT: vnmsac.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x 
i8> poison, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %vb, <16 x i1> %allones, i32 %evl) + %y = call <16 x i8> @llvm.vp.sub.nxv16i8(<16 x i8> %c, <16 x i8> %x, <16 x i1> %allones, i32 %evl) + %u = call <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1> %allones, <16 x i8> %y, <16 x i8> %c, i32 %evl) + ret <16 x i8> %u +} + +define <16 x i8> @vnmsac_vv_nxv16i8_ta(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv16i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %b, <16 x i1> %allones, i32 %evl) + %y = call <16 x i8> @llvm.vp.sub.nxv16i8(<16 x i8> %c, <16 x i8> %x, <16 x i1> %allones, i32 %evl) + %u = call <16 x i8> @llvm.vp.select.nxv16i8(<16 x i1> %m, <16 x i8> %y, <16 x i8> %c, i32 %evl) + ret <16 x i8> %u +} + +define <16 x i8> @vnmsac_vx_nxv16i8_ta(<16 x i8> %a, i8 %b, <16 x i8> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv16i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i8> 
@llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %vb, <16 x i1> %allones, i32 %evl) + %y = call <16 x i8> @llvm.vp.sub.nxv16i8(<16 x i8> %c, <16 x i8> %x, <16 x i1> %allones, i32 %evl) + %u = call <16 x i8> @llvm.vp.select.nxv16i8(<16 x i1> %m, <16 x i8> %y, <16 x i8> %c, i32 %evl) + ret <16 x i8> %u +} + +declare <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8>, <32 x i8>, <32 x i1>, i32) +declare <32 x i8> @llvm.vp.sub.nxv32i8(<32 x i8>, <32 x i8>, <32 x i1>, i32) +declare <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1>, <32 x i8>, <32 x i8>, i32) +declare <32 x i8> @llvm.vp.select.nxv32i8(<32 x i1>, <32 x i8>, <32 x i8>, i32) + +define <32 x i8> @vnmsac_vv_nxv32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, mu +; CHECK-NEXT: vnmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %b, <32 x i1> %allones, i32 %evl) + %y = call <32 x i8> @llvm.vp.sub.nxv32i8(<32 x i8> %c, <32 x i8> %x, <32 x i1> %allones, i32 %evl) + %u = call <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1> %m, <32 x i8> %y, <32 x i8> %c, i32 %evl) + ret <32 x i8> %u +} + +define <32 x i8> @vnmsac_vv_nxv32i8_unmasked(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, ma +; CHECK-NEXT: vnmsac.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %b, <32 x i1> %allones, i32 %evl) + %y = call <32 x i8> 
@llvm.vp.sub.nxv32i8(<32 x i8> %c, <32 x i8> %x, <32 x i1> %allones, i32 %evl) + %u = call <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1> %allones, <32 x i8> %y, <32 x i8> %c, i32 %evl) + ret <32 x i8> %u +} + +define <32 x i8> @vnmsac_vx_nxv32i8(<32 x i8> %a, i8 %b, <32 x i8> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, tu, mu +; CHECK-NEXT: vnmsac.vx v10, a0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <32 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <32 x i8> %elt.head, <32 x i8> poison, <32 x i32> zeroinitializer + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %vb, <32 x i1> %allones, i32 %evl) + %y = call <32 x i8> @llvm.vp.sub.nxv32i8(<32 x i8> %c, <32 x i8> %x, <32 x i1> %allones, i32 %evl) + %u = call <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1> %m, <32 x i8> %y, <32 x i8> %c, i32 %evl) + ret <32 x i8> %u +} + +define <32 x i8> @vnmsac_vx_nxv32i8_unmasked(<32 x i8> %a, i8 %b, <32 x i8> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, tu, ma +; CHECK-NEXT: vnmsac.vx v10, a0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <32 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <32 x i8> %elt.head, <32 x i8> poison, <32 x i32> zeroinitializer + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %vb, <32 x i1> %allones, i32 %evl) + %y = call <32 x i8> @llvm.vp.sub.nxv32i8(<32 x i8> %c, <32 x i8> %x, <32 x i1> %allones, i32 %evl) + %u = call <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1> %allones, 
<32 x i8> %y, <32 x i8> %c, i32 %evl) + ret <32 x i8> %u +} + +define <32 x i8> @vnmsac_vv_nxv32i8_ta(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv32i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vnmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %b, <32 x i1> %allones, i32 %evl) + %y = call <32 x i8> @llvm.vp.sub.nxv32i8(<32 x i8> %c, <32 x i8> %x, <32 x i1> %allones, i32 %evl) + %u = call <32 x i8> @llvm.vp.select.nxv32i8(<32 x i1> %m, <32 x i8> %y, <32 x i8> %c, i32 %evl) + ret <32 x i8> %u +} + +define <32 x i8> @vnmsac_vx_nxv32i8_ta(<32 x i8> %a, i8 %b, <32 x i8> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv32i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu +; CHECK-NEXT: vnmsac.vx v10, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <32 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <32 x i8> %elt.head, <32 x i8> poison, <32 x i32> zeroinitializer + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %vb, <32 x i1> %allones, i32 %evl) + %y = call <32 x i8> @llvm.vp.sub.nxv32i8(<32 x i8> %c, <32 x i8> %x, <32 x i1> %allones, i32 %evl) + %u = call <32 x i8> @llvm.vp.select.nxv32i8(<32 x i1> %m, <32 x i8> %y, <32 x i8> %c, i32 %evl) + ret <32 x i8> %u +} + +declare <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8>, <64 x i8>, <64 x i1>, i32) +declare <64 x i8> @llvm.vp.sub.nxv64i8(<64 x i8>, <64 x i8>, <64 x i1>, i32) +declare <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1>, <64 x i8>, <64 x 
i8>, i32) +declare <64 x i8> @llvm.vp.select.nxv64i8(<64 x i1>, <64 x i8>, <64 x i8>, i32) + +define <64 x i8> @vnmsac_vv_nxv64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, mu +; CHECK-NEXT: vnmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <64 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <64 x i1> %splat, <64 x i1> poison, <64 x i32> zeroinitializer + %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %b, <64 x i1> %allones, i32 %evl) + %y = call <64 x i8> @llvm.vp.sub.nxv64i8(<64 x i8> %c, <64 x i8> %x, <64 x i1> %allones, i32 %evl) + %u = call <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1> %m, <64 x i8> %y, <64 x i8> %c, i32 %evl) + ret <64 x i8> %u +} + +define <64 x i8> @vnmsac_vv_nxv64i8_unmasked(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, ma +; CHECK-NEXT: vnmsac.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <64 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <64 x i1> %splat, <64 x i1> poison, <64 x i32> zeroinitializer + %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %b, <64 x i1> %allones, i32 %evl) + %y = call <64 x i8> @llvm.vp.sub.nxv64i8(<64 x i8> %c, <64 x i8> %x, <64 x i1> %allones, i32 %evl) + %u = call <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1> %allones, <64 x i8> %y, <64 x i8> %c, i32 %evl) + ret <64 x i8> %u +} + +define <64 x i8> @vnmsac_vx_nxv64i8(<64 x i8> %a, i8 %b, <64 x i8> %c, <64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, tu, mu +; CHECK-NEXT: vnmsac.vx v12, a0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <64 
x i8> poison, i8 %b, i32 0 + %vb = shufflevector <64 x i8> %elt.head, <64 x i8> poison, <64 x i32> zeroinitializer + %splat = insertelement <64 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <64 x i1> %splat, <64 x i1> poison, <64 x i32> zeroinitializer + %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %vb, <64 x i1> %allones, i32 %evl) + %y = call <64 x i8> @llvm.vp.sub.nxv64i8(<64 x i8> %c, <64 x i8> %x, <64 x i1> %allones, i32 %evl) + %u = call <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1> %m, <64 x i8> %y, <64 x i8> %c, i32 %evl) + ret <64 x i8> %u +} + +define <64 x i8> @vnmsac_vx_nxv64i8_unmasked(<64 x i8> %a, i8 %b, <64 x i8> %c, <64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, tu, ma +; CHECK-NEXT: vnmsac.vx v12, a0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <64 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <64 x i8> %elt.head, <64 x i8> poison, <64 x i32> zeroinitializer + %splat = insertelement <64 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <64 x i1> %splat, <64 x i1> poison, <64 x i32> zeroinitializer + %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %vb, <64 x i1> %allones, i32 %evl) + %y = call <64 x i8> @llvm.vp.sub.nxv64i8(<64 x i8> %c, <64 x i8> %x, <64 x i1> %allones, i32 %evl) + %u = call <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1> %allones, <64 x i8> %y, <64 x i8> %c, i32 %evl) + ret <64 x i8> %u +} + +define <64 x i8> @vnmsac_vv_nxv64i8_ta(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv64i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vnmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <64 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <64 x i1> %splat, <64 x i1> poison, <64 x i32> zeroinitializer + %x = call <64 x 
i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %b, <64 x i1> %allones, i32 %evl) + %y = call <64 x i8> @llvm.vp.sub.nxv64i8(<64 x i8> %c, <64 x i8> %x, <64 x i1> %allones, i32 %evl) + %u = call <64 x i8> @llvm.vp.select.nxv64i8(<64 x i1> %m, <64 x i8> %y, <64 x i8> %c, i32 %evl) + ret <64 x i8> %u +} + +define <64 x i8> @vnmsac_vx_nxv64i8_ta(<64 x i8> %a, i8 %b, <64 x i8> %c, <64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv64i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu +; CHECK-NEXT: vnmsac.vx v12, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <64 x i8> poison, i8 %b, i32 0 + %vb = shufflevector <64 x i8> %elt.head, <64 x i8> poison, <64 x i32> zeroinitializer + %splat = insertelement <64 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <64 x i1> %splat, <64 x i1> poison, <64 x i32> zeroinitializer + %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %vb, <64 x i1> %allones, i32 %evl) + %y = call <64 x i8> @llvm.vp.sub.nxv64i8(<64 x i8> %c, <64 x i8> %x, <64 x i1> %allones, i32 %evl) + %u = call <64 x i8> @llvm.vp.select.nxv64i8(<64 x i1> %m, <64 x i8> %y, <64 x i8> %c, i32 %evl) + ret <64 x i8> %u +} + +declare <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32) +declare <2 x i16> @llvm.vp.sub.nxv2i16(<2 x i16>, <2 x i16>, <2 x i1>, i32) +declare <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1>, <2 x i16>, <2 x i16>, i32) +declare <2 x i16> @llvm.vp.select.nxv2i16(<2 x i1>, <2 x i16>, <2 x i16>, i32) + +define <2 x i16> @vnmsac_vv_nxv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + 
%x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i1> %allones, i32 %evl) + %y = call <2 x i16> @llvm.vp.sub.nxv2i16(<2 x i16> %c, <2 x i16> %x, <2 x i1> %allones, i32 %evl) + %u = call <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1> %m, <2 x i16> %y, <2 x i16> %c, i32 %evl) + ret <2 x i16> %u +} + +define <2 x i16> @vnmsac_vv_nxv2i16_unmasked(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma +; CHECK-NEXT: vnmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i1> %allones, i32 %evl) + %y = call <2 x i16> @llvm.vp.sub.nxv2i16(<2 x i16> %c, <2 x i16> %x, <2 x i1> %allones, i32 %evl) + %u = call <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1> %allones, <2 x i16> %y, <2 x i16> %c, i32 %evl) + ret <2 x i16> %u +} + +define <2 x i16> @vnmsac_vx_nxv2i16(<2 x i16> %a, i16 %b, <2 x i16> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %vb, <2 x i1> %allones, i32 %evl) + %y = call <2 x i16> @llvm.vp.sub.nxv2i16(<2 x i16> %c, <2 x i16> %x, <2 x i1> %allones, i32 %evl) + %u = call <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1> %m, <2 x i16> %y, <2 x i16> %c, i32 %evl) + ret 
<2 x i16> %u +} + +define <2 x i16> @vnmsac_vx_nxv2i16_unmasked(<2 x i16> %a, i16 %b, <2 x i16> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma +; CHECK-NEXT: vnmsac.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %vb, <2 x i1> %allones, i32 %evl) + %y = call <2 x i16> @llvm.vp.sub.nxv2i16(<2 x i16> %c, <2 x i16> %x, <2 x i1> %allones, i32 %evl) + %u = call <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1> %allones, <2 x i16> %y, <2 x i16> %c, i32 %evl) + ret <2 x i16> %u +} + +define <2 x i16> @vnmsac_vv_nxv2i16_ta(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv2i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i1> %allones, i32 %evl) + %y = call <2 x i16> @llvm.vp.sub.nxv2i16(<2 x i16> %c, <2 x i16> %x, <2 x i1> %allones, i32 %evl) + %u = call <2 x i16> @llvm.vp.select.nxv2i16(<2 x i1> %m, <2 x i16> %y, <2 x i16> %c, i32 %evl) + ret <2 x i16> %u +} + +define <2 x i16> @vnmsac_vx_nxv2i16_ta(<2 x i16> %a, i16 %b, <2 x i16> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv2i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: 
vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %vb, <2 x i1> %allones, i32 %evl) + %y = call <2 x i16> @llvm.vp.sub.nxv2i16(<2 x i16> %c, <2 x i16> %x, <2 x i1> %allones, i32 %evl) + %u = call <2 x i16> @llvm.vp.select.nxv2i16(<2 x i1> %m, <2 x i16> %y, <2 x i16> %c, i32 %evl) + ret <2 x i16> %u +} + +declare <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32) +declare <4 x i16> @llvm.vp.sub.nxv4i16(<4 x i16>, <4 x i16>, <4 x i1>, i32) +declare <4 x i16> @llvm.vp.merge.nxv4i16(<4 x i1>, <4 x i16>, <4 x i16>, i32) +declare <4 x i16> @llvm.vp.select.nxv4i16(<4 x i1>, <4 x i16>, <4 x i16>, i32) + +define <4 x i16> @vnmsac_vv_nxv4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %b, <4 x i1> %allones, i32 %evl) + %y = call <4 x i16> @llvm.vp.sub.nxv4i16(<4 x i16> %c, <4 x i16> %x, <4 x i1> %allones, i32 %evl) + %u = call <4 x i16> @llvm.vp.merge.nxv4i16(<4 x i1> %m, <4 x i16> %y, <4 x i16> %c, i32 %evl) + ret <4 x i16> %u +} + +define <4 x i16> @vnmsac_vv_nxv4i16_unmasked(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma +; CHECK-NEXT: vnmsac.vv v10, v8, 
v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %b, <4 x i1> %allones, i32 %evl) + %y = call <4 x i16> @llvm.vp.sub.nxv4i16(<4 x i16> %c, <4 x i16> %x, <4 x i1> %allones, i32 %evl) + %u = call <4 x i16> @llvm.vp.merge.nxv4i16(<4 x i1> %allones, <4 x i16> %y, <4 x i16> %c, i32 %evl) + ret <4 x i16> %u +} + +define <4 x i16> @vnmsac_vx_nxv4i16(<4 x i16> %a, i16 %b, <4 x i16> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %vb, <4 x i1> %allones, i32 %evl) + %y = call <4 x i16> @llvm.vp.sub.nxv4i16(<4 x i16> %c, <4 x i16> %x, <4 x i1> %allones, i32 %evl) + %u = call <4 x i16> @llvm.vp.merge.nxv4i16(<4 x i1> %m, <4 x i16> %y, <4 x i16> %c, i32 %evl) + ret <4 x i16> %u +} + +define <4 x i16> @vnmsac_vx_nxv4i16_unmasked(<4 x i16> %a, i16 %b, <4 x i16> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma +; CHECK-NEXT: vnmsac.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> 
poison, <4 x i32> zeroinitializer + %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %vb, <4 x i1> %allones, i32 %evl) + %y = call <4 x i16> @llvm.vp.sub.nxv4i16(<4 x i16> %c, <4 x i16> %x, <4 x i1> %allones, i32 %evl) + %u = call <4 x i16> @llvm.vp.merge.nxv4i16(<4 x i1> %allones, <4 x i16> %y, <4 x i16> %c, i32 %evl) + ret <4 x i16> %u +} + +define <4 x i16> @vnmsac_vv_nxv4i16_ta(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv4i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %b, <4 x i1> %allones, i32 %evl) + %y = call <4 x i16> @llvm.vp.sub.nxv4i16(<4 x i16> %c, <4 x i16> %x, <4 x i1> %allones, i32 %evl) + %u = call <4 x i16> @llvm.vp.select.nxv4i16(<4 x i1> %m, <4 x i16> %y, <4 x i16> %c, i32 %evl) + ret <4 x i16> %u +} + +define <4 x i16> @vnmsac_vx_nxv4i16_ta(<4 x i16> %a, i16 %b, <4 x i16> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv4i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %vb, <4 x i1> %allones, i32 %evl) + %y = call <4 x i16> @llvm.vp.sub.nxv4i16(<4 x i16> %c, <4 x i16> %x, <4 x i1> %allones, i32 %evl) + %u = call <4 x i16> @llvm.vp.select.nxv4i16(<4 x i1> %m, <4 x 
i16> %y, <4 x i16> %c, i32 %evl) + ret <4 x i16> %u +} + +declare <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32) +declare <8 x i16> @llvm.vp.sub.nxv8i16(<8 x i16>, <8 x i16>, <8 x i1>, i32) +declare <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1>, <8 x i16>, <8 x i16>, i32) +declare <8 x i16> @llvm.vp.select.nxv8i16(<8 x i1>, <8 x i16>, <8 x i16>, i32) + +define <8 x i16> @vnmsac_vv_nxv8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %b, <8 x i1> %allones, i32 %evl) + %y = call <8 x i16> @llvm.vp.sub.nxv8i16(<8 x i16> %c, <8 x i16> %x, <8 x i1> %allones, i32 %evl) + %u = call <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1> %m, <8 x i16> %y, <8 x i16> %c, i32 %evl) + ret <8 x i16> %u +} + +define <8 x i16> @vnmsac_vv_nxv8i16_unmasked(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma +; CHECK-NEXT: vnmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %b, <8 x i1> %allones, i32 %evl) + %y = call <8 x i16> @llvm.vp.sub.nxv8i16(<8 x i16> %c, <8 x i16> %x, <8 x i1> %allones, i32 %evl) + %u = call <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1> %allones, <8 x i16> %y, <8 x i16> %c, i32 %evl) + ret <8 x i16> %u +} + +define <8 x i16> @vnmsac_vx_nxv8i16(<8 x i16> %a, i16 %b, <8 x i16> 
%c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %vb, <8 x i1> %allones, i32 %evl) + %y = call <8 x i16> @llvm.vp.sub.nxv8i16(<8 x i16> %c, <8 x i16> %x, <8 x i1> %allones, i32 %evl) + %u = call <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1> %m, <8 x i16> %y, <8 x i16> %c, i32 %evl) + ret <8 x i16> %u +} + +define <8 x i16> @vnmsac_vx_nxv8i16_unmasked(<8 x i16> %a, i16 %b, <8 x i16> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma +; CHECK-NEXT: vnmsac.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %vb, <8 x i1> %allones, i32 %evl) + %y = call <8 x i16> @llvm.vp.sub.nxv8i16(<8 x i16> %c, <8 x i16> %x, <8 x i1> %allones, i32 %evl) + %u = call <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1> %allones, <8 x i16> %y, <8 x i16> %c, i32 %evl) + ret <8 x i16> %u +} + +define <8 x i16> @vnmsac_vv_nxv8i16_ta(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv8i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; 
CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %b, <8 x i1> %allones, i32 %evl) + %y = call <8 x i16> @llvm.vp.sub.nxv8i16(<8 x i16> %c, <8 x i16> %x, <8 x i1> %allones, i32 %evl) + %u = call <8 x i16> @llvm.vp.select.nxv8i16(<8 x i1> %m, <8 x i16> %y, <8 x i16> %c, i32 %evl) + ret <8 x i16> %u +} + +define <8 x i16> @vnmsac_vx_nxv8i16_ta(<8 x i16> %a, i16 %b, <8 x i16> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv8i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %vb, <8 x i1> %allones, i32 %evl) + %y = call <8 x i16> @llvm.vp.sub.nxv8i16(<8 x i16> %c, <8 x i16> %x, <8 x i1> %allones, i32 %evl) + %u = call <8 x i16> @llvm.vp.select.nxv8i16(<8 x i1> %m, <8 x i16> %y, <8 x i16> %c, i32 %evl) + ret <8 x i16> %u +} + +declare <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16>, <16 x i16>, <16 x i1>, i32) +declare <16 x i16> @llvm.vp.sub.nxv16i16(<16 x i16>, <16 x i16>, <16 x i1>, i32) +declare <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1>, <16 x i16>, <16 x i16>, i32) +declare <16 x i16> @llvm.vp.select.nxv16i16(<16 x i1>, <16 x i16>, <16 x i16>, i32) + +define <16 x i16> @vnmsac_vv_nxv16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: 
vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vnmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %b, <16 x i1> %allones, i32 %evl) + %y = call <16 x i16> @llvm.vp.sub.nxv16i16(<16 x i16> %c, <16 x i16> %x, <16 x i1> %allones, i32 %evl) + %u = call <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1> %m, <16 x i16> %y, <16 x i16> %c, i32 %evl) + ret <16 x i16> %u +} + +define <16 x i16> @vnmsac_vv_nxv16i16_unmasked(<16 x i16> %a, <16 x i16> %b, <16 x i16> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma +; CHECK-NEXT: vnmsac.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %b, <16 x i1> %allones, i32 %evl) + %y = call <16 x i16> @llvm.vp.sub.nxv16i16(<16 x i16> %c, <16 x i16> %x, <16 x i1> %allones, i32 %evl) + %u = call <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1> %allones, <16 x i16> %y, <16 x i16> %c, i32 %evl) + ret <16 x i16> %u +} + +define <16 x i16> @vnmsac_vx_nxv16i16(<16 x i16> %a, i16 %b, <16 x i16> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu +; CHECK-NEXT: vnmsac.vx v10, a0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> 
poison, <16 x i32> zeroinitializer + %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %vb, <16 x i1> %allones, i32 %evl) + %y = call <16 x i16> @llvm.vp.sub.nxv16i16(<16 x i16> %c, <16 x i16> %x, <16 x i1> %allones, i32 %evl) + %u = call <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1> %m, <16 x i16> %y, <16 x i16> %c, i32 %evl) + ret <16 x i16> %u +} + +define <16 x i16> @vnmsac_vx_nxv16i16_unmasked(<16 x i16> %a, i16 %b, <16 x i16> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, ma +; CHECK-NEXT: vnmsac.vx v10, a0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %vb, <16 x i1> %allones, i32 %evl) + %y = call <16 x i16> @llvm.vp.sub.nxv16i16(<16 x i16> %c, <16 x i16> %x, <16 x i1> %allones, i32 %evl) + %u = call <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1> %allones, <16 x i16> %y, <16 x i16> %c, i32 %evl) + ret <16 x i16> %u +} + +define <16 x i16> @vnmsac_vv_nxv16i16_ta(<16 x i16> %a, <16 x i16> %b, <16 x i16> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv16i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vnmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %b, <16 x i1> %allones, i32 %evl) + %y = call <16 x i16> @llvm.vp.sub.nxv16i16(<16 x i16> %c, <16 x i16> %x, <16 x i1> %allones, 
i32 %evl) + %u = call <16 x i16> @llvm.vp.select.nxv16i16(<16 x i1> %m, <16 x i16> %y, <16 x i16> %c, i32 %evl) + ret <16 x i16> %u +} + +define <16 x i16> @vnmsac_vx_nxv16i16_ta(<16 x i16> %a, i16 %b, <16 x i16> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv16i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT: vnmsac.vx v10, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %vb, <16 x i1> %allones, i32 %evl) + %y = call <16 x i16> @llvm.vp.sub.nxv16i16(<16 x i16> %c, <16 x i16> %x, <16 x i1> %allones, i32 %evl) + %u = call <16 x i16> @llvm.vp.select.nxv16i16(<16 x i1> %m, <16 x i16> %y, <16 x i16> %c, i32 %evl) + ret <16 x i16> %u +} + +declare <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16>, <32 x i16>, <32 x i1>, i32) +declare <32 x i16> @llvm.vp.sub.nxv32i16(<32 x i16>, <32 x i16>, <32 x i1>, i32) +declare <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1>, <32 x i16>, <32 x i16>, i32) +declare <32 x i16> @llvm.vp.select.nxv32i16(<32 x i1>, <32 x i16>, <32 x i16>, i32) + +define <32 x i16> @vnmsac_vv_nxv32i16(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vnmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %b, <32 x i1> %allones, i32 %evl) + %y = call 
<32 x i16> @llvm.vp.sub.nxv32i16(<32 x i16> %c, <32 x i16> %x, <32 x i1> %allones, i32 %evl) + %u = call <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1> %m, <32 x i16> %y, <32 x i16> %c, i32 %evl) + ret <32 x i16> %u +} + +define <32 x i16> @vnmsac_vv_nxv32i16_unmasked(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, ma +; CHECK-NEXT: vnmsac.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %b, <32 x i1> %allones, i32 %evl) + %y = call <32 x i16> @llvm.vp.sub.nxv32i16(<32 x i16> %c, <32 x i16> %x, <32 x i1> %allones, i32 %evl) + %u = call <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1> %allones, <32 x i16> %y, <32 x i16> %c, i32 %evl) + ret <32 x i16> %u +} + +define <32 x i16> @vnmsac_vx_nxv32i16(<32 x i16> %a, i16 %b, <32 x i16> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu +; CHECK-NEXT: vnmsac.vx v12, a0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <32 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <32 x i16> %elt.head, <32 x i16> poison, <32 x i32> zeroinitializer + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %vb, <32 x i1> %allones, i32 %evl) + %y = call <32 x i16> @llvm.vp.sub.nxv32i16(<32 x i16> %c, <32 x i16> %x, <32 x i1> %allones, i32 %evl) + %u = call <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1> %m, <32 x i16> %y, <32 x i16> %c, i32 %evl) + ret <32 x i16> %u +} + +define <32 x i16> 
@vnmsac_vx_nxv32i16_unmasked(<32 x i16> %a, i16 %b, <32 x i16> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, ma +; CHECK-NEXT: vnmsac.vx v12, a0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <32 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <32 x i16> %elt.head, <32 x i16> poison, <32 x i32> zeroinitializer + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %vb, <32 x i1> %allones, i32 %evl) + %y = call <32 x i16> @llvm.vp.sub.nxv32i16(<32 x i16> %c, <32 x i16> %x, <32 x i1> %allones, i32 %evl) + %u = call <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1> %allones, <32 x i16> %y, <32 x i16> %c, i32 %evl) + ret <32 x i16> %u +} + +define <32 x i16> @vnmsac_vv_nxv32i16_ta(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv32i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vnmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %b, <32 x i1> %allones, i32 %evl) + %y = call <32 x i16> @llvm.vp.sub.nxv32i16(<32 x i16> %c, <32 x i16> %x, <32 x i1> %allones, i32 %evl) + %u = call <32 x i16> @llvm.vp.select.nxv32i16(<32 x i1> %m, <32 x i16> %y, <32 x i16> %c, i32 %evl) + ret <32 x i16> %u +} + +define <32 x i16> @vnmsac_vx_nxv32i16_ta(<32 x i16> %a, i16 %b, <32 x i16> %c, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv32i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT: vnmsac.vx v12, 
a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <32 x i16> poison, i16 %b, i32 0 + %vb = shufflevector <32 x i16> %elt.head, <32 x i16> poison, <32 x i32> zeroinitializer + %splat = insertelement <32 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer + %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %vb, <32 x i1> %allones, i32 %evl) + %y = call <32 x i16> @llvm.vp.sub.nxv32i16(<32 x i16> %c, <32 x i16> %x, <32 x i1> %allones, i32 %evl) + %u = call <32 x i16> @llvm.vp.select.nxv32i16(<32 x i1> %m, <32 x i16> %y, <32 x i16> %c, i32 %evl) + ret <32 x i16> %u +} + +declare <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32) +declare <2 x i32> @llvm.vp.sub.nxv2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32) +declare <2 x i32> @llvm.vp.merge.nxv2i32(<2 x i1>, <2 x i32>, <2 x i32>, i32) +declare <2 x i32> @llvm.vp.select.nxv2i32(<2 x i1>, <2 x i32>, <2 x i32>, i32) + +define <2 x i32> @vnmsac_vv_nxv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i1> %allones, i32 %evl) + %y = call <2 x i32> @llvm.vp.sub.nxv2i32(<2 x i32> %c, <2 x i32> %x, <2 x i1> %allones, i32 %evl) + %u = call <2 x i32> @llvm.vp.merge.nxv2i32(<2 x i1> %m, <2 x i32> %y, <2 x i32> %c, i32 %evl) + ret <2 x i32> %u +} + +define <2 x i32> @vnmsac_vv_nxv2i32_unmasked(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, 
a0, e32, mf2, tu, ma +; CHECK-NEXT: vnmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i1> %allones, i32 %evl) + %y = call <2 x i32> @llvm.vp.sub.nxv2i32(<2 x i32> %c, <2 x i32> %x, <2 x i1> %allones, i32 %evl) + %u = call <2 x i32> @llvm.vp.merge.nxv2i32(<2 x i1> %allones, <2 x i32> %y, <2 x i32> %c, i32 %evl) + ret <2 x i32> %u +} + +define <2 x i32> @vnmsac_vx_nxv2i32(<2 x i32> %a, i32 %b, <2 x i32> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %vb, <2 x i1> %allones, i32 %evl) + %y = call <2 x i32> @llvm.vp.sub.nxv2i32(<2 x i32> %c, <2 x i32> %x, <2 x i1> %allones, i32 %evl) + %u = call <2 x i32> @llvm.vp.merge.nxv2i32(<2 x i1> %m, <2 x i32> %y, <2 x i32> %c, i32 %evl) + ret <2 x i32> %u +} + +define <2 x i32> @vnmsac_vx_nxv2i32_unmasked(<2 x i32> %a, i32 %b, <2 x i32> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma +; CHECK-NEXT: vnmsac.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 
0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %vb, <2 x i1> %allones, i32 %evl) + %y = call <2 x i32> @llvm.vp.sub.nxv2i32(<2 x i32> %c, <2 x i32> %x, <2 x i1> %allones, i32 %evl) + %u = call <2 x i32> @llvm.vp.merge.nxv2i32(<2 x i1> %allones, <2 x i32> %y, <2 x i32> %c, i32 %evl) + ret <2 x i32> %u +} + +define <2 x i32> @vnmsac_vv_nxv2i32_ta(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv2i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i1> %allones, i32 %evl) + %y = call <2 x i32> @llvm.vp.sub.nxv2i32(<2 x i32> %c, <2 x i32> %x, <2 x i1> %allones, i32 %evl) + %u = call <2 x i32> @llvm.vp.select.nxv2i32(<2 x i1> %m, <2 x i32> %y, <2 x i32> %c, i32 %evl) + ret <2 x i32> %u +} + +define <2 x i32> @vnmsac_vx_nxv2i32_ta(<2 x i32> %a, i32 %b, <2 x i32> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv2i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %vb, <2 x i1> %allones, i32 %evl) + %y = call <2 x i32> @llvm.vp.sub.nxv2i32(<2 x i32> %c, <2 x i32> %x, <2 x i1> %allones, i32 %evl) + %u = 
call <2 x i32> @llvm.vp.select.nxv2i32(<2 x i1> %m, <2 x i32> %y, <2 x i32> %c, i32 %evl) + ret <2 x i32> %u +} + +declare <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32) +declare <4 x i32> @llvm.vp.sub.nxv4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32) +declare <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1>, <4 x i32>, <4 x i32>, i32) +declare <4 x i32> @llvm.vp.select.nxv4i32(<4 x i1>, <4 x i32>, <4 x i32>, i32) + +define <4 x i32> @vnmsac_vv_nxv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %allones, i32 %evl) + %y = call <4 x i32> @llvm.vp.sub.nxv4i32(<4 x i32> %c, <4 x i32> %x, <4 x i1> %allones, i32 %evl) + %u = call <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1> %m, <4 x i32> %y, <4 x i32> %c, i32 %evl) + ret <4 x i32> %u +} + +define <4 x i32> @vnmsac_vv_nxv4i32_unmasked(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma +; CHECK-NEXT: vnmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %allones, i32 %evl) + %y = call <4 x i32> @llvm.vp.sub.nxv4i32(<4 x i32> %c, <4 x i32> %x, <4 x i1> %allones, i32 %evl) + %u = call <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1> %allones, <4 x i32> %y, <4 x i32> %c, i32 %evl) + ret <4 x i32> %u +} + +define <4 x 
i32> @vnmsac_vx_nxv4i32(<4 x i32> %a, i32 %b, <4 x i32> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %vb, <4 x i1> %allones, i32 %evl) + %y = call <4 x i32> @llvm.vp.sub.nxv4i32(<4 x i32> %c, <4 x i32> %x, <4 x i1> %allones, i32 %evl) + %u = call <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1> %m, <4 x i32> %y, <4 x i32> %c, i32 %evl) + ret <4 x i32> %u +} + +define <4 x i32> @vnmsac_vx_nxv4i32_unmasked(<4 x i32> %a, i32 %b, <4 x i32> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma +; CHECK-NEXT: vnmsac.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %vb, <4 x i1> %allones, i32 %evl) + %y = call <4 x i32> @llvm.vp.sub.nxv4i32(<4 x i32> %c, <4 x i32> %x, <4 x i1> %allones, i32 %evl) + %u = call <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1> %allones, <4 x i32> %y, <4 x i32> %c, i32 %evl) + ret <4 x i32> %u +} + +define <4 x i32> @vnmsac_vv_nxv4i32_ta(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv4i32_ta: +; CHECK: # %bb.0: +; 
CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %allones, i32 %evl) + %y = call <4 x i32> @llvm.vp.sub.nxv4i32(<4 x i32> %c, <4 x i32> %x, <4 x i1> %allones, i32 %evl) + %u = call <4 x i32> @llvm.vp.select.nxv4i32(<4 x i1> %m, <4 x i32> %y, <4 x i32> %c, i32 %evl) + ret <4 x i32> %u +} + +define <4 x i32> @vnmsac_vx_nxv4i32_ta(<4 x i32> %a, i32 %b, <4 x i32> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv4i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %vb, <4 x i1> %allones, i32 %evl) + %y = call <4 x i32> @llvm.vp.sub.nxv4i32(<4 x i32> %c, <4 x i32> %x, <4 x i1> %allones, i32 %evl) + %u = call <4 x i32> @llvm.vp.select.nxv4i32(<4 x i1> %m, <4 x i32> %y, <4 x i32> %c, i32 %evl) + ret <4 x i32> %u +} + +declare <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.sub.nxv8i32(<8 x i32>, <8 x i32>, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.merge.nxv8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32) +declare <8 x i32> @llvm.vp.select.nxv8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32) + +define <8 x i32> @vnmsac_vv_nxv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv8i32: +; CHECK: # %bb.0: 
+; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vnmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i1> %allones, i32 %evl) + %y = call <8 x i32> @llvm.vp.sub.nxv8i32(<8 x i32> %c, <8 x i32> %x, <8 x i1> %allones, i32 %evl) + %u = call <8 x i32> @llvm.vp.merge.nxv8i32(<8 x i1> %m, <8 x i32> %y, <8 x i32> %c, i32 %evl) + ret <8 x i32> %u +} + +define <8 x i32> @vnmsac_vv_nxv8i32_unmasked(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma +; CHECK-NEXT: vnmsac.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i1> %allones, i32 %evl) + %y = call <8 x i32> @llvm.vp.sub.nxv8i32(<8 x i32> %c, <8 x i32> %x, <8 x i1> %allones, i32 %evl) + %u = call <8 x i32> @llvm.vp.merge.nxv8i32(<8 x i1> %allones, <8 x i32> %y, <8 x i32> %c, i32 %evl) + ret <8 x i32> %u +} + +define <8 x i32> @vnmsac_vx_nxv8i32(<8 x i32> %a, i32 %b, <8 x i32> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu +; CHECK-NEXT: vnmsac.vx v10, a0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 
x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %vb, <8 x i1> %allones, i32 %evl) + %y = call <8 x i32> @llvm.vp.sub.nxv8i32(<8 x i32> %c, <8 x i32> %x, <8 x i1> %allones, i32 %evl) + %u = call <8 x i32> @llvm.vp.merge.nxv8i32(<8 x i1> %m, <8 x i32> %y, <8 x i32> %c, i32 %evl) + ret <8 x i32> %u +} + +define <8 x i32> @vnmsac_vx_nxv8i32_unmasked(<8 x i32> %a, i32 %b, <8 x i32> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma +; CHECK-NEXT: vnmsac.vx v10, a0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %vb, <8 x i1> %allones, i32 %evl) + %y = call <8 x i32> @llvm.vp.sub.nxv8i32(<8 x i32> %c, <8 x i32> %x, <8 x i1> %allones, i32 %evl) + %u = call <8 x i32> @llvm.vp.merge.nxv8i32(<8 x i1> %allones, <8 x i32> %y, <8 x i32> %c, i32 %evl) + ret <8 x i32> %u +} + +define <8 x i32> @vnmsac_vv_nxv8i32_ta(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv8i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vnmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i1> %allones, i32 %evl) + %y = call <8 x i32> @llvm.vp.sub.nxv8i32(<8 x i32> %c, <8 x i32> %x, <8 x i1> %allones, i32 %evl) + %u = call <8 x i32> @llvm.vp.select.nxv8i32(<8 x i1> %m, <8 x i32> %y, <8 x i32> %c, i32 %evl) + ret <8 
x i32> %u +} + +define <8 x i32> @vnmsac_vx_nxv8i32_ta(<8 x i32> %a, i32 %b, <8 x i32> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv8i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT: vnmsac.vx v10, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %vb, <8 x i1> %allones, i32 %evl) + %y = call <8 x i32> @llvm.vp.sub.nxv8i32(<8 x i32> %c, <8 x i32> %x, <8 x i1> %allones, i32 %evl) + %u = call <8 x i32> @llvm.vp.select.nxv8i32(<8 x i1> %m, <8 x i32> %y, <8 x i32> %c, i32 %evl) + ret <8 x i32> %u +} + +declare <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32) +declare <16 x i32> @llvm.vp.sub.nxv16i32(<16 x i32>, <16 x i32>, <16 x i1>, i32) +declare <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1>, <16 x i32>, <16 x i32>, i32) +declare <16 x i32> @llvm.vp.select.nxv16i32(<16 x i1>, <16 x i32>, <16 x i32>, i32) + +define <16 x i32> @vnmsac_vv_nxv16i32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vnmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %b, <16 x i1> %allones, i32 %evl) + %y = call <16 x i32> @llvm.vp.sub.nxv16i32(<16 x i32> %c, <16 x i32> %x, <16 x i1> %allones, i32 %evl) + %u = call <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1> 
%m, <16 x i32> %y, <16 x i32> %c, i32 %evl) + ret <16 x i32> %u +} + +define <16 x i32> @vnmsac_vv_nxv16i32_unmasked(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, ma +; CHECK-NEXT: vnmsac.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %b, <16 x i1> %allones, i32 %evl) + %y = call <16 x i32> @llvm.vp.sub.nxv16i32(<16 x i32> %c, <16 x i32> %x, <16 x i1> %allones, i32 %evl) + %u = call <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1> %allones, <16 x i32> %y, <16 x i32> %c, i32 %evl) + ret <16 x i32> %u +} + +define <16 x i32> @vnmsac_vx_nxv16i32(<16 x i32> %a, i32 %b, <16 x i32> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu +; CHECK-NEXT: vnmsac.vx v12, a0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %vb, <16 x i1> %allones, i32 %evl) + %y = call <16 x i32> @llvm.vp.sub.nxv16i32(<16 x i32> %c, <16 x i32> %x, <16 x i1> %allones, i32 %evl) + %u = call <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1> %m, <16 x i32> %y, <16 x i32> %c, i32 %evl) + ret <16 x i32> %u +} + +define <16 x i32> @vnmsac_vx_nxv16i32_unmasked(<16 x i32> %a, i32 %b, <16 x i32> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv16i32_unmasked: +; 
CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, ma +; CHECK-NEXT: vnmsac.vx v12, a0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %vb, <16 x i1> %allones, i32 %evl) + %y = call <16 x i32> @llvm.vp.sub.nxv16i32(<16 x i32> %c, <16 x i32> %x, <16 x i1> %allones, i32 %evl) + %u = call <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1> %allones, <16 x i32> %y, <16 x i32> %c, i32 %evl) + ret <16 x i32> %u +} + +define <16 x i32> @vnmsac_vv_nxv16i32_ta(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv16i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vnmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %b, <16 x i1> %allones, i32 %evl) + %y = call <16 x i32> @llvm.vp.sub.nxv16i32(<16 x i32> %c, <16 x i32> %x, <16 x i1> %allones, i32 %evl) + %u = call <16 x i32> @llvm.vp.select.nxv16i32(<16 x i1> %m, <16 x i32> %y, <16 x i32> %c, i32 %evl) + ret <16 x i32> %u +} + +define <16 x i32> @vnmsac_vx_nxv16i32_ta(<16 x i32> %a, i32 %b, <16 x i32> %c, <16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv16i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT: vnmsac.vx v12, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 + %vb = shufflevector <16 x 
i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer + %splat = insertelement <16 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer + %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %vb, <16 x i1> %allones, i32 %evl) + %y = call <16 x i32> @llvm.vp.sub.nxv16i32(<16 x i32> %c, <16 x i32> %x, <16 x i1> %allones, i32 %evl) + %u = call <16 x i32> @llvm.vp.select.nxv16i32(<16 x i1> %m, <16 x i32> %y, <16 x i32> %c, i32 %evl) + ret <16 x i32> %u +} + +declare <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32) +declare <2 x i64> @llvm.vp.sub.nxv2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32) +declare <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1>, <2 x i64>, <2 x i64>, i32) +declare <2 x i64> @llvm.vp.select.nxv2i64(<2 x i1>, <2 x i64>, <2 x i64>, i32) + +define <2 x i64> @vnmsac_vv_nxv2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %b, <2 x i1> %allones, i32 %evl) + %y = call <2 x i64> @llvm.vp.sub.nxv2i64(<2 x i64> %c, <2 x i64> %x, <2 x i1> %allones, i32 %evl) + %u = call <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1> %m, <2 x i64> %y, <2 x i64> %c, i32 %evl) + ret <2 x i64> %u +} + +define <2 x i64> @vnmsac_vv_nxv2i64_unmasked(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, ma +; CHECK-NEXT: vnmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, 
i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %b, <2 x i1> %allones, i32 %evl) + %y = call <2 x i64> @llvm.vp.sub.nxv2i64(<2 x i64> %c, <2 x i64> %x, <2 x i1> %allones, i32 %evl) + %u = call <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1> %allones, <2 x i64> %y, <2 x i64> %c, i32 %evl) + ret <2 x i64> %u +} + +define <2 x i64> @vnmsac_vx_nxv2i64(<2 x i64> %a, i64 %b, <2 x i64> %c, <2 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vnmsac_vx_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, mu +; RV32-NEXT: vnmsac.vv v9, v8, v10, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vnmsac_vx_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, mu +; RV64-NEXT: vnmsac.vx v9, a0, v8, v0.t +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %vb, <2 x i1> %allones, i32 %evl) + %y = call <2 x i64> @llvm.vp.sub.nxv2i64(<2 x i64> %c, <2 x i64> %x, <2 x i1> %allones, i32 %evl) + %u = call <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1> %m, <2 x i64> %y, <2 x i64> %c, i32 %evl) + ret <2 x i64> %u +} + +define <2 x i64> @vnmsac_vx_nxv2i64_unmasked(<2 x i64> %a, i64 %b, <2 x i64> %c, <2 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vnmsac_vx_nxv2i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; 
RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma +; RV32-NEXT: vnmsac.vv v9, v8, v10 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vnmsac_vx_nxv2i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma +; RV64-NEXT: vnmsac.vx v9, a0, v8 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %vb, <2 x i1> %allones, i32 %evl) + %y = call <2 x i64> @llvm.vp.sub.nxv2i64(<2 x i64> %c, <2 x i64> %x, <2 x i1> %allones, i32 %evl) + %u = call <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1> %allones, <2 x i64> %y, <2 x i64> %c, i32 %evl) + ret <2 x i64> %u +} + +define <2 x i64> @vnmsac_vv_nxv2i64_ta(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv2i64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %b, <2 x i1> %allones, i32 %evl) + %y = call <2 x i64> @llvm.vp.sub.nxv2i64(<2 x i64> %c, <2 x i64> %x, <2 x i1> %allones, i32 %evl) + %u = call <2 x i64> @llvm.vp.select.nxv2i64(<2 x i1> %m, <2 x i64> %y, <2 x i64> %c, i32 %evl) + ret <2 x i64> %u +} + +define <2 x i64> @vnmsac_vx_nxv2i64_ta(<2 x 
i64> %a, i64 %b, <2 x i64> %c, <2 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vnmsac_vx_nxv2i64_ta: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: vnmsac.vv v9, v8, v10, v0.t +; RV32-NEXT: vmv.v.v v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vnmsac_vx_nxv2i64_ta: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; RV64-NEXT: vnmsac.vx v9, a0, v8, v0.t +; RV64-NEXT: vmv.v.v v8, v9 +; RV64-NEXT: ret + %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer + %splat = insertelement <2 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer + %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %vb, <2 x i1> %allones, i32 %evl) + %y = call <2 x i64> @llvm.vp.sub.nxv2i64(<2 x i64> %c, <2 x i64> %x, <2 x i1> %allones, i32 %evl) + %u = call <2 x i64> @llvm.vp.select.nxv2i64(<2 x i1> %m, <2 x i64> %y, <2 x i64> %c, i32 %evl) + ret <2 x i64> %u +} + +declare <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32) +declare <4 x i64> @llvm.vp.sub.nxv4i64(<4 x i64>, <4 x i64>, <4 x i1>, i32) +declare <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1>, <4 x i64>, <4 x i64>, i32) +declare <4 x i64> @llvm.vp.select.nxv4i64(<4 x i1>, <4 x i64>, <4 x i64>, i32) + +define <4 x i64> @vnmsac_vv_nxv4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vnmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 
-1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %b, <4 x i1> %allones, i32 %evl) + %y = call <4 x i64> @llvm.vp.sub.nxv4i64(<4 x i64> %c, <4 x i64> %x, <4 x i1> %allones, i32 %evl) + %u = call <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1> %m, <4 x i64> %y, <4 x i64> %c, i32 %evl) + ret <4 x i64> %u +} + +define <4 x i64> @vnmsac_vv_nxv4i64_unmasked(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma +; CHECK-NEXT: vnmsac.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %b, <4 x i1> %allones, i32 %evl) + %y = call <4 x i64> @llvm.vp.sub.nxv4i64(<4 x i64> %c, <4 x i64> %x, <4 x i1> %allones, i32 %evl) + %u = call <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1> %allones, <4 x i64> %y, <4 x i64> %c, i32 %evl) + ret <4 x i64> %u +} + +define <4 x i64> @vnmsac_vx_nxv4i64(<4 x i64> %a, i64 %b, <4 x i64> %c, <4 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vnmsac_vx_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, mu +; RV32-NEXT: vnmsac.vv v10, v8, v12, v0.t +; RV32-NEXT: vmv2r.v v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vnmsac_vx_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, mu +; RV64-NEXT: vnmsac.vx v10, a0, v8, v0.t +; RV64-NEXT: vmv2r.v v8, v10 +; RV64-NEXT: ret + %elt.head = insertelement <4 x 
i64> poison, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %vb, <4 x i1> %allones, i32 %evl) + %y = call <4 x i64> @llvm.vp.sub.nxv4i64(<4 x i64> %c, <4 x i64> %x, <4 x i1> %allones, i32 %evl) + %u = call <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1> %m, <4 x i64> %y, <4 x i64> %c, i32 %evl) + ret <4 x i64> %u +} + +define <4 x i64> @vnmsac_vx_nxv4i64_unmasked(<4 x i64> %a, i64 %b, <4 x i64> %c, <4 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vnmsac_vx_nxv4i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma +; RV32-NEXT: vnmsac.vv v10, v8, v12 +; RV32-NEXT: vmv2r.v v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vnmsac_vx_nxv4i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma +; RV64-NEXT: vnmsac.vx v10, a0, v8 +; RV64-NEXT: vmv2r.v v8, v10 +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %vb, <4 x i1> %allones, i32 %evl) + %y = call <4 x i64> @llvm.vp.sub.nxv4i64(<4 x i64> %c, <4 x i64> %x, <4 x i1> %allones, i32 %evl) + %u = call <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1> %allones, <4 x i64> %y, <4 x i64> %c, i32 %evl) + ret <4 x i64> %u +} + +define <4 x i64> 
@vnmsac_vv_nxv4i64_ta(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv4i64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vnmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %b, <4 x i1> %allones, i32 %evl) + %y = call <4 x i64> @llvm.vp.sub.nxv4i64(<4 x i64> %c, <4 x i64> %x, <4 x i1> %allones, i32 %evl) + %u = call <4 x i64> @llvm.vp.select.nxv4i64(<4 x i1> %m, <4 x i64> %y, <4 x i64> %c, i32 %evl) + ret <4 x i64> %u +} + +define <4 x i64> @vnmsac_vx_nxv4i64_ta(<4 x i64> %a, i64 %b, <4 x i64> %c, <4 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vnmsac_vx_nxv4i64_ta: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu +; RV32-NEXT: vnmsac.vv v10, v8, v12, v0.t +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vnmsac_vx_nxv4i64_ta: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu +; RV64-NEXT: vnmsac.vx v10, a0, v8, v0.t +; RV64-NEXT: vmv.v.v v8, v10 +; RV64-NEXT: ret + %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer + %splat = insertelement <4 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer + %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %vb, <4 x i1> %allones, i32 %evl) + %y = call <4 x i64> @llvm.vp.sub.nxv4i64(<4 x i64> %c, <4 x i64> %x, <4 x i1> 
%allones, i32 %evl) + %u = call <4 x i64> @llvm.vp.select.nxv4i64(<4 x i1> %m, <4 x i64> %y, <4 x i64> %c, i32 %evl) + ret <4 x i64> %u +} + +declare <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32) +declare <8 x i64> @llvm.vp.sub.nxv8i64(<8 x i64>, <8 x i64>, <8 x i1>, i32) +declare <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1>, <8 x i64>, <8 x i64>, i32) +declare <8 x i64> @llvm.vp.select.nxv8i64(<8 x i1>, <8 x i64>, <8 x i64>, i32) + +define <8 x i64> @vnmsac_vv_nxv8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vnmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %b, <8 x i1> %allones, i32 %evl) + %y = call <8 x i64> @llvm.vp.sub.nxv8i64(<8 x i64> %c, <8 x i64> %x, <8 x i1> %allones, i32 %evl) + %u = call <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1> %m, <8 x i64> %y, <8 x i64> %c, i32 %evl) + ret <8 x i64> %u +} + +define <8 x i64> @vnmsac_vv_nxv8i64_unmasked(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma +; CHECK-NEXT: vnmsac.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %b, <8 x i1> %allones, i32 %evl) + %y = call <8 x i64> @llvm.vp.sub.nxv8i64(<8 x i64> %c, <8 x i64> %x, <8 x i1> %allones, i32 %evl) + %u = call <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1> %allones, <8 x i64> %y, <8 x i64> %c, i32 %evl) + ret 
<8 x i64> %u +} + +define <8 x i64> @vnmsac_vx_nxv8i64(<8 x i64> %a, i64 %b, <8 x i64> %c, <8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vnmsac_vx_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, mu +; RV32-NEXT: vnmsac.vv v12, v8, v16, v0.t +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vnmsac_vx_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, mu +; RV64-NEXT: vnmsac.vx v12, a0, v8, v0.t +; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %vb, <8 x i1> %allones, i32 %evl) + %y = call <8 x i64> @llvm.vp.sub.nxv8i64(<8 x i64> %c, <8 x i64> %x, <8 x i1> %allones, i32 %evl) + %u = call <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1> %m, <8 x i64> %y, <8 x i64> %c, i32 %evl) + ret <8 x i64> %u +} + +define <8 x i64> @vnmsac_vx_nxv8i64_unmasked(<8 x i64> %a, i64 %b, <8 x i64> %c, <8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vnmsac_vx_nxv8i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, ma +; RV32-NEXT: vnmsac.vv v12, v8, v16 +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: 
vnmsac_vx_nxv8i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, ma +; RV64-NEXT: vnmsac.vx v12, a0, v8 +; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %vb, <8 x i1> %allones, i32 %evl) + %y = call <8 x i64> @llvm.vp.sub.nxv8i64(<8 x i64> %c, <8 x i64> %x, <8 x i1> %allones, i32 %evl) + %u = call <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1> %allones, <8 x i64> %y, <8 x i64> %c, i32 %evl) + ret <8 x i64> %u +} + +define <8 x i64> @vnmsac_vv_nxv8i64_ta(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv8i64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vnmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %b, <8 x i1> %allones, i32 %evl) + %y = call <8 x i64> @llvm.vp.sub.nxv8i64(<8 x i64> %c, <8 x i64> %x, <8 x i1> %allones, i32 %evl) + %u = call <8 x i64> @llvm.vp.select.nxv8i64(<8 x i1> %m, <8 x i64> %y, <8 x i64> %c, i32 %evl) + ret <8 x i64> %u +} + +define <8 x i64> @vnmsac_vx_nxv8i64_ta(<8 x i64> %a, i64 %b, <8 x i64> %c, <8 x i1> %m, i32 zeroext %evl) { +; RV32-LABEL: vnmsac_vx_nxv8i64_ta: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli 
zero, a2, e64, m4, ta, mu +; RV32-NEXT: vnmsac.vv v12, v8, v16, v0.t +; RV32-NEXT: vmv.v.v v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vnmsac_vx_nxv8i64_ta: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; RV64-NEXT: vnmsac.vx v12, a0, v8, v0.t +; RV64-NEXT: vmv.v.v v8, v12 +; RV64-NEXT: ret + %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 + %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer + %splat = insertelement <8 x i1> poison, i1 -1, i32 0 + %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer + %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %vb, <8 x i1> %allones, i32 %evl) + %y = call <8 x i64> @llvm.vp.sub.nxv8i64(<8 x i64> %c, <8 x i64> %x, <8 x i1> %allones, i32 %evl) + %u = call <8 x i64> @llvm.vp.select.nxv8i64(<8 x i1> %m, <8 x i64> %y, <8 x i64> %c, i32 %evl) + ret <8 x i64> %u +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll @@ -0,0 +1,2419 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 + +declare @llvm.vp.mul.nxv1i8(, , , i32) +declare @llvm.vp.add.nxv1i8(, , , i32) +declare @llvm.vp.merge.nxv1i8(, , , i32) +declare @llvm.vp.select.nxv1i8(, , , i32) + +define @vmacc_vv_nxv1i8( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = 
insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv1i8_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv1i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma +; CHECK-NEXT: vmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i8( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv1i8( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv1i8_unmasked( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv1i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma +; CHECK-NEXT: vmacc.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + 
%x = call @llvm.vp.mul.nxv1i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i8( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv1i8_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv1i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv1i8_ta( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv1i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1i8( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv2i8(, , , i32) +declare @llvm.vp.add.nxv2i8(, , , i32) +declare @llvm.vp.merge.nxv2i8(, , , i32) +declare @llvm.vp.select.nxv2i8(, , , i32) + +define @vmacc_vv_nxv2i8( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i8( %a, %b, 
%allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv2i8_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, ma +; CHECK-NEXT: vmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i8( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv2i8( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv2i8_unmasked( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma +; CHECK-NEXT: vmacc.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i8( %x, %c, %allones, i32 %evl) + %u = call 
@llvm.vp.merge.nxv2i8( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv2i8_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv2i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv2i8_ta( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv2i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2i8( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv4i8(, , , i32) +declare @llvm.vp.add.nxv4i8(, , , i32) +declare @llvm.vp.merge.nxv4i8(, , , i32) +declare @llvm.vp.select.nxv4i8(, , , i32) + +define @vmacc_vv_nxv4i8( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i8( %m, %y, %c, i32 %evl) 
+ ret %u +} + +define @vmacc_vv_nxv4i8_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma +; CHECK-NEXT: vmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i8( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv4i8( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv4i8_unmasked( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma +; CHECK-NEXT: vmacc.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i8( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv4i8_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; 
CHECK-LABEL: vmacc_vv_nxv4i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv4i8_ta( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv4i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4i8( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv8i8(, , , i32) +declare @llvm.vp.add.nxv8i8(, , , i32) +declare @llvm.vp.merge.nxv8i8(, , , i32) +declare @llvm.vp.select.nxv8i8(, , , i32) + +define @vmacc_vv_nxv8i8( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv8i8_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv8i8_unmasked: +; 
CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma +; CHECK-NEXT: vmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i8( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv8i8( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv8i8_unmasked( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, ma +; CHECK-NEXT: vmacc.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i8( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv8i8_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv8i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, 
v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv8i8_ta( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv8i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8i8( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv16i8(, , , i32) +declare @llvm.vp.add.nxv16i8(, , , i32) +declare @llvm.vp.merge.nxv16i8(, , , i32) +declare @llvm.vp.select.nxv16i8(, , , i32) + +define @vmacc_vv_nxv16i8( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, mu +; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv16i8_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, ma +; CHECK-NEXT: vmacc.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 
+; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i8( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv16i8( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, tu, mu +; CHECK-NEXT: vmacc.vx v10, a0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv16i8_unmasked( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, tu, ma +; CHECK-NEXT: vmacc.vx v10, a0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i8( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv16i8_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv16i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = 
shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv16i8_ta( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv16i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu +; CHECK-NEXT: vmacc.vx v10, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16i8( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv32i8(, , , i32) +declare @llvm.vp.add.nxv32i8(, , , i32) +declare @llvm.vp.merge.nxv32i8(, , , i32) +declare @llvm.vp.select.nxv32i8(, , , i32) + +define @vmacc_vv_nxv32i8( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, mu +; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv32i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv32i8_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, ma +; CHECK-NEXT: vmacc.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, 
poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv32i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32i8( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv32i8( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, tu, mu +; CHECK-NEXT: vmacc.vx v12, a0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv32i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv32i8_unmasked( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, tu, ma +; CHECK-NEXT: vmacc.vx v12, a0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv32i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32i8( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv32i8_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv32i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i8( %a, %b, %allones, 
i32 %evl) + %y = call @llvm.vp.add.nxv32i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv32i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv32i8_ta( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv32i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu +; CHECK-NEXT: vmacc.vx v12, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv32i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv32i8( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv64i8(, , , i32) +declare @llvm.vp.add.nxv64i8(, , , i32) +declare @llvm.vp.merge.nxv64i8(, , , i32) +declare @llvm.vp.select.nxv64i8(, , , i32) + +define @vmacc_vv_nxv64i8( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8r.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, mu +; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv64i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv64i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv64i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv64i8_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8r.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma +; CHECK-NEXT: vmacc.vv v24, v8, v16 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call 
@llvm.vp.mul.nxv64i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv64i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv64i8( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv64i8( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, mu +; CHECK-NEXT: vmacc.vx v16, a0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv64i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv64i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv64i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv64i8_unmasked( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma +; CHECK-NEXT: vmacc.vx v16, a0, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv64i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv64i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv64i8( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv64i8_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv64i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8r.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu +; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv64i8( %a, %b, %allones, i32 
%evl) + %y = call @llvm.vp.add.nxv64i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv64i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv64i8_ta( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv64i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu +; CHECK-NEXT: vmacc.vx v16, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv64i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv64i8( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv64i8( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv1i16(, , , i32) +declare @llvm.vp.add.nxv1i16(, , , i32) +declare @llvm.vp.merge.nxv1i16(, , , i32) +declare @llvm.vp.select.nxv1i16(, , , i32) + +define @vmacc_vv_nxv1i16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv1i16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma +; CHECK-NEXT: vmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i16( %a, %b, %allones, i32 %evl) + %y = call 
@llvm.vp.add.nxv1i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i16( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv1i16( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv1i16_unmasked( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma +; CHECK-NEXT: vmacc.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i16( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv1i16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv1i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i16( %x, %c, %allones, i32 %evl) + %u = call 
@llvm.vp.select.nxv1i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv1i16_ta( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv1i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1i16( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv2i16(, , , i32) +declare @llvm.vp.add.nxv2i16(, , , i32) +declare @llvm.vp.merge.nxv2i16(, , , i32) +declare @llvm.vp.select.nxv2i16(, , , i32) + +define @vmacc_vv_nxv2i16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv2i16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma +; CHECK-NEXT: vmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i16( %x, %c, %allones, i32 %evl) + %u = call 
@llvm.vp.merge.nxv2i16( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv2i16( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv2i16_unmasked( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma +; CHECK-NEXT: vmacc.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i16( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv2i16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv2i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2i16( %m, %y, %c, i32 %evl) + ret %u +} + +define 
@vmacc_vx_nxv2i16_ta( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv2i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2i16( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv4i16(, , , i32) +declare @llvm.vp.add.nxv4i16(, , , i32) +declare @llvm.vp.merge.nxv4i16(, , , i32) +declare @llvm.vp.select.nxv4i16(, , , i32) + +define @vmacc_vv_nxv4i16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv4i16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma +; CHECK-NEXT: vmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i16( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv4i16( 
%a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv4i16_unmasked( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma +; CHECK-NEXT: vmacc.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i16( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv4i16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv4i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv4i16_ta( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv4i16_ta: 
+; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4i16( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv8i16(, , , i32) +declare @llvm.vp.add.nxv8i16(, , , i32) +declare @llvm.vp.merge.nxv8i16(, , , i32) +declare @llvm.vp.select.nxv8i16(, , , i32) + +define @vmacc_vv_nxv8i16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv8i16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma +; CHECK-NEXT: vmacc.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i16( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv8i16( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv8i16: +; CHECK: # %bb.0: +; 
CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu +; CHECK-NEXT: vmacc.vx v10, a0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv8i16_unmasked( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, ma +; CHECK-NEXT: vmacc.vx v10, a0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i16( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv8i16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv8i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv8i16_ta( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv8i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT: vmacc.vx 
v10, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8i16( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv16i16(, , , i32) +declare @llvm.vp.add.nxv16i16(, , , i32) +declare @llvm.vp.merge.nxv16i16(, , , i32) +declare @llvm.vp.select.nxv16i16(, , , i32) + +define @vmacc_vv_nxv16i16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv16i16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, ma +; CHECK-NEXT: vmacc.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i16( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv16i16( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu +; CHECK-NEXT: vmacc.vx v12, 
a0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv16i16_unmasked( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, ma +; CHECK-NEXT: vmacc.vx v12, a0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i16( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv16i16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv16i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv16i16_ta( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv16i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT: vmacc.vx v12, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; 
CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16i16( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv32i16(, , , i32) +declare @llvm.vp.add.nxv32i16(, , , i32) +declare @llvm.vp.merge.nxv32i16(, , , i32) +declare @llvm.vp.select.nxv32i16(, , , i32) + +define @vmacc_vv_nxv32i16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu +; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv32i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv32i16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma +; CHECK-NEXT: vmacc.vv v24, v8, v16 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv32i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32i16( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv32i16( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu +; 
CHECK-NEXT: vmacc.vx v16, a0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv32i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv32i16_unmasked( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma +; CHECK-NEXT: vmacc.vx v16, a0, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv32i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32i16( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv32i16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv32i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu +; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv32i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv32i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv32i16_ta( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv32i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu +; CHECK-NEXT: 
vmacc.vx v16, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv32i16( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv32i16( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv1i32(, , , i32) +declare @llvm.vp.add.nxv1i32(, , , i32) +declare @llvm.vp.merge.nxv1i32(, , , i32) +declare @llvm.vp.select.nxv1i32(, , , i32) + +define @vmacc_vv_nxv1i32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i32( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv1i32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma +; CHECK-NEXT: vmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i32( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv1i32( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu +; CHECK-NEXT: vmacc.vx v9, a0, 
v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i32( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv1i32_unmasked( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma +; CHECK-NEXT: vmacc.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i32( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv1i32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv1i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1i32( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv1i32_ta( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv1i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = 
insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1i32( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv2i32(, , , i32) +declare @llvm.vp.add.nxv2i32(, , , i32) +declare @llvm.vp.merge.nxv2i32(, , , i32) +declare @llvm.vp.select.nxv2i32(, , , i32) + +define @vmacc_vv_nxv2i32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i32( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv2i32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma +; CHECK-NEXT: vmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i32( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv2i32( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 
%b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i32( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv2i32_unmasked( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma +; CHECK-NEXT: vmacc.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i32( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv2i32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv2i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2i32( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv2i32_ta( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv2i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT: vmacc.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + 
%splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2i32( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv4i32(, , , i32) +declare @llvm.vp.add.nxv4i32(, , , i32) +declare @llvm.vp.merge.nxv4i32(, , , i32) +declare @llvm.vp.select.nxv4i32(, , , i32) + +define @vmacc_vv_nxv4i32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i32( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv4i32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma +; CHECK-NEXT: vmacc.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i32( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv4i32( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu +; CHECK-NEXT: vmacc.vx v10, a0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = 
insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i32( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv4i32_unmasked( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma +; CHECK-NEXT: vmacc.vx v10, a0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i32( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv4i32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv4i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4i32( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv4i32_ta( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv4i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT: vmacc.vx v10, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, 
poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4i32( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv8i32(, , , i32) +declare @llvm.vp.add.nxv8i32(, , , i32) +declare @llvm.vp.merge.nxv8i32(, , , i32) +declare @llvm.vp.select.nxv8i32(, , , i32) + +define @vmacc_vv_nxv8i32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i32( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv8i32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, ma +; CHECK-NEXT: vmacc.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i32( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv8i32( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu +; CHECK-NEXT: vmacc.vx v12, a0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, 
zeroinitializer + %x = call @llvm.vp.mul.nxv8i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i32( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv8i32_unmasked( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, ma +; CHECK-NEXT: vmacc.vx v12, a0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i32( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv8i32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv8i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8i32( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv8i32_ta( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv8i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT: vmacc.vx v12, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i32( %a, %vb, %allones, 
i32 %evl) + %y = call @llvm.vp.add.nxv8i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8i32( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv16i32(, , , i32) +declare @llvm.vp.add.nxv16i32(, , , i32) +declare @llvm.vp.merge.nxv16i32(, , , i32) +declare @llvm.vp.select.nxv16i32(, , , i32) + +define @vmacc_vv_nxv16i32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu +; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i32( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv16i32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma +; CHECK-NEXT: vmacc.vv v24, v8, v16 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i32( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv16i32( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu +; CHECK-NEXT: vmacc.vx v16, a0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, 
zeroinitializer + %x = call @llvm.vp.mul.nxv16i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i32( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv16i32_unmasked( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma +; CHECK-NEXT: vmacc.vx v16, a0, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i32( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv16i32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv16i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16i32( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv16i32_ta( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vx_nxv16i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vmacc.vx v16, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + 
%x = call @llvm.vp.mul.nxv16i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv16i32( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16i32( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv1i64(, , , i32) +declare @llvm.vp.add.nxv1i64(, , , i32) +declare @llvm.vp.merge.nxv1i64(, , , i32) +declare @llvm.vp.select.nxv1i64(, , , i32) + +define @vmacc_vv_nxv1i64( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i64( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv1i64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv1i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, ma +; CHECK-NEXT: vmacc.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i64( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv1i64( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vmacc_vx_nxv1i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, mu +; RV32-NEXT: vmacc.vv v9, v8, v10, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: addi 
sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmacc_vx_nxv1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, mu +; RV64-NEXT: vmacc.vx v9, a0, v8, v0.t +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i64( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv1i64_unmasked( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vmacc_vx_nxv1i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma +; RV32-NEXT: vmacc.vv v9, v8, v10 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmacc_vx_nxv1i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma +; RV64-NEXT: vmacc.vx v9, a0, v8 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i64( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv1i64_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv1i64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t +; CHECK-NEXT: 
vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1i64( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv1i64_ta( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vmacc_vx_nxv1i64_ta: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: vmacc.vv v9, v8, v10, v0.t +; RV32-NEXT: vmv.v.v v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmacc_vx_nxv1i64_ta: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; RV64-NEXT: vmacc.vx v9, a0, v8, v0.t +; RV64-NEXT: vmv.v.v v8, v9 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv1i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1i64( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv2i64(, , , i32) +declare @llvm.vp.add.nxv2i64(, , , i32) +declare @llvm.vp.merge.nxv2i64(, , , i32) +declare @llvm.vp.select.nxv2i64(, , , i32) + +define @vmacc_vv_nxv2i64( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, 
zeroinitializer + %x = call @llvm.vp.mul.nxv2i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i64( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv2i64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma +; CHECK-NEXT: vmacc.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i64( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv2i64( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vmacc_vx_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, mu +; RV32-NEXT: vmacc.vv v10, v8, v12, v0.t +; RV32-NEXT: vmv2r.v v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmacc_vx_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, mu +; RV64-NEXT: vmacc.vx v10, a0, v8, v0.t +; RV64-NEXT: vmv2r.v v8, v10 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i64( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv2i64_unmasked( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; 
RV32-LABEL: vmacc_vx_nxv2i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma +; RV32-NEXT: vmacc.vv v10, v8, v12 +; RV32-NEXT: vmv2r.v v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmacc_vx_nxv2i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma +; RV64-NEXT: vmacc.vx v10, a0, v8 +; RV64-NEXT: vmv2r.v v8, v10 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i64( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv2i64_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv2i64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2i64( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv2i64_ta( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vmacc_vx_nxv2i64_ta: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; 
RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu +; RV32-NEXT: vmacc.vv v10, v8, v12, v0.t +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmacc_vx_nxv2i64_ta: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu +; RV64-NEXT: vmacc.vx v10, a0, v8, v0.t +; RV64-NEXT: vmv.v.v v8, v10 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv2i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2i64( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv4i64(, , , i32) +declare @llvm.vp.add.nxv4i64(, , , i32) +declare @llvm.vp.merge.nxv4i64(, , , i32) +declare @llvm.vp.select.nxv4i64(, , , i32) + +define @vmacc_vv_nxv4i64( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i64( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv4i64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma +; CHECK-NEXT: vmacc.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i64( %x, %c, %allones, i32 %evl) + 
%u = call @llvm.vp.merge.nxv4i64( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv4i64( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vmacc_vx_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, mu +; RV32-NEXT: vmacc.vv v12, v8, v16, v0.t +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmacc_vx_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, mu +; RV64-NEXT: vmacc.vx v12, a0, v8, v0.t +; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i64( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv4i64_unmasked( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vmacc_vx_nxv4i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, ma +; RV32-NEXT: vmacc.vv v12, v8, v16 +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmacc_vx_nxv4i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, ma +; RV64-NEXT: vmacc.vx v12, a0, v8 +; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = 
shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i64( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv4i64_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv4i64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4i64( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv4i64_ta( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vmacc_vx_nxv4i64_ta: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu +; RV32-NEXT: vmacc.vv v12, v8, v16, v0.t +; RV32-NEXT: vmv.v.v v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmacc_vx_nxv4i64_ta: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; RV64-NEXT: vmacc.vx v12, a0, v8, v0.t +; RV64-NEXT: vmv.v.v v8, v12 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv4i64( %x, %c, %allones, i32 %evl) + %u = call 
@llvm.vp.select.nxv4i64( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv8i64(, , , i32) +declare @llvm.vp.add.nxv8i64(, , , i32) +declare @llvm.vp.merge.nxv8i64(, , , i32) +declare @llvm.vp.select.nxv8i64(, , , i32) + +define @vmacc_vv_nxv8i64( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, mu +; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i64( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv8i64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma +; CHECK-NEXT: vmacc.vv v24, v8, v16 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i64( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv8i64( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vmacc_vx_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, tu, mu +; RV32-NEXT: vmacc.vv v16, v8, v24, v0.t +; RV32-NEXT: vmv8r.v v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmacc_vx_nxv8i64: 
+; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, tu, mu +; RV64-NEXT: vmacc.vx v16, a0, v8, v0.t +; RV64-NEXT: vmv8r.v v8, v16 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i64( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv8i64_unmasked( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vmacc_vx_nxv8i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, tu, ma +; RV32-NEXT: vmacc.vv v16, v8, v24 +; RV32-NEXT: vmv8r.v v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmacc_vx_nxv8i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, tu, ma +; RV64-NEXT: vmacc.vx v16, a0, v8 +; RV64-NEXT: vmv8r.v v8, v16 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i64( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vv_nxv8i64_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vmacc_vv_nxv8i64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv.v.v v8, v24 +; 
CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8i64( %m, %y, %c, i32 %evl) + ret %u +} + +define @vmacc_vx_nxv8i64_ta( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vmacc_vx_nxv8i64_ta: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; RV32-NEXT: vmacc.vv v16, v8, v24, v0.t +; RV32-NEXT: vmv.v.v v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmacc_vx_nxv8i64_ta: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vmacc.vx v16, a0, v8, v0.t +; RV64-NEXT: vmv.v.v v8, v16 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.add.nxv8i64( %x, %c, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8i64( %m, %y, %c, i32 %evl) + ret %u +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll @@ -0,0 +1,2419 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-zvfh,+v,+m -target-abi=lp64d \ +; RUN: 
-verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 + +declare @llvm.vp.mul.nxv1i8(, , , i32) +declare @llvm.vp.sub.nxv1i8(, , , i32) +declare @llvm.vp.merge.nxv1i8(, , , i32) +declare @llvm.vp.select.nxv1i8(, , , i32) + +define @vnmsac_vv_nxv1i8( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv1i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv1i8_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv1i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma +; CHECK-NEXT: vnmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv1i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i8( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv1i8( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv1i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i8( 
%m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv1i8_unmasked( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv1i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, tu, ma +; CHECK-NEXT: vnmsac.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv1i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i8( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv1i8_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv1i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv1i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv1i8_ta( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv1i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv1i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1i8( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv2i8(, , , i32) 
+declare @llvm.vp.sub.nxv2i8(, , , i32) +declare @llvm.vp.merge.nxv2i8(, , , i32) +declare @llvm.vp.select.nxv2i8(, , , i32) + +define @vnmsac_vv_nxv2i8( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv2i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv2i8_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, ma +; CHECK-NEXT: vnmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv2i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i8( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv2i8( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv2i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv2i8_unmasked( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; 
CHECK-LABEL: vnmsac_vx_nxv2i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma +; CHECK-NEXT: vnmsac.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv2i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i8( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv2i8_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv2i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv2i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv2i8_ta( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv2i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv2i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2i8( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv4i8(, , , i32) +declare @llvm.vp.sub.nxv4i8(, , , i32) +declare @llvm.vp.merge.nxv4i8(, , , i32) +declare @llvm.vp.select.nxv4i8(, , , 
i32) + +define @vnmsac_vv_nxv4i8( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv4i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv4i8_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma +; CHECK-NEXT: vnmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv4i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i8( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv4i8( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv4i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv4i8_unmasked( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv4i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma +; 
CHECK-NEXT: vnmsac.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv4i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i8( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv4i8_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv4i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv4i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv4i8_ta( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv4i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv4i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4i8( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv8i8(, , , i32) +declare @llvm.vp.sub.nxv8i8(, , , i32) +declare @llvm.vp.merge.nxv8i8(, , , i32) +declare @llvm.vp.select.nxv8i8(, , , i32) + +define @vnmsac_vv_nxv8i8( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv8i8: +; 
CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv8i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv8i8_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma +; CHECK-NEXT: vnmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv8i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i8( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv8i8( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv8i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv8i8_unmasked( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv8i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, tu, ma +; CHECK-NEXT: vnmsac.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 
%b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv8i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i8( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv8i8_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv8i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv8i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv8i8_ta( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv8i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv8i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8i8( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv16i8(, , , i32) +declare @llvm.vp.sub.nxv16i8(, , , i32) +declare @llvm.vp.merge.nxv16i8(, , , i32) +declare @llvm.vp.select.nxv16i8(, , , i32) + +define @vnmsac_vv_nxv16i8( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, mu +; CHECK-NEXT: vnmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: 
vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv16i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv16i8_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, ma +; CHECK-NEXT: vnmsac.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv16i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i8( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv16i8( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, tu, mu +; CHECK-NEXT: vnmsac.vx v10, a0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv16i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv16i8_unmasked( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv16i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, tu, ma +; CHECK-NEXT: vnmsac.vx v10, a0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, 
i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv16i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i8( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv16i8_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv16i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vnmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv16i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv16i8_ta( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv16i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu +; CHECK-NEXT: vnmsac.vx v10, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv16i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16i8( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv32i8(, , , i32) +declare @llvm.vp.sub.nxv32i8(, , , i32) +declare @llvm.vp.merge.nxv32i8(, , , i32) +declare @llvm.vp.select.nxv32i8(, , , i32) + +define @vnmsac_vv_nxv32i8( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, mu +; CHECK-NEXT: vnmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + 
%allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv32i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv32i8_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, ma +; CHECK-NEXT: vnmsac.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv32i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32i8( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv32i8( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, tu, mu +; CHECK-NEXT: vnmsac.vx v12, a0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv32i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv32i8_unmasked( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv32i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, tu, ma +; CHECK-NEXT: vnmsac.vx v12, a0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call 
@llvm.vp.mul.nxv32i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv32i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32i8( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv32i8_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv32i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vnmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv32i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv32i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv32i8_ta( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv32i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu +; CHECK-NEXT: vnmsac.vx v12, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv32i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv32i8( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv64i8(, , , i32) +declare @llvm.vp.sub.nxv64i8(, , , i32) +declare @llvm.vp.merge.nxv64i8(, , , i32) +declare @llvm.vp.select.nxv64i8(, , , i32) + +define @vnmsac_vv_nxv64i8( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8r.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, mu +; CHECK-NEXT: vnmsac.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, 
zeroinitializer + %x = call @llvm.vp.mul.nxv64i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv64i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv64i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv64i8_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8r.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma +; CHECK-NEXT: vnmsac.vv v24, v8, v16 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv64i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv64i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv64i8( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv64i8( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, mu +; CHECK-NEXT: vnmsac.vx v16, a0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv64i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv64i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv64i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv64i8_unmasked( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv64i8_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma +; CHECK-NEXT: vnmsac.vx v16, a0, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call 
@llvm.vp.mul.nxv64i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv64i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv64i8( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv64i8_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv64i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8r.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu +; CHECK-NEXT: vnmsac.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv64i8( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv64i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv64i8( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv64i8_ta( %a, i8 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv64i8_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu +; CHECK-NEXT: vnmsac.vx v16, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i8 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv64i8( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv64i8( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv64i8( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv1i16(, , , i32) +declare @llvm.vp.sub.nxv1i16(, , , i32) +declare @llvm.vp.merge.nxv1i16(, , , i32) +declare @llvm.vp.select.nxv1i16(, , , i32) + +define @vnmsac_vv_nxv1i16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, 
zeroinitializer + %x = call @llvm.vp.mul.nxv1i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv1i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv1i16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma +; CHECK-NEXT: vnmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv1i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i16( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv1i16( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv1i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv1i16_unmasked( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv1i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, tu, ma +; CHECK-NEXT: vnmsac.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i16( %a, %vb, 
%allones, i32 %evl) + %y = call @llvm.vp.sub.nxv1i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i16( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv1i16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv1i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv1i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv1i16_ta( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv1i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv1i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1i16( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv2i16(, , , i32) +declare @llvm.vp.sub.nxv2i16(, , , i32) +declare @llvm.vp.merge.nxv2i16(, , , i32) +declare @llvm.vp.select.nxv2i16(, , , i32) + +define @vnmsac_vv_nxv2i16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i16( %a, %b, %allones, 
i32 %evl) + %y = call @llvm.vp.sub.nxv2i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv2i16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma +; CHECK-NEXT: vnmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv2i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i16( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv2i16( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv2i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv2i16_unmasked( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv2i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, tu, ma +; CHECK-NEXT: vnmsac.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv2i16( %c, %x, %allones, 
i32 %evl) + %u = call @llvm.vp.merge.nxv2i16( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv2i16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv2i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv2i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv2i16_ta( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv2i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv2i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2i16( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv4i16(, , , i32) +declare @llvm.vp.sub.nxv4i16(, , , i32) +declare @llvm.vp.merge.nxv4i16(, , , i32) +declare @llvm.vp.select.nxv4i16(, , , i32) + +define @vnmsac_vv_nxv4i16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv4i16( %c, %x, %allones, i32 %evl) + 
%u = call @llvm.vp.merge.nxv4i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv4i16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma +; CHECK-NEXT: vnmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv4i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i16( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv4i16( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv4i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv4i16_unmasked( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv4i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma +; CHECK-NEXT: vnmsac.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv4i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i16( %allones, %y, %c, i32 %evl) + 
ret %u +} + +define @vnmsac_vv_nxv4i16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv4i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv4i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv4i16_ta( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv4i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv4i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4i16( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv8i16(, , , i32) +declare @llvm.vp.sub.nxv8i16(, , , i32) +declare @llvm.vp.merge.nxv8i16(, , , i32) +declare @llvm.vp.select.nxv8i16(, , , i32) + +define @vnmsac_vv_nxv8i16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vnmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv8i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i16( %m, %y, %c, i32 %evl) + ret %u +} + +define 
@vnmsac_vv_nxv8i16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma +; CHECK-NEXT: vnmsac.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv8i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i16( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv8i16( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, mu +; CHECK-NEXT: vnmsac.vx v10, a0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv8i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv8i16_unmasked( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv8i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, ma +; CHECK-NEXT: vnmsac.vx v10, a0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv8i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i16( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv8i16_ta( %a, %b, %c, %m, i32 zeroext 
%evl) { +; CHECK-LABEL: vnmsac_vv_nxv8i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu +; CHECK-NEXT: vnmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv8i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv8i16_ta( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv8i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu +; CHECK-NEXT: vnmsac.vx v10, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv8i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8i16( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv16i16(, , , i32) +declare @llvm.vp.sub.nxv16i16(, , , i32) +declare @llvm.vp.merge.nxv16i16(, , , i32) +declare @llvm.vp.select.nxv16i16(, , , i32) + +define @vnmsac_vv_nxv16i16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vnmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv16i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv16i16_unmasked( %a, %b, %c, %m, i32 zeroext 
%evl) { +; CHECK-LABEL: vnmsac_vv_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, ma +; CHECK-NEXT: vnmsac.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv16i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i16( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv16i16( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, mu +; CHECK-NEXT: vnmsac.vx v12, a0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv16i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv16i16_unmasked( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv16i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, ma +; CHECK-NEXT: vnmsac.vx v12, a0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv16i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i16( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv16i16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv16i16_ta: 
+; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vnmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv16i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv16i16_ta( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv16i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu +; CHECK-NEXT: vnmsac.vx v12, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv16i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16i16( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv32i16(, , , i32) +declare @llvm.vp.sub.nxv32i16(, , , i32) +declare @llvm.vp.merge.nxv32i16(, , , i32) +declare @llvm.vp.select.nxv32i16(, , , i32) + +define @vnmsac_vv_nxv32i16( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu +; CHECK-NEXT: vnmsac.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv32i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv32i16_unmasked( %a, %b, %c, %m, i32 zeroext 
%evl) { +; CHECK-LABEL: vnmsac_vv_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma +; CHECK-NEXT: vnmsac.vv v24, v8, v16 +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv32i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32i16( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv32i16( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu +; CHECK-NEXT: vnmsac.vx v16, a0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv32i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv32i16_unmasked( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv32i16_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma +; CHECK-NEXT: vnmsac.vx v16, a0, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv32i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv32i16( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv32i16_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; 
CHECK-LABEL: vnmsac_vv_nxv32i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu +; CHECK-NEXT: vnmsac.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i16( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv32i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv32i16( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv32i16_ta( %a, i16 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv32i16_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu +; CHECK-NEXT: vnmsac.vx v16, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i16 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv32i16( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv32i16( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv32i16( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv1i32(, , , i32) +declare @llvm.vp.sub.nxv1i32(, , , i32) +declare @llvm.vp.merge.nxv1i32(, , , i32) +declare @llvm.vp.select.nxv1i32(, , , i32) + +define @vnmsac_vv_nxv1i32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv1i32( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i32( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv1i32_unmasked( %a, 
%b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma +; CHECK-NEXT: vnmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv1i32( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i32( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv1i32( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv1i32( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i32( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv1i32_unmasked( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv1i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, tu, ma +; CHECK-NEXT: vnmsac.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv1i32( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i32( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv1i32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: 
vnmsac_vv_nxv1i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv1i32( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1i32( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv1i32_ta( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv1i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv1i32( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1i32( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv2i32(, , , i32) +declare @llvm.vp.sub.nxv2i32(, , , i32) +declare @llvm.vp.merge.nxv2i32(, , , i32) +declare @llvm.vp.select.nxv2i32(, , , i32) + +define @vnmsac_vv_nxv2i32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv2i32( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i32( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv2i32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: 
vnmsac_vv_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma +; CHECK-NEXT: vnmsac.vv v10, v8, v9 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv2i32( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i32( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv2i32( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv2i32( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i32( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv2i32_unmasked( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv2i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma +; CHECK-NEXT: vnmsac.vx v9, a0, v8 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv2i32( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i32( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv2i32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv2i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli 
zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv2i32( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2i32( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv2i32_ta( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv2i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; CHECK-NEXT: vnmsac.vx v9, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv2i32( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2i32( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv4i32(, , , i32) +declare @llvm.vp.sub.nxv4i32(, , , i32) +declare @llvm.vp.merge.nxv4i32(, , , i32) +declare @llvm.vp.select.nxv4i32(, , , i32) + +define @vnmsac_vv_nxv4i32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vnmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv4i32( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i32( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv4i32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, 
a0, e32, m2, tu, ma +; CHECK-NEXT: vnmsac.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv4i32( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i32( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv4i32( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, mu +; CHECK-NEXT: vnmsac.vx v10, a0, v8, v0.t +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv4i32( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i32( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv4i32_unmasked( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv4i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma +; CHECK-NEXT: vnmsac.vx v10, a0, v8 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv4i32( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i32( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv4i32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv4i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu +; CHECK-NEXT: vnmsac.vv v12, v8, v10, v0.t +; 
CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv4i32( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4i32( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv4i32_ta( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv4i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT: vnmsac.vx v10, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv4i32( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4i32( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv8i32(, , , i32) +declare @llvm.vp.sub.nxv8i32(, , , i32) +declare @llvm.vp.merge.nxv8i32(, , , i32) +declare @llvm.vp.select.nxv8i32(, , , i32) + +define @vnmsac_vv_nxv8i32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vnmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv8i32( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i32( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv8i32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, ma +; CHECK-NEXT: vnmsac.vv v16, v8, v12 +; CHECK-NEXT: 
vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv8i32( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i32( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv8i32( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, mu +; CHECK-NEXT: vnmsac.vx v12, a0, v8, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv8i32( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i32( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv8i32_unmasked( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv8i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, ma +; CHECK-NEXT: vnmsac.vx v12, a0, v8 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv8i32( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i32( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv8i32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv8i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu +; CHECK-NEXT: vnmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement 
poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv8i32( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8i32( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv8i32_ta( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv8i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT: vnmsac.vx v12, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv8i32( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8i32( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv16i32(, , , i32) +declare @llvm.vp.sub.nxv16i32(, , , i32) +declare @llvm.vp.merge.nxv16i32(, , , i32) +declare @llvm.vp.select.nxv16i32(, , , i32) + +define @vnmsac_vv_nxv16i32( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu +; CHECK-NEXT: vnmsac.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv16i32( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i32( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv16i32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma +; CHECK-NEXT: vnmsac.vv v24, v8, v16 +; 
CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv16i32( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i32( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv16i32( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu +; CHECK-NEXT: vnmsac.vx v16, a0, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv16i32( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i32( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv16i32_unmasked( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv16i32_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma +; CHECK-NEXT: vnmsac.vx v16, a0, v8 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv16i32( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv16i32( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv16i32_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv16i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vnmsac.vv v24, v8, v16, v0.t +; CHECK-NEXT: 
vmv.v.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i32( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv16i32( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16i32( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv16i32_ta( %a, i32 %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vx_nxv16i32_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-NEXT: vnmsac.vx v16, a0, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %elt.head = insertelement poison, i32 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv16i32( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv16i32( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv16i32( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv1i64(, , , i32) +declare @llvm.vp.sub.nxv1i64(, , , i32) +declare @llvm.vp.merge.nxv1i64(, , , i32) +declare @llvm.vp.select.nxv1i64(, , , i32) + +define @vnmsac_vv_nxv1i64( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv1i64( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i64( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv1i64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv1i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, ma +; CHECK-NEXT: vnmsac.vv v10, v8, v9 +; CHECK-NEXT: 
vmv1r.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv1i64( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i64( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv1i64( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vnmsac_vx_nxv1i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, mu +; RV32-NEXT: vnmsac.vv v9, v8, v10, v0.t +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vnmsac_vx_nxv1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, mu +; RV64-NEXT: vnmsac.vx v9, a0, v8, v0.t +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv1i64( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i64( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv1i64_unmasked( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vnmsac_vx_nxv1i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, tu, ma +; RV32-NEXT: vnmsac.vv v9, v8, v10 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; 
RV32-NEXT: ret +; +; RV64-LABEL: vnmsac_vx_nxv1i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, tu, ma +; RV64-NEXT: vnmsac.vx v9, a0, v8 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv1i64( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv1i64( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv1i64_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv1i64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv1i64( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1i64( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv1i64_ta( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vnmsac_vx_nxv1i64_ta: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: vnmsac.vv v9, v8, v10, v0.t +; RV32-NEXT: vmv.v.v v8, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vnmsac_vx_nxv1i64_ta: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; RV64-NEXT: vnmsac.vx v9, a0, v8, v0.t +; RV64-NEXT: vmv.v.v v8, v9 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector 
%elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv1i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv1i64( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv1i64( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv2i64(, , , i32) +declare @llvm.vp.sub.nxv2i64(, , , i32) +declare @llvm.vp.merge.nxv2i64(, , , i32) +declare @llvm.vp.select.nxv2i64(, , , i32) + +define @vnmsac_vv_nxv2i64( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vnmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv2i64( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i64( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv2i64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv2i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma +; CHECK-NEXT: vnmsac.vv v12, v8, v10 +; CHECK-NEXT: vmv2r.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv2i64( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i64( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv2i64( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vnmsac_vx_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v12, 
(a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, mu +; RV32-NEXT: vnmsac.vv v10, v8, v12, v0.t +; RV32-NEXT: vmv2r.v v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vnmsac_vx_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, mu +; RV64-NEXT: vnmsac.vx v10, a0, v8, v0.t +; RV64-NEXT: vmv2r.v v8, v10 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv2i64( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i64( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv2i64_unmasked( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vnmsac_vx_nxv2i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, tu, ma +; RV32-NEXT: vnmsac.vv v10, v8, v12 +; RV32-NEXT: vmv2r.v v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vnmsac_vx_nxv2i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, tu, ma +; RV64-NEXT: vnmsac.vx v10, a0, v8 +; RV64-NEXT: vmv2r.v v8, v10 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv2i64( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv2i64( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv2i64_ta( %a, %b, %c, %m, i32 zeroext %evl) { 
+; CHECK-LABEL: vnmsac_vv_nxv2i64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu +; CHECK-NEXT: vnmsac.vv v12, v8, v10, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv2i64( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2i64( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv2i64_ta( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vnmsac_vx_nxv2i64_ta: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu +; RV32-NEXT: vnmsac.vv v10, v8, v12, v0.t +; RV32-NEXT: vmv.v.v v8, v10 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vnmsac_vx_nxv2i64_ta: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu +; RV64-NEXT: vnmsac.vx v10, a0, v8, v0.t +; RV64-NEXT: vmv.v.v v8, v10 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv2i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv2i64( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv2i64( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv4i64(, , , i32) +declare @llvm.vp.sub.nxv4i64(, , , i32) +declare @llvm.vp.merge.nxv4i64(, , , i32) +declare @llvm.vp.select.nxv4i64(, , , i32) + +define @vnmsac_vv_nxv4i64( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: 
vnmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv4i64( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i64( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv4i64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv4i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma +; CHECK-NEXT: vnmsac.vv v16, v8, v12 +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv4i64( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i64( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv4i64( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vnmsac_vx_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, mu +; RV32-NEXT: vnmsac.vv v12, v8, v16, v0.t +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vnmsac_vx_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, mu +; RV64-NEXT: vnmsac.vx v12, a0, v8, v0.t +; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv4i64( %c, %x, 
%allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i64( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv4i64_unmasked( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vnmsac_vx_nxv4i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, ma +; RV32-NEXT: vnmsac.vv v12, v8, v16 +; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vnmsac_vx_nxv4i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, tu, ma +; RV64-NEXT: vnmsac.vx v12, a0, v8 +; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv4i64( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv4i64( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv4i64_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv4i64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu +; CHECK-NEXT: vnmsac.vv v16, v8, v12, v0.t +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv4i64( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4i64( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv4i64_ta( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vnmsac_vx_nxv4i64_ta: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: 
.cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu +; RV32-NEXT: vnmsac.vv v12, v8, v16, v0.t +; RV32-NEXT: vmv.v.v v8, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vnmsac_vx_nxv4i64_ta: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; RV64-NEXT: vnmsac.vx v12, a0, v8, v0.t +; RV64-NEXT: vmv.v.v v8, v12 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv4i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv4i64( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv4i64( %m, %y, %c, i32 %evl) + ret %u +} + +declare @llvm.vp.mul.nxv8i64(, , , i32) +declare @llvm.vp.sub.nxv8i64(, , , i32) +declare @llvm.vp.merge.nxv8i64(, , , i32) +declare @llvm.vp.select.nxv8i64(, , , i32) + +define @vnmsac_vv_nxv8i64( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, mu +; CHECK-NEXT: vnmsac.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv8i64( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i64( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv8i64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv8i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma +; CHECK-NEXT: vnmsac.vv v24, v8, v16 +; 
CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv8i64( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i64( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv8i64( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vnmsac_vx_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, tu, mu +; RV32-NEXT: vnmsac.vv v16, v8, v24, v0.t +; RV32-NEXT: vmv8r.v v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vnmsac_vx_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, tu, mu +; RV64-NEXT: vnmsac.vx v16, a0, v8, v0.t +; RV64-NEXT: vmv8r.v v8, v16 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv8i64( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i64( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv8i64_unmasked( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vnmsac_vx_nxv8i64_unmasked: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, tu, ma +; RV32-NEXT: vnmsac.vv v16, v8, v24 +; RV32-NEXT: vmv8r.v v8, v16 +; RV32-NEXT: addi sp, 
sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vnmsac_vx_nxv8i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, tu, ma +; RV64-NEXT: vnmsac.vx v16, a0, v8 +; RV64-NEXT: vmv8r.v v8, v16 +; RV64-NEXT: ret + %elt.head = insertelement poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv8i64( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.merge.nxv8i64( %allones, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vv_nxv8i64_ta( %a, %b, %c, %m, i32 zeroext %evl) { +; CHECK-LABEL: vnmsac_vv_nxv8i64_ta: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; CHECK-NEXT: vnmsac.vv v24, v8, v16, v0.t +; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: ret + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i64( %a, %b, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv8i64( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8i64( %m, %y, %c, i32 %evl) + ret %u +} + +define @vnmsac_vx_nxv8i64_ta( %a, i64 %b, %c, %m, i32 zeroext %evl) { +; RV32-LABEL: vnmsac_vx_nxv8i64_ta: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu +; RV32-NEXT: vnmsac.vv v16, v8, v24, v0.t +; RV32-NEXT: vmv.v.v v8, v16 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vnmsac_vx_nxv8i64_ta: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu +; RV64-NEXT: vnmsac.vx v16, a0, v8, v0.t +; RV64-NEXT: vmv.v.v v8, v16 +; RV64-NEXT: ret + %elt.head = insertelement 
poison, i64 %b, i32 0 + %vb = shufflevector %elt.head, poison, zeroinitializer + %splat = insertelement poison, i1 -1, i32 0 + %allones = shufflevector %splat, poison, zeroinitializer + %x = call @llvm.vp.mul.nxv8i64( %a, %vb, %allones, i32 %evl) + %y = call @llvm.vp.sub.nxv8i64( %c, %x, %allones, i32 %evl) + %u = call @llvm.vp.select.nxv8i64( %m, %y, %c, i32 %evl) + ret %u +}