diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1170,7 +1170,13 @@
   case CASE_VFMA_OPCODE_LMULS(FMACC, VV):
   case CASE_VFMA_OPCODE_LMULS(FMSAC, VV):
   case CASE_VFMA_OPCODE_LMULS(FNMACC, VV):
-  case CASE_VFMA_OPCODE_LMULS(FNMSAC, VV): {
+  case CASE_VFMA_OPCODE_LMULS(FNMSAC, VV):
+  case CASE_VFMA_OPCODE_LMULS(MADD, VX):
+  case CASE_VFMA_OPCODE_LMULS(NMSUB, VX):
+  case CASE_VFMA_OPCODE_LMULS(MACC, VX):
+  case CASE_VFMA_OPCODE_LMULS(NMSAC, VX):
+  case CASE_VFMA_OPCODE_LMULS(MACC, VV):
+  case CASE_VFMA_OPCODE_LMULS(NMSAC, VV): {
     // For these instructions we can only swap operand 1 and operand 3 by
     // changing the opcode.
     unsigned CommutableOpIdx1 = 1;
@@ -1183,7 +1189,9 @@
   case CASE_VFMA_OPCODE_LMULS(FMADD, VV):
   case CASE_VFMA_OPCODE_LMULS(FMSUB, VV):
   case CASE_VFMA_OPCODE_LMULS(FNMADD, VV):
-  case CASE_VFMA_OPCODE_LMULS(FNMSUB, VV): {
+  case CASE_VFMA_OPCODE_LMULS(FNMSUB, VV):
+  case CASE_VFMA_OPCODE_LMULS(MADD, VV):
+  case CASE_VFMA_OPCODE_LMULS(NMSUB, VV): {
     // For these instructions we have more freedom. We can commute with the
     // other multiplicand or with the addend/subtrahend/minuend.

@@ -1288,7 +1296,13 @@
   case CASE_VFMA_OPCODE_LMULS(FMACC, VV):
   case CASE_VFMA_OPCODE_LMULS(FMSAC, VV):
   case CASE_VFMA_OPCODE_LMULS(FNMACC, VV):
-  case CASE_VFMA_OPCODE_LMULS(FNMSAC, VV): {
+  case CASE_VFMA_OPCODE_LMULS(FNMSAC, VV):
+  case CASE_VFMA_OPCODE_LMULS(MADD, VX):
+  case CASE_VFMA_OPCODE_LMULS(NMSUB, VX):
+  case CASE_VFMA_OPCODE_LMULS(MACC, VX):
+  case CASE_VFMA_OPCODE_LMULS(NMSAC, VX):
+  case CASE_VFMA_OPCODE_LMULS(MACC, VV):
+  case CASE_VFMA_OPCODE_LMULS(NMSAC, VV): {
     // It only make sense to toggle these between clobbering the
     // addend/subtrahend/minuend one of the multiplicands.
     assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
@@ -1309,6 +1323,12 @@
       CASE_VFMA_CHANGE_OPCODE_LMULS(FMSAC, FMSUB, VV)
       CASE_VFMA_CHANGE_OPCODE_LMULS(FNMACC, FNMADD, VV)
       CASE_VFMA_CHANGE_OPCODE_LMULS(FNMSAC, FNMSUB, VV)
+      CASE_VFMA_CHANGE_OPCODE_LMULS(MACC, MADD, VX)
+      CASE_VFMA_CHANGE_OPCODE_LMULS(MADD, MACC, VX)
+      CASE_VFMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VX)
+      CASE_VFMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VX)
+      CASE_VFMA_CHANGE_OPCODE_LMULS(MACC, MADD, VV)
+      CASE_VFMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VV)
     }

     auto &WorkingMI = cloneIfNew(MI);
@@ -1319,7 +1339,9 @@
   case CASE_VFMA_OPCODE_LMULS(FMADD, VV):
   case CASE_VFMA_OPCODE_LMULS(FMSUB, VV):
   case CASE_VFMA_OPCODE_LMULS(FNMADD, VV):
-  case CASE_VFMA_OPCODE_LMULS(FNMSUB, VV): {
+  case CASE_VFMA_OPCODE_LMULS(FNMSUB, VV):
+  case CASE_VFMA_OPCODE_LMULS(MADD, VV):
+  case CASE_VFMA_OPCODE_LMULS(NMSUB, VV): {
     assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
     // If one of the operands, is the addend we need to change opcode.
     // Otherwise we're just swapping 2 of the multiplicands.
@@ -1332,6 +1354,8 @@
       CASE_VFMA_CHANGE_OPCODE_LMULS(FMSUB, FMSAC, VV)
       CASE_VFMA_CHANGE_OPCODE_LMULS(FNMADD, FNMACC, VV)
       CASE_VFMA_CHANGE_OPCODE_LMULS(FNMSUB, FNMSAC, VV)
+      CASE_VFMA_CHANGE_OPCODE_LMULS(MADD, MACC, VV)
+      CASE_VFMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VV)
     }

     auto &WorkingMI = cloneIfNew(MI);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -1936,6 +1936,22 @@
 multiclass VPseudoTernaryV_VV_VX_AAXA<string Constraint = ""> {
   defm "" : VPseudoTernaryV_VV<Constraint>;
   defm "" : VPseudoTernaryV_VX_AAXA<Constraint>;
+
+  foreach m = MxList.m in {
+    // Add a commutable version for use by IR mul+add.
+    // NOTE: We need this because we use a tail undisturbed policy on the
+    // intrinsic version so we can't commute those instructions since it would
+    // change which input operand is tied to the destination. That would
+    // remove user control of the tail elements.
+    let isCommutable = 1, ForceTailAgnostic = true, VLMul = m.value in {
+      def "_VV_" # m.MX # "_COMMUTABLE" : VPseudoTernaryNoMask<m.vrclass,
+                                                               m.vrclass,
+                                                               m.vrclass,
+                                                               Constraint>;
+      def "_VX_" # m.MX # "_COMMUTABLE" :
+        VPseudoTernaryNoMask<m.vrclass, GPR, m.vrclass, Constraint>;
+    }
+  }
 }

 multiclass VPseudoTernaryV_VV_VF_AAXA<string Constraint = ""> {
@@ -1943,7 +1959,7 @@
   defm "" : VPseudoTernaryV_VF_AAXA<Constraint>;

   foreach m = MxList.m in {
-    // Add a commutable version for use by IR fma.
+    // Add a commutable version for use by IR mul+add.
     // NOTE: We need this because we use a tail undisturbed policy on the
     // intrinsic version so we can't commute those instructions since it would
     // change which input operand is tied to the destination. That would
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -472,6 +472,38 @@
 defm : VPatBinarySDNode_VV_VX<urem, "PseudoVREMU">;
 defm : VPatBinarySDNode_VV_VX<srem, "PseudoVREM">;

+// 12.13 Vector Single-Width Integer Multiply-Add Instructions.
+foreach vti = AllIntegerVectors in {
+  // NOTE: We choose VMADD because it has the most commuting freedom. So it
+  // works best with how TwoAddressInstructionPass tries commuting.
+  defvar suffix = vti.LMul.MX # "_COMMUTABLE";
+  def : Pat<(vti.Vector (add vti.RegClass:$rs2,
+                             (mul_oneuse vti.RegClass:$rs1, vti.RegClass:$rd))),
+            (!cast<Instruction>("PseudoVMADD_VV_"# suffix)
+                 vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+                 vti.AVL, vti.Log2SEW)>;
+  def : Pat<(vti.Vector (sub vti.RegClass:$rs2,
+                             (mul_oneuse vti.RegClass:$rs1, vti.RegClass:$rd))),
+            (!cast<Instruction>("PseudoVNMSUB_VV_"# suffix)
+                 vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+                 vti.AVL, vti.Log2SEW)>;
+
+  // The choice of VMADD here is arbitrary, vmadd.vx and vmacc.vx are equally
+  // commutable.
+  def : Pat<(vti.Vector (add vti.RegClass:$rs2,
+                             (mul_oneuse (SplatPat XLenVT:$rs1),
+                                         vti.RegClass:$rd))),
+            (!cast<Instruction>("PseudoVMADD_VX_" # suffix)
+                 vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+                 vti.AVL, vti.Log2SEW)>;
+  def : Pat<(vti.Vector (sub vti.RegClass:$rs2,
+                             (mul_oneuse (SplatPat XLenVT:$rs1),
+                                         vti.RegClass:$rd))),
+            (!cast<Instruction>("PseudoVNMSUB_VX_" # suffix)
+                 vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+                 vti.AVL, vti.Log2SEW)>;
+}
+
 // 12.15. Vector Integer Merge Instructions
 foreach vti = AllIntegerVectors in {
   def : Pat<(vti.Vector (vselect (vti.Mask VMV0:$vm), vti.RegClass:$rs1,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -220,6 +220,12 @@
   SDTCisSameNumEltsAs<1, 3>, SDTCisVT<4, XLenVT>
 ]>;

+def riscv_mul_vl_oneuse : PatFrag<(ops node:$A, node:$B, node:$C, node:$D),
+                                  (riscv_mul_vl node:$A, node:$B, node:$C,
+                                                node:$D), [{
+  return N->hasOneUse();
+}]>;
+
 foreach kind = ["ADD", "UMAX", "SMAX", "UMIN", "SMIN", "AND", "OR", "XOR",
                 "FADD", "SEQ_FADD", "FMIN", "FMAX"] in
   def rvv_vecreduce_#kind#_vl : SDNode<"RISCVISD::VECREDUCE_"#kind#"_VL", SDTRVVVecReduce>;
@@ -731,6 +737,53 @@
 defm : VPatBinaryVL_VV_VX<riscv_urem_vl, "PseudoVREMU">;
 defm : VPatBinaryVL_VV_VX<riscv_srem_vl, "PseudoVREM">;

+// 12.13 Vector Single-Width Integer Multiply-Add Instructions
+foreach vti = AllIntegerVectors in {
+  // NOTE: We choose VMADD because it has the most commuting freedom. So it
+  // works best with how TwoAddressInstructionPass tries commuting.
+  defvar suffix = vti.LMul.MX # "_COMMUTABLE";
+  def : Pat<(vti.Vector
+             (riscv_add_vl vti.RegClass:$rs2,
+                           (riscv_mul_vl_oneuse vti.RegClass:$rs1,
+                                                vti.RegClass:$rd,
+                                                (vti.Mask true_mask), VLOpFrag),
+                           (vti.Mask true_mask), VLOpFrag)),
+            (!cast<Instruction>("PseudoVMADD_VV_"# suffix)
+                 vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+                 GPR:$vl, vti.Log2SEW)>;
+  def : Pat<(vti.Vector
+             (riscv_sub_vl vti.RegClass:$rs2,
+                           (riscv_mul_vl_oneuse vti.RegClass:$rs1,
+                                                vti.RegClass:$rd,
+                                                (vti.Mask true_mask), VLOpFrag),
+                           (vti.Mask true_mask), VLOpFrag)),
+            (!cast<Instruction>("PseudoVNMSUB_VV_"# suffix)
+                 vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+                 GPR:$vl, vti.Log2SEW)>;
+
+  // The choice of VMADD here is arbitrary, vmadd.vx and vmacc.vx are equally
+  // commutable.
+  def : Pat<(vti.Vector
+             (riscv_add_vl vti.RegClass:$rs2,
+                           (riscv_mul_vl_oneuse (SplatPat XLenVT:$rs1),
+                                                vti.RegClass:$rd,
+                                                (vti.Mask true_mask), VLOpFrag),
+                           (vti.Mask true_mask), VLOpFrag)),
+            (!cast<Instruction>("PseudoVMADD_VX_" # suffix)
+                 vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+                 GPR:$vl, vti.Log2SEW)>;
+  def : Pat<(vti.Vector
+             (riscv_sub_vl vti.RegClass:$rs2,
+                           (riscv_mul_vl_oneuse (SplatPat XLenVT:$rs1),
+                                                vti.RegClass:$rd,
+                                                (vti.Mask true_mask),
+                                                VLOpFrag),
+                           (vti.Mask true_mask), VLOpFrag)),
+            (!cast<Instruction>("PseudoVNMSUB_VX_" # suffix)
+                 vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+                 GPR:$vl, vti.Log2SEW)>;
+}
+
 // 12.15.
Vector Integer Merge Instructions foreach vti = AllIntegerVectors in { def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask VMV0:$vm), diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -1233,19 +1233,18 @@ ; RV32-NEXT: vmv.v.i v27, -1 ; RV32-NEXT: vmerge.vim v27, v27, 0, v0 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; RV32-NEXT: vmul.vv v25, v25, v27 -; RV32-NEXT: vadd.vv v25, v26, v25 +; RV32-NEXT: vmadd.vv v27, v25, v26 ; RV32-NEXT: addi a1, zero, 63 -; RV32-NEXT: vsrl.vx v26, v25, a1 +; RV32-NEXT: vsrl.vx v25, v27, a1 ; RV32-NEXT: addi a1, zero, 1 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; RV32-NEXT: vmv.s.x v27, a1 +; RV32-NEXT: vmv.s.x v26, a1 ; RV32-NEXT: vmv.v.i v28, 0 ; RV32-NEXT: vsetivli zero, 3, e32, m1, tu, mu -; RV32-NEXT: vslideup.vi v28, v27, 2 +; RV32-NEXT: vslideup.vi v28, v26, 2 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; RV32-NEXT: vsra.vv v25, v25, v28 -; RV32-NEXT: vadd.vv v25, v25, v26 +; RV32-NEXT: vsra.vv v26, v27, v28 +; RV32-NEXT: vadd.vv v25, v26, v25 ; RV32-NEXT: vse64.v v25, (a0) ; RV32-NEXT: ret ; @@ -1256,8 +1255,6 @@ ; RV64-NEXT: vmv.v.i v26, -1 ; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu ; RV64-NEXT: vmv.s.x v26, zero -; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; RV64-NEXT: vmul.vv v26, v25, v26 ; RV64-NEXT: lui a1, 21845 ; RV64-NEXT: addiw a1, a1, 1365 ; RV64-NEXT: slli a1, a1, 12 @@ -1266,18 +1263,19 @@ ; RV64-NEXT: addi a1, a1, 1365 ; RV64-NEXT: slli a1, a1, 12 ; RV64-NEXT: addi a2, a1, 1365 +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; RV64-NEXT: vmv.v.x v27, a2 ; RV64-NEXT: addi a1, a1, 1366 ; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu ; RV64-NEXT: vmv.s.x v27, a1 ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; RV64-NEXT: vmulh.vv v25, v25, v27 -; RV64-NEXT: vadd.vv v25, v25, v26 +; RV64-NEXT: vmulh.vv v27, v25, v27 +; RV64-NEXT: vmacc.vv v27, v25, v26 ; RV64-NEXT: addi a1, zero, 63 -; RV64-NEXT: vsrl.vx v26, v25, a1 -; RV64-NEXT: vid.v v27 -; RV64-NEXT: vsra.vv v25, v25, v27 -; RV64-NEXT: vadd.vv v25, v25, v26 +; RV64-NEXT: vsrl.vx v25, v27, a1 +; RV64-NEXT: vid.v v26 +; RV64-NEXT: vsra.vv v26, v27, v26 +; RV64-NEXT: vadd.vv v25, v26, v25 ; RV64-NEXT: vse64.v v25, (a0) ; RV64-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x @@ -4709,37 +4707,36 @@ ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; LMULMAX2-RV32-NEXT: vle64.v v26, (a0) -; LMULMAX2-RV32-NEXT: addi a1, zero, 51 -; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; LMULMAX2-RV32-NEXT: vmv.v.i v28, -1 -; LMULMAX2-RV32-NEXT: vmerge.vim v28, v28, 0, v0 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; LMULMAX2-RV32-NEXT: vmul.vv v28, v26, v28 ; LMULMAX2-RV32-NEXT: addi a1, zero, 17 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu ; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 ; LMULMAX2-RV32-NEXT: lui a1, 349525 ; LMULMAX2-RV32-NEXT: addi a2, a1, 1365 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; LMULMAX2-RV32-NEXT: vmv.v.x v30, a2 +; LMULMAX2-RV32-NEXT: vmv.v.x v28, a2 ; LMULMAX2-RV32-NEXT: addi a1, a1, 1366 -; LMULMAX2-RV32-NEXT: vmerge.vxm v30, v30, a1, v0 +; LMULMAX2-RV32-NEXT: vmerge.vxm v28, v28, a1, v0 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; LMULMAX2-RV32-NEXT: vmulh.vv v26, v26, v30 -; LMULMAX2-RV32-NEXT: 
vadd.vv v26, v26, v28 +; LMULMAX2-RV32-NEXT: vmulh.vv v28, v26, v28 +; LMULMAX2-RV32-NEXT: addi a1, zero, 51 +; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 +; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; LMULMAX2-RV32-NEXT: vmv.v.i v30, -1 +; LMULMAX2-RV32-NEXT: vmerge.vim v30, v30, 0, v0 +; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; LMULMAX2-RV32-NEXT: vmadd.vv v30, v26, v28 ; LMULMAX2-RV32-NEXT: addi a1, zero, 63 -; LMULMAX2-RV32-NEXT: vsrl.vx v28, v26, a1 +; LMULMAX2-RV32-NEXT: vsrl.vx v26, v30, a1 ; LMULMAX2-RV32-NEXT: addi a1, zero, 68 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu ; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; LMULMAX2-RV32-NEXT: vmv.v.i v30, 0 -; LMULMAX2-RV32-NEXT: vmerge.vim v30, v30, 1, v0 +; LMULMAX2-RV32-NEXT: vmv.v.i v28, 0 +; LMULMAX2-RV32-NEXT: vmerge.vim v28, v28, 1, v0 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; LMULMAX2-RV32-NEXT: vsra.vv v26, v26, v30 -; LMULMAX2-RV32-NEXT: vadd.vv v26, v26, v28 +; LMULMAX2-RV32-NEXT: vsra.vv v28, v30, v28 +; LMULMAX2-RV32-NEXT: vadd.vv v26, v28, v26 ; LMULMAX2-RV32-NEXT: vse64.v v26, (a0) ; LMULMAX2-RV32-NEXT: ret ; @@ -4753,7 +4750,6 @@ ; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; LMULMAX2-RV64-NEXT: vmv.v.i v28, -1 ; LMULMAX2-RV64-NEXT: vmerge.vim v28, v28, 0, v0 -; LMULMAX2-RV64-NEXT: vmul.vv v28, v26, v28 ; LMULMAX2-RV64-NEXT: lui a1, 21845 ; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 ; LMULMAX2-RV64-NEXT: slli a1, a1, 12 @@ -4765,14 +4761,14 @@ ; LMULMAX2-RV64-NEXT: vmv.v.x v30, a2 ; LMULMAX2-RV64-NEXT: addi a1, a1, 1366 ; LMULMAX2-RV64-NEXT: vmerge.vxm v30, v30, a1, v0 -; LMULMAX2-RV64-NEXT: vmulh.vv v26, v26, v30 -; LMULMAX2-RV64-NEXT: vadd.vv v26, v26, v28 +; LMULMAX2-RV64-NEXT: vmulh.vv v30, v26, v30 +; LMULMAX2-RV64-NEXT: vmacc.vv v30, v26, v28 ; LMULMAX2-RV64-NEXT: addi a1, zero, 63 -; LMULMAX2-RV64-NEXT: vsrl.vx v28, v26, a1 -; LMULMAX2-RV64-NEXT: vmv.v.i v30, 1 -; LMULMAX2-RV64-NEXT: vmerge.vim v30, v30, 0, v0 -; LMULMAX2-RV64-NEXT: vsra.vv v26, v26, v30 -; LMULMAX2-RV64-NEXT: vadd.vv v26, v26, v28 +; LMULMAX2-RV64-NEXT: vsrl.vx v26, v30, a1 +; LMULMAX2-RV64-NEXT: vmv.v.i v28, 1 +; LMULMAX2-RV64-NEXT: vmerge.vim v28, v28, 0, v0 +; LMULMAX2-RV64-NEXT: vsra.vv v28, v30, v28 +; LMULMAX2-RV64-NEXT: vadd.vv v26, v28, v26 ; LMULMAX2-RV64-NEXT: vse64.v v26, (a0) ; LMULMAX2-RV64-NEXT: ret ; @@ -4802,8 +4798,6 @@ ; LMULMAX1-RV64-NEXT: vmv.v.i v27, -1 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu ; LMULMAX1-RV64-NEXT: vmv.s.x v27, zero -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; LMULMAX1-RV64-NEXT: vmul.vv v28, v26, v27 ; LMULMAX1-RV64-NEXT: lui a2, 21845 ; LMULMAX1-RV64-NEXT: addiw a2, a2, 1365 ; LMULMAX1-RV64-NEXT: slli a2, a2, 12 @@ -4812,24 +4806,24 @@ ; LMULMAX1-RV64-NEXT: addi a2, a2, 1365 ; LMULMAX1-RV64-NEXT: slli a2, a2, 12 ; LMULMAX1-RV64-NEXT: addi a3, a2, 1365 -; LMULMAX1-RV64-NEXT: vmv.v.x v29, a3 +; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; LMULMAX1-RV64-NEXT: vmv.v.x v28, a3 ; LMULMAX1-RV64-NEXT: addi a2, a2, 1366 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; LMULMAX1-RV64-NEXT: vmv.s.x v29, a2 +; LMULMAX1-RV64-NEXT: vmv.s.x v28, a2 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; LMULMAX1-RV64-NEXT: vmulh.vv v26, v26, v29 -; LMULMAX1-RV64-NEXT: vadd.vv v26, v26, v28 +; LMULMAX1-RV64-NEXT: vmulh.vv v29, v26, v28 +; LMULMAX1-RV64-NEXT: vmacc.vv v29, v27, v26 ; 
LMULMAX1-RV64-NEXT: addi a2, zero, 63 -; LMULMAX1-RV64-NEXT: vsrl.vx v28, v26, a2 +; LMULMAX1-RV64-NEXT: vsrl.vx v26, v29, a2 ; LMULMAX1-RV64-NEXT: vid.v v30 -; LMULMAX1-RV64-NEXT: vsra.vv v26, v26, v30 -; LMULMAX1-RV64-NEXT: vadd.vv v26, v26, v28 -; LMULMAX1-RV64-NEXT: vmul.vv v27, v25, v27 -; LMULMAX1-RV64-NEXT: vmulh.vv v25, v25, v29 -; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v27 -; LMULMAX1-RV64-NEXT: vsrl.vx v27, v25, a2 -; LMULMAX1-RV64-NEXT: vsra.vv v25, v25, v30 -; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v27 +; LMULMAX1-RV64-NEXT: vsra.vv v29, v29, v30 +; LMULMAX1-RV64-NEXT: vadd.vv v26, v29, v26 +; LMULMAX1-RV64-NEXT: vmulh.vv v28, v25, v28 +; LMULMAX1-RV64-NEXT: vmacc.vv v28, v25, v27 +; LMULMAX1-RV64-NEXT: vsrl.vx v25, v28, a2 +; LMULMAX1-RV64-NEXT: vsra.vv v27, v28, v30 +; LMULMAX1-RV64-NEXT: vadd.vv v25, v27, v25 ; LMULMAX1-RV64-NEXT: vse64.v v25, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v26, (a1) ; LMULMAX1-RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmadd-sdnode.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmadd-sdnode.ll @@ -0,0 +1,592 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -target-abi=ilp32 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -target-abi=lp64 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 + +; This tests a mix of vmacc and vmadd by using different operand orders to +; trigger commuting in TwoAddressInstructionPass. + +define <vscale x 1 x i8> @vmadd_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i8> %vc) { +; CHECK-LABEL: vmadd_vv_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10 +; CHECK-NEXT: ret + %x = mul <vscale x 1 x i8> %va, %vb + %y = add <vscale x 1 x i8> %x, %vc + ret <vscale x 1 x i8> %y +} + +define <vscale x 1 x i8> @vmadd_vx_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, i8 %c) { +; CHECK-LABEL: vmadd_vx_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x i8> undef, i8 %c, i32 0 + %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer + %x = mul <vscale x 1 x i8> %va, %splat + %y = add <vscale x 1 x i8> %x, %vb + ret <vscale x 1 x i8> %y +} + +define <vscale x 2 x i8> @vmadd_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i8> %vc) { +; CHECK-LABEL: vmadd_vv_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu +; CHECK-NEXT: vmadd.vv v8, v10, v9 +; CHECK-NEXT: ret + %x = mul <vscale x 2 x i8> %va, %vc + %y = add <vscale x 2 x i8> %x, %vb + ret <vscale x 2 x i8> %y +} + +define <vscale x 2 x i8> @vmadd_vx_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, i8 %c) { +; CHECK-LABEL: vmadd_vx_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, mu +; CHECK-NEXT: vmacc.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x i8> undef, i8 %c, i32 0 + %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer + %x = mul <vscale x 2 x i8> %vb, %splat + %y = add <vscale x 2 x i8> %x, %va + ret <vscale x 2 x i8> %y +} + +define <vscale x 4 x i8> @vmadd_vv_nxv4i8(<vscale x 4 x i8> %va, <vscale x 
4 x i8> %vb, <vscale x 4 x i8> %vc) { +; CHECK-LABEL: vmadd_vv_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10 +; CHECK-NEXT: ret + %x = mul <vscale x 4 x i8> %vb, %va + %y = add <vscale x 4 x i8> %x, %vc + ret <vscale x 4 x i8> %y +} + +define <vscale x 4 x i8> @vmadd_vx_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, i8 %c) { +; CHECK-LABEL: vmadd_vx_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x i8> undef, i8 %c, i32 0 + %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer + %x = mul <vscale x 4 x i8> %va, %splat + %y = add <vscale x 4 x i8> %x, %vb + ret <vscale x 4 x i8> %y +} + +define <vscale x 8 x i8> @vmadd_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i8> %vc) { +; CHECK-LABEL: vmadd_vv_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, mu +; CHECK-NEXT: vmacc.vv v8, v10, v9 +; CHECK-NEXT: ret + %x = mul <vscale x 8 x i8> %vb, %vc + %y = add <vscale x 8 x i8> %x, %va + ret <vscale x 8 x i8> %y +} + +define <vscale x 8 x i8> @vmadd_vx_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, i8 %c) { +; CHECK-LABEL: vmadd_vx_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, mu +; CHECK-NEXT: vmacc.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x i8> undef, i8 %c, i32 0 + %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer + %x = mul <vscale x 8 x i8> %vb, %splat + %y = add <vscale x 8 x i8> %x, %va + ret <vscale x 8 x i8> %y +} + +define <vscale x 16 x i8> @vmadd_vv_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i8> %vc) { +; CHECK-LABEL: vmadd_vv_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, mu +; CHECK-NEXT: vmadd.vv v8, v12, v10 +; CHECK-NEXT: ret + %x = mul <vscale x 16 x i8> %vc, %va + %y = add <vscale x 16 x i8> %x, %vb + ret <vscale x 16 x i8> %y +} + +define <vscale x 16 x i8> @vmadd_vx_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, i8 %c) { +; CHECK-LABEL: vmadd_vx_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x i8> undef, i8 %c, i32 0 + %splat = shufflevector <vscale x 16 x i8> %head, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer + %x = mul <vscale x 16 x i8> %va, %splat + %y = add <vscale x 16 x i8> %x, %vb + ret <vscale x 16 x i8> %y +} + +define <vscale x 32 x i8> @vmadd_vv_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i8> %vc) { +; CHECK-LABEL: vmadd_vv_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, mu +; CHECK-NEXT: vmacc.vv v8, v16, v12 +; CHECK-NEXT: ret + %x = mul <vscale x 32 x i8> %vc, %vb + %y = add <vscale x 32 x i8> %x, %va + ret <vscale x 32 x i8> %y +} + +define <vscale x 32 x i8> @vmadd_vx_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, i8 %c) { +; CHECK-LABEL: vmadd_vx_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, mu +; CHECK-NEXT: vmacc.vx v8, a0, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 32 x i8> undef, i8 %c, i32 0 + %splat = shufflevector <vscale x 32 x i8> %head, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer + %x = mul <vscale x 32 x i8> %vb, %splat + %y = add 
<vscale x 32 x i8> %x, %va + ret <vscale x 32 x i8> %y +} + +define <vscale x 64 x i8> @vmadd_vv_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i8> %vc) { +; CHECK-LABEL: vmadd_vv_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8r.v v24, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, mu +; CHECK-NEXT: vmacc.vv v8, v16, v24 +; CHECK-NEXT: ret + %x = mul <vscale x 64 x i8> %vc, %vb + %y = add <vscale x 64 x i8> %x, %va + ret <vscale x 64 x i8> %y +} + +define <vscale x 64 x i8> @vmadd_vx_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, i8 %c) { +; CHECK-LABEL: vmadd_vx_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, mu +; CHECK-NEXT: vmacc.vx v8, a0, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 64 x i8> undef, i8 %c, i32 0 + %splat = shufflevector <vscale x 64 x i8> %head, <vscale x 64 x i8> undef, <vscale x 64 x i32> zeroinitializer + %x = mul <vscale x 64 x i8> %vb, %splat + %y = add <vscale x 64 x i8> %x, %va + ret <vscale x 64 x i8> %y +} + +define <vscale x 1 x i16> @vmadd_vv_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i16> %vc) { +; CHECK-LABEL: vmadd_vv_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10 +; CHECK-NEXT: ret + %x = mul <vscale x 1 x i16> %va, %vb + %y = add <vscale x 1 x i16> %x, %vc + ret <vscale x 1 x i16> %y +} + +define <vscale x 1 x i16> @vmadd_vx_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, i16 %c) { +; CHECK-LABEL: vmadd_vx_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x i16> undef, i16 %c, i32 0 + %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer + %x = mul <vscale x 1 x i16> %va, %splat + %y = add <vscale x 1 x i16> %x, %vb + ret <vscale x 1 x i16> %y +} + +define <vscale x 2 x i16> @vmadd_vv_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i16> %vc) { +; CHECK-LABEL: vmadd_vv_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmadd.vv v8, v10, v9 +; CHECK-NEXT: ret + %x = mul <vscale x 2 x i16> %va, %vc + %y = add <vscale x 2 x i16> %x, %vb + ret <vscale x 2 x i16> %y +} + +define <vscale x 2 x i16> @vmadd_vx_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, i16 %c) { +; CHECK-LABEL: vmadd_vx_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmacc.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x i16> undef, i16 %c, i32 0 + %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer + %x = mul <vscale x 2 x i16> %vb, %splat + %y = add <vscale x 2 x i16> %x, %va + ret <vscale x 2 x i16> %y +} + +define <vscale x 4 x i16> @vmadd_vv_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i16> %vc) { +; CHECK-LABEL: vmadd_vv_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10 +; CHECK-NEXT: ret + %x = mul <vscale x 4 x i16> %vb, %va + %y = add <vscale x 4 x i16> %x, %vc + ret <vscale x 4 x i16> %y +} + +define <vscale x 4 x i16> @vmadd_vx_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, i16 %c) { +; CHECK-LABEL: vmadd_vx_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = 
insertelement <vscale x 4 x i16> undef, i16 %c, i32 0 + %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer + %x = mul <vscale x 4 x i16> %va, %splat + %y = add <vscale x 4 x i16> %x, %vb + ret <vscale x 4 x i16> %y +} + +define <vscale x 8 x i16> @vmadd_vv_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i16> %vc) { +; CHECK-LABEL: vmadd_vv_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK-NEXT: vmacc.vv v8, v12, v10 +; CHECK-NEXT: ret + %x = mul <vscale x 8 x i16> %vb, %vc + %y = add <vscale x 8 x i16> %x, %va + ret <vscale x 8 x i16> %y +} + +define <vscale x 8 x i16> @vmadd_vx_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, i16 %c) { +; CHECK-LABEL: vmadd_vx_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; CHECK-NEXT: vmacc.vx v8, a0, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x i16> undef, i16 %c, i32 0 + %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer + %x = mul <vscale x 8 x i16> %vb, %splat + %y = add <vscale x 8 x i16> %x, %va + ret <vscale x 8 x i16> %y +} + +define <vscale x 16 x i16> @vmadd_vv_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i16> %vc) { +; CHECK-LABEL: vmadd_vv_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; CHECK-NEXT: vmadd.vv v8, v16, v12 +; CHECK-NEXT: ret + %x = mul <vscale x 16 x i16> %vc, %va + %y = add <vscale x 16 x i16> %x, %vb + ret <vscale x 16 x i16> %y +} + +define <vscale x 16 x i16> @vmadd_vx_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, i16 %c) { +; CHECK-LABEL: vmadd_vx_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x i16> undef, i16 %c, i32 0 + %splat = shufflevector <vscale x 16 x i16> %head, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer + %x = mul <vscale x 16 x i16> %va, %splat + %y = add <vscale x 16 x i16> %x, %vb + ret <vscale x 16 x i16> %y +} + +define <vscale x 32 x i16> @vmadd_vv_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i16> %vc) { +; CHECK-LABEL: vmadd_vv_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vmacc.vv v8, v16, v24 +; CHECK-NEXT: ret + %x = mul <vscale x 32 x i16> %vc, %vb + %y = add <vscale x 32 x i16> %x, %va + ret <vscale x 32 x i16> %y +} + +define <vscale x 32 x i16> @vmadd_vx_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, i16 %c) { +; CHECK-LABEL: vmadd_vx_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; CHECK-NEXT: vmacc.vx v8, a0, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 32 x i16> undef, i16 %c, i32 0 + %splat = shufflevector <vscale x 32 x i16> %head, <vscale x 32 x i16> undef, <vscale x 32 x i32> zeroinitializer + %x = mul <vscale x 32 x i16> %vb, %splat + %y = add <vscale x 32 x i16> %x, %va + ret <vscale x 32 x i16> %y +} + +define <vscale x 1 x i32> @vmadd_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i32> %vc) { +; CHECK-LABEL: vmadd_vv_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10 +; CHECK-NEXT: ret + %x = mul <vscale x 1 x i32> %va, %vb + %y = add <vscale x 1 x i32> %x, %vc + ret <vscale x 1 x i32> %y +} + 
+define <vscale x 1 x i32> @vmadd_vx_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, i32 %c) { +; CHECK-LABEL: vmadd_vx_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x i32> undef, i32 %c, i32 0 + %splat = shufflevector <vscale x 1 x i32> %head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer + %x = mul <vscale x 1 x i32> %va, %splat + %y = add <vscale x 1 x i32> %x, %vb + ret <vscale x 1 x i32> %y +} + +define <vscale x 2 x i32> @vmadd_vv_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i32> %vc) { +; CHECK-LABEL: vmadd_vv_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK-NEXT: vmadd.vv v8, v10, v9 +; CHECK-NEXT: ret + %x = mul <vscale x 2 x i32> %va, %vc + %y = add <vscale x 2 x i32> %x, %vb + ret <vscale x 2 x i32> %y +} + +define <vscale x 2 x i32> @vmadd_vx_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, i32 %c) { +; CHECK-LABEL: vmadd_vx_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vmacc.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x i32> undef, i32 %c, i32 0 + %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer + %x = mul <vscale x 2 x i32> %vb, %splat + %y = add <vscale x 2 x i32> %x, %va + ret <vscale x 2 x i32> %y +} + +define <vscale x 4 x i32> @vmadd_vv_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i32> %vc) { +; CHECK-LABEL: vmadd_vv_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK-NEXT: vmadd.vv v8, v10, v12 +; CHECK-NEXT: ret + %x = mul <vscale x 4 x i32> %vb, %va + %y = add <vscale x 4 x i32> %x, %vc + ret <vscale x 4 x i32> %y +} + +define <vscale x 4 x i32> @vmadd_vx_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, i32 %c) { +; CHECK-LABEL: vmadd_vx_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x i32> undef, i32 %c, i32 0 + %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer + %x = mul <vscale x 4 x i32> %va, %splat + %y = add <vscale x 4 x i32> %x, %vb + ret <vscale x 4 x i32> %y +} + +define <vscale x 8 x i32> @vmadd_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i32> %vc) { +; CHECK-LABEL: vmadd_vv_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK-NEXT: vmacc.vv v8, v16, v12 +; CHECK-NEXT: ret + %x = mul <vscale x 8 x i32> %vb, %vc + %y = add <vscale x 8 x i32> %x, %va + ret <vscale x 8 x i32> %y +} + +define <vscale x 8 x i32> @vmadd_vx_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, i32 %c) { +; CHECK-LABEL: vmadd_vx_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; CHECK-NEXT: vmacc.vx v8, a0, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x i32> undef, i32 %c, i32 0 + %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer + %x = mul <vscale x 8 x i32> %vb, %splat + %y = add <vscale x 8 x i32> %x, %va + ret <vscale x 8 x i32> %y +} + +define <vscale x 16 x i32> @vmadd_vv_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i32> %vc) { +; CHECK-LABEL: vmadd_vv_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: 
vl8re32.v v24, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu +; CHECK-NEXT: vmadd.vv v8, v24, v16 +; CHECK-NEXT: ret + %x = mul <vscale x 16 x i32> %vc, %va + %y = add <vscale x 16 x i32> %x, %vb + ret <vscale x 16 x i32> %y +} + +define <vscale x 16 x i32> @vmadd_vx_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, i32 %c) { +; CHECK-LABEL: vmadd_vx_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x i32> undef, i32 %c, i32 0 + %splat = shufflevector <vscale x 16 x i32> %head, <vscale x 16 x i32> undef, <vscale x 16 x i32> zeroinitializer + %x = mul <vscale x 16 x i32> %va, %splat + %y = add <vscale x 16 x i32> %x, %vb + ret <vscale x 16 x i32> %y +} + +define <vscale x 1 x i64> @vmadd_vv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i64> %vc) { +; CHECK-LABEL: vmadd_vv_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10 +; CHECK-NEXT: ret + %x = mul <vscale x 1 x i64> %va, %vb + %y = add <vscale x 1 x i64> %x, %vc + ret <vscale x 1 x i64> %y +} + +define <vscale x 1 x i64> @vmadd_vx_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, i64 %c) { +; RV32-LABEL: vmadd_vx_nxv1i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: vmadd.vv v8, v25, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmadd_vx_nxv1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; RV64-NEXT: vmadd.vx v8, a0, v9 +; RV64-NEXT: ret + %head = insertelement <vscale x 1 x i64> undef, i64 %c, i32 0 + %splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer + %x = mul <vscale x 1 x i64> %va, %splat + %y = add <vscale x 1 x i64> %x, %vb + ret <vscale x 1 x i64> %y +} + +define <vscale x 2 x i64> @vmadd_vv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i64> %vc) { +; CHECK-LABEL: vmadd_vv_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK-NEXT: vmadd.vv v8, v12, v10 +; CHECK-NEXT: ret + %x = mul <vscale x 2 x i64> %va, %vc + %y = add <vscale x 2 x i64> %x, %vb + ret <vscale x 2 x i64> %y +} + +define <vscale x 2 x i64> @vmadd_vx_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, i64 %c) { +; RV32-LABEL: vmadd_vx_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vmacc.vv v8, v10, v26 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmadd_vx_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, mu +; RV64-NEXT: vmacc.vx v8, a0, v10 +; RV64-NEXT: ret + %head = insertelement <vscale x 2 x i64> undef, i64 %c, i32 0 + %splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer + %x = mul <vscale x 2 x i64> %vb, %splat + %y = add <vscale x 2 x i64> %x, %va + ret <vscale x 2 x i64> %y +} + +define <vscale x 4 x i64> @vmadd_vv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i64> %vc) { +; CHECK-LABEL: 
vmadd_vv_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK-NEXT: vmadd.vv v8, v12, v16 +; CHECK-NEXT: ret + %x = mul <vscale x 4 x i64> %vb, %va + %y = add <vscale x 4 x i64> %x, %vc + ret <vscale x 4 x i64> %y +} + +define <vscale x 4 x i64> @vmadd_vx_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, i64 %c) { +; RV32-LABEL: vmadd_vx_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vmadd.vv v8, v28, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmadd_vx_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, mu +; RV64-NEXT: vmadd.vx v8, a0, v12 +; RV64-NEXT: ret + %head = insertelement <vscale x 4 x i64> undef, i64 %c, i32 0 + %splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer + %x = mul <vscale x 4 x i64> %va, %splat + %y = add <vscale x 4 x i64> %x, %vb + ret <vscale x 4 x i64> %y +} + +define <vscale x 8 x i64> @vmadd_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i64> %vc) { +; CHECK-LABEL: vmadd_vv_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK-NEXT: vmacc.vv v8, v16, v24 +; CHECK-NEXT: ret + %x = mul <vscale x 8 x i64> %vb, %vc + %y = add <vscale x 8 x i64> %x, %va + ret <vscale x 8 x i64> %y +} + +define <vscale x 8 x i64> @vmadd_vx_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, i64 %c) { +; RV32-LABEL: vmadd_vx_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vmacc.vv v8, v16, v24 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmadd_vx_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; RV64-NEXT: vmacc.vx v8, a0, v16 +; RV64-NEXT: ret + %head = insertelement <vscale x 8 x i64> undef, i64 %c, i32 0 + %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer + %x = mul <vscale x 8 x i64> %vb, %splat + %y = add <vscale x 8 x i64> %x, %va + ret <vscale x 8 x i64> %y +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode-rv32.ll @@ -6,11 +6,11 @@ define <vscale x 4 x i1> @srem_eq_fold_nxv4i8(<vscale x 4 x i8> %va) { ; CHECK-LABEL: srem_eq_fold_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, -85 -; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu -; CHECK-NEXT: vmul.vx v25, v8, a0 ; CHECK-NEXT: addi a0, zero, 42 -; CHECK-NEXT: vadd.vx v25, v25, a0 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: addi a1, zero, -85 +; CHECK-NEXT: vmacc.vx v25, a1, v8 ; CHECK-NEXT: vsll.vi v26, v25, 7 ; CHECK-NEXT: vsrl.vi v25, v25, 1 ; CHECK-NEXT: vor.vv v25, v25, v26 diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vnmsub-sdnode.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vnmsub-sdnode.ll @@ -0,0 +1,592 @@ +; NOTE: Assertions have 
been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -target-abi=ilp32 \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -target-abi=lp64 \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+
+; This tests a mix of vnmsac and vnmsub by using different operand orders to
+; trigger commuting in TwoAddressInstructionPass.
+
+define <vscale x 1 x i8> @vnmsub_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i8> %vc) {
+; CHECK-LABEL: vnmsub_vv_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
+; CHECK-NEXT:    vnmsub.vv v8, v9, v10
+; CHECK-NEXT:    ret
+  %x = mul <vscale x 1 x i8> %va, %vb
+  %y = sub <vscale x 1 x i8> %vc, %x
+  ret <vscale x 1 x i8> %y
+}
+
+define <vscale x 1 x i8> @vnmsub_vx_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, i8 %c) {
+; CHECK-LABEL: vnmsub_vx_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, mu
+; CHECK-NEXT:    vnmsub.vx v8, a0, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 1 x i8> undef, i8 %c, i32 0
+  %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+  %x = mul <vscale x 1 x i8> %va, %splat
+  %y = sub <vscale x 1 x i8> %vb, %x
+  ret <vscale x 1 x i8> %y
+}
+
+define <vscale x 2 x i8> @vnmsub_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i8> %vc) {
+; CHECK-LABEL: vnmsub_vv_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
+; CHECK-NEXT:    vnmsub.vv v8, v10, v9
+; CHECK-NEXT:    ret
+  %x = mul <vscale x 2 x i8> %va, %vc
+  %y = sub <vscale x 2 x i8> %vb, %x
+  ret <vscale x 2 x i8> %y
+}
+
+define <vscale x 2 x i8> @vnmsub_vx_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, i8 %c) {
+; CHECK-LABEL: vnmsub_vx_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
+; CHECK-NEXT:    vnmsac.vx v8, a0, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 2 x i8> undef, i8 %c, i32 0
+  %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
+  %x = mul <vscale x 2 x i8> %vb, %splat
+  %y = sub <vscale x 2 x i8> %va, %x
+  ret <vscale x 2 x i8> %y
+}
+
+define <vscale x 4 x i8> @vnmsub_vv_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i8> %vc) {
+; CHECK-LABEL: vnmsub_vv_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
+; CHECK-NEXT:    vnmsub.vv v8, v9, v10
+; CHECK-NEXT:    ret
+  %x = mul <vscale x 4 x i8> %vb, %va
+  %y = sub <vscale x 4 x i8> %vc, %x
+  ret <vscale x 4 x i8> %y
+}
+
+define <vscale x 4 x i8> @vnmsub_vx_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, i8 %c) {
+; CHECK-LABEL: vnmsub_vx_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, mu
+; CHECK-NEXT:    vnmsub.vx v8, a0, v9
+; CHECK-NEXT:    ret
+  %head = insertelement <vscale x 4 x i8> undef, i8 %c, i32 0
+  %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
+  %x = mul <vscale x 4 x i8> %va, %splat
+  %y = sub <vscale x 4 x i8> %vb, %x
+  ret <vscale x 4 x i8> %y
+}
+
+define <vscale x 8 x i8> @vnmsub_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i8> %vc) {
+; CHECK-LABEL: vnmsub_vv_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
+; CHECK-NEXT:    vnmsac.vv v8, v10, v9
+; CHECK-NEXT:    ret
+  %x = mul <vscale x 8 x i8> %vb, %vc
+  %y =
sub <vscale x 8 x i8> %va, %x + ret <vscale x 8 x i8> %y +} + +define <vscale x 8 x i8> @vnmsub_vx_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, i8 %c) { +; CHECK-LABEL: vnmsub_vx_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, mu +; CHECK-NEXT: vnmsac.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x i8> undef, i8 %c, i32 0 + %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer + %x = mul <vscale x 8 x i8> %vb, %splat + %y = sub <vscale x 8 x i8> %va, %x + ret <vscale x 8 x i8> %y +} + +define <vscale x 16 x i8> @vnmsub_vv_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i8> %vc) { +; CHECK-LABEL: vnmsub_vv_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, mu +; CHECK-NEXT: vnmsub.vv v8, v12, v10 +; CHECK-NEXT: ret + %x = mul <vscale x 16 x i8> %vc, %va + %y = sub <vscale x 16 x i8> %vb, %x + ret <vscale x 16 x i8> %y +} + +define <vscale x 16 x i8> @vnmsub_vx_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, i8 %c) { +; CHECK-LABEL: vnmsub_vx_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, mu +; CHECK-NEXT: vnmsub.vx v8, a0, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x i8> undef, i8 %c, i32 0 + %splat = shufflevector <vscale x 16 x i8> %head, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer + %x = mul <vscale x 16 x i8> %va, %splat + %y = sub <vscale x 16 x i8> %vb, %x + ret <vscale x 16 x i8> %y +} + +define <vscale x 32 x i8> @vnmsub_vv_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i8> %vc) { +; CHECK-LABEL: vnmsub_vv_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, mu +; CHECK-NEXT: vnmsac.vv v8, v16, v12 +; CHECK-NEXT: ret + %x = mul <vscale x 32 x i8> %vc, %vb + %y = sub <vscale x 32 x i8> %va, %x + ret <vscale x 32 x i8> %y +} + +define <vscale x 32 x i8> @vnmsub_vx_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, i8 %c) { +; CHECK-LABEL: vnmsub_vx_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, mu +; CHECK-NEXT: vnmsac.vx v8, a0, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 32 x i8> undef, i8 %c, i32 0 + %splat = shufflevector <vscale x 32 x i8> %head, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer + %x = mul <vscale x 32 x i8> %vb, %splat + %y = sub <vscale x 32 x i8> %va, %x + ret <vscale x 32 x i8> %y +} + +define <vscale x 64 x i8> @vnmsub_vv_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i8> %vc) { +; CHECK-LABEL: vnmsub_vv_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8r.v v24, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, mu +; CHECK-NEXT: vnmsac.vv v8, v16, v24 +; CHECK-NEXT: ret + %x = mul <vscale x 64 x i8> %vc, %vb + %y = sub <vscale x 64 x i8> %va, %x + ret <vscale x 64 x i8> %y +} + +define <vscale x 64 x i8> @vnmsub_vx_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, i8 %c) { +; CHECK-LABEL: vnmsub_vx_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, mu +; CHECK-NEXT: vnmsac.vx v8, a0, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 64 x i8> undef, i8 %c, i32 0 + %splat = shufflevector <vscale x 64 x i8> %head, <vscale x 64 x i8> undef, <vscale x 64 x i32> zeroinitializer + %x = mul <vscale x 64 x i8> %vb, %splat + %y = sub <vscale x 64 x i8> %va, %x + ret <vscale x 64 x i8> %y +} + +define <vscale x 1 x i16> @vnmsub_vv_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 
x i16> %vc) { +; CHECK-LABEL: vnmsub_vv_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; CHECK-NEXT: vnmsub.vv v8, v9, v10 +; CHECK-NEXT: ret + %x = mul <vscale x 1 x i16> %va, %vb + %y = sub <vscale x 1 x i16> %vc, %x + ret <vscale x 1 x i16> %y +} + +define <vscale x 1 x i16> @vnmsub_vx_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, i16 %c) { +; CHECK-LABEL: vnmsub_vx_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, mu +; CHECK-NEXT: vnmsub.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x i16> undef, i16 %c, i32 0 + %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer + %x = mul <vscale x 1 x i16> %va, %splat + %y = sub <vscale x 1 x i16> %vb, %x + ret <vscale x 1 x i16> %y +} + +define <vscale x 2 x i16> @vnmsub_vv_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i16> %vc) { +; CHECK-LABEL: vnmsub_vv_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK-NEXT: vnmsub.vv v8, v10, v9 +; CHECK-NEXT: ret + %x = mul <vscale x 2 x i16> %va, %vc + %y = sub <vscale x 2 x i16> %vb, %x + ret <vscale x 2 x i16> %y +} + +define <vscale x 2 x i16> @vnmsub_vx_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, i16 %c) { +; CHECK-LABEL: vnmsub_vx_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, mu +; CHECK-NEXT: vnmsac.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x i16> undef, i16 %c, i32 0 + %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer + %x = mul <vscale x 2 x i16> %vb, %splat + %y = sub <vscale x 2 x i16> %va, %x + ret <vscale x 2 x i16> %y +} + +define <vscale x 4 x i16> @vnmsub_vv_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i16> %vc) { +; CHECK-LABEL: vnmsub_vv_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vnmsub.vv v8, v9, v10 +; CHECK-NEXT: ret + %x = mul <vscale x 4 x i16> %vb, %va + %y = sub <vscale x 4 x i16> %vc, %x + ret <vscale x 4 x i16> %y +} + +define <vscale x 4 x i16> @vnmsub_vx_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, i16 %c) { +; CHECK-LABEL: vnmsub_vx_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vnmsub.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x i16> undef, i16 %c, i32 0 + %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer + %x = mul <vscale x 4 x i16> %va, %splat + %y = sub <vscale x 4 x i16> %vb, %x + ret <vscale x 4 x i16> %y +} + +define <vscale x 8 x i16> @vnmsub_vv_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i16> %vc) { +; CHECK-LABEL: vnmsub_vv_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK-NEXT: vnmsac.vv v8, v12, v10 +; CHECK-NEXT: ret + %x = mul <vscale x 8 x i16> %vb, %vc + %y = sub <vscale x 8 x i16> %va, %x + ret <vscale x 8 x i16> %y +} + +define <vscale x 8 x i16> @vnmsub_vx_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, i16 %c) { +; CHECK-LABEL: vnmsub_vx_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; CHECK-NEXT: vnmsac.vx v8, a0, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x i16> undef, i16 %c, i32 0 + %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer + %x = 
mul <vscale x 8 x i16> %vb, %splat + %y = sub <vscale x 8 x i16> %va, %x + ret <vscale x 8 x i16> %y +} + +define <vscale x 16 x i16> @vnmsub_vv_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i16> %vc) { +; CHECK-LABEL: vnmsub_vv_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; CHECK-NEXT: vnmsub.vv v8, v16, v12 +; CHECK-NEXT: ret + %x = mul <vscale x 16 x i16> %vc, %va + %y = sub <vscale x 16 x i16> %vb, %x + ret <vscale x 16 x i16> %y +} + +define <vscale x 16 x i16> @vnmsub_vx_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, i16 %c) { +; CHECK-LABEL: vnmsub_vx_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, mu +; CHECK-NEXT: vnmsub.vx v8, a0, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x i16> undef, i16 %c, i32 0 + %splat = shufflevector <vscale x 16 x i16> %head, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer + %x = mul <vscale x 16 x i16> %va, %splat + %y = sub <vscale x 16 x i16> %vb, %x + ret <vscale x 16 x i16> %y +} + +define <vscale x 32 x i16> @vnmsub_vv_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i16> %vc) { +; CHECK-LABEL: vnmsub_vv_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vnmsac.vv v8, v16, v24 +; CHECK-NEXT: ret + %x = mul <vscale x 32 x i16> %vc, %vb + %y = sub <vscale x 32 x i16> %va, %x + ret <vscale x 32 x i16> %y +} + +define <vscale x 32 x i16> @vnmsub_vx_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, i16 %c) { +; CHECK-LABEL: vnmsub_vx_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; CHECK-NEXT: vnmsac.vx v8, a0, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 32 x i16> undef, i16 %c, i32 0 + %splat = shufflevector <vscale x 32 x i16> %head, <vscale x 32 x i16> undef, <vscale x 32 x i32> zeroinitializer + %x = mul <vscale x 32 x i16> %vb, %splat + %y = sub <vscale x 32 x i16> %va, %x + ret <vscale x 32 x i16> %y +} + +define <vscale x 1 x i32> @vnmsub_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i32> %vc) { +; CHECK-LABEL: vnmsub_vv_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu +; CHECK-NEXT: vnmsub.vv v8, v9, v10 +; CHECK-NEXT: ret + %x = mul <vscale x 1 x i32> %va, %vb + %y = sub <vscale x 1 x i32> %vc, %x + ret <vscale x 1 x i32> %y +} + +define <vscale x 1 x i32> @vnmsub_vx_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, i32 %c) { +; CHECK-LABEL: vnmsub_vx_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, mu +; CHECK-NEXT: vnmsub.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 1 x i32> undef, i32 %c, i32 0 + %splat = shufflevector <vscale x 1 x i32> %head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer + %x = mul <vscale x 1 x i32> %va, %splat + %y = sub <vscale x 1 x i32> %vb, %x + ret <vscale x 1 x i32> %y +} + +define <vscale x 2 x i32> @vnmsub_vv_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i32> %vc) { +; CHECK-LABEL: vnmsub_vv_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK-NEXT: vnmsub.vv v8, v10, v9 +; CHECK-NEXT: ret + %x = mul <vscale x 2 x i32> %va, %vc + %y = sub <vscale x 2 x i32> %vb, %x + ret <vscale x 2 x i32> %y +} + +define <vscale x 2 x i32> @vnmsub_vx_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, i32 %c) { +; CHECK-LABEL: vnmsub_vx_nxv2i32: +; CHECK: # 
%bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vnmsac.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement <vscale x 2 x i32> undef, i32 %c, i32 0 + %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer + %x = mul <vscale x 2 x i32> %vb, %splat + %y = sub <vscale x 2 x i32> %va, %x + ret <vscale x 2 x i32> %y +} + +define <vscale x 4 x i32> @vnmsub_vv_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i32> %vc) { +; CHECK-LABEL: vnmsub_vv_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK-NEXT: vnmsub.vv v8, v10, v12 +; CHECK-NEXT: ret + %x = mul <vscale x 4 x i32> %vb, %va + %y = sub <vscale x 4 x i32> %vc, %x + ret <vscale x 4 x i32> %y +} + +define <vscale x 4 x i32> @vnmsub_vx_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, i32 %c) { +; CHECK-LABEL: vnmsub_vx_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu +; CHECK-NEXT: vnmsub.vx v8, a0, v10 +; CHECK-NEXT: ret + %head = insertelement <vscale x 4 x i32> undef, i32 %c, i32 0 + %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer + %x = mul <vscale x 4 x i32> %va, %splat + %y = sub <vscale x 4 x i32> %vb, %x + ret <vscale x 4 x i32> %y +} + +define <vscale x 8 x i32> @vnmsub_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i32> %vc) { +; CHECK-LABEL: vnmsub_vv_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK-NEXT: vnmsac.vv v8, v16, v12 +; CHECK-NEXT: ret + %x = mul <vscale x 8 x i32> %vb, %vc + %y = sub <vscale x 8 x i32> %va, %x + ret <vscale x 8 x i32> %y +} + +define <vscale x 8 x i32> @vnmsub_vx_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, i32 %c) { +; CHECK-LABEL: vnmsub_vx_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; CHECK-NEXT: vnmsac.vx v8, a0, v12 +; CHECK-NEXT: ret + %head = insertelement <vscale x 8 x i32> undef, i32 %c, i32 0 + %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer + %x = mul <vscale x 8 x i32> %vb, %splat + %y = sub <vscale x 8 x i32> %va, %x + ret <vscale x 8 x i32> %y +} + +define <vscale x 16 x i32> @vnmsub_vv_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i32> %vc) { +; CHECK-LABEL: vnmsub_vv_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu +; CHECK-NEXT: vnmsub.vv v8, v24, v16 +; CHECK-NEXT: ret + %x = mul <vscale x 16 x i32> %vc, %va + %y = sub <vscale x 16 x i32> %vb, %x + ret <vscale x 16 x i32> %y +} + +define <vscale x 16 x i32> @vnmsub_vx_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, i32 %c) { +; CHECK-LABEL: vnmsub_vx_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, mu +; CHECK-NEXT: vnmsub.vx v8, a0, v16 +; CHECK-NEXT: ret + %head = insertelement <vscale x 16 x i32> undef, i32 %c, i32 0 + %splat = shufflevector <vscale x 16 x i32> %head, <vscale x 16 x i32> undef, <vscale x 16 x i32> zeroinitializer + %x = mul <vscale x 16 x i32> %va, %splat + %y = sub <vscale x 16 x i32> %vb, %x + ret <vscale x 16 x i32> %y +} + +define <vscale x 1 x i64> @vnmsub_vv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i64> %vc) { +; CHECK-LABEL: vnmsub_vv_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; CHECK-NEXT: vnmsub.vv v8, v9, v10 +; 
CHECK-NEXT: ret + %x = mul <vscale x 1 x i64> %va, %vb + %y = sub <vscale x 1 x i64> %vc, %x + ret <vscale x 1 x i64> %y +} + +define <vscale x 1 x i64> @vnmsub_vx_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, i64 %c) { +; RV32-LABEL: vnmsub_vx_nxv1i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: vnmsub.vv v8, v25, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vnmsub_vx_nxv1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; RV64-NEXT: vnmsub.vx v8, a0, v9 +; RV64-NEXT: ret + %head = insertelement <vscale x 1 x i64> undef, i64 %c, i32 0 + %splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer + %x = mul <vscale x 1 x i64> %va, %splat + %y = sub <vscale x 1 x i64> %vb, %x + ret <vscale x 1 x i64> %y +} + +define <vscale x 2 x i64> @vnmsub_vv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i64> %vc) { +; CHECK-LABEL: vnmsub_vv_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK-NEXT: vnmsub.vv v8, v12, v10 +; CHECK-NEXT: ret + %x = mul <vscale x 2 x i64> %va, %vc + %y = sub <vscale x 2 x i64> %vb, %x + ret <vscale x 2 x i64> %y +} + +define <vscale x 2 x i64> @vnmsub_vx_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, i64 %c) { +; RV32-LABEL: vnmsub_vx_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vnmsac.vv v8, v10, v26 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vnmsub_vx_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, mu +; RV64-NEXT: vnmsac.vx v8, a0, v10 +; RV64-NEXT: ret + %head = insertelement <vscale x 2 x i64> undef, i64 %c, i32 0 + %splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer + %x = mul <vscale x 2 x i64> %vb, %splat + %y = sub <vscale x 2 x i64> %va, %x + ret <vscale x 2 x i64> %y +} + +define <vscale x 4 x i64> @vnmsub_vv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i64> %vc) { +; CHECK-LABEL: vnmsub_vv_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK-NEXT: vnmsub.vv v8, v12, v16 +; CHECK-NEXT: ret + %x = mul <vscale x 4 x i64> %vb, %va + %y = sub <vscale x 4 x i64> %vc, %x + ret <vscale x 4 x i64> %y +} + +define <vscale x 4 x i64> @vnmsub_vx_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, i64 %c) { +; RV32-LABEL: vnmsub_vx_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vnmsub.vv v8, v28, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vnmsub_vx_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, mu +; RV64-NEXT: vnmsub.vx v8, a0, v12 +; RV64-NEXT: ret + %head = insertelement <vscale x 4 x i64> undef, i64 %c, i32 0 + %splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> 
zeroinitializer + %x = mul <vscale x 4 x i64> %va, %splat + %y = sub <vscale x 4 x i64> %vb, %x + ret <vscale x 4 x i64> %y +} + +define <vscale x 8 x i64> @vnmsub_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i64> %vc) { +; CHECK-LABEL: vnmsub_vv_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK-NEXT: vnmsac.vv v8, v16, v24 +; CHECK-NEXT: ret + %x = mul <vscale x 8 x i64> %vb, %vc + %y = sub <vscale x 8 x i64> %va, %x + ret <vscale x 8 x i64> %y +} + +define <vscale x 8 x i64> @vnmsub_vx_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, i64 %c) { +; RV32-LABEL: vnmsub_vx_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vnmsac.vv v8, v16, v24 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vnmsub_vx_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; RV64-NEXT: vnmsac.vx v8, a0, v16 +; RV64-NEXT: ret + %head = insertelement <vscale x 8 x i64> undef, i64 %c, i32 0 + %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer + %x = mul <vscale x 8 x i64> %vb, %splat + %y = sub <vscale x 8 x i64> %va, %x + ret <vscale x 8 x i64> %y +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode-rv32.ll @@ -34,8 +34,7 @@ ; CHECK-NEXT: vsrl.vi v26, v25, 7 ; CHECK-NEXT: vadd.vv v25, v25, v26 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 1 x i8> undef, i8 -7, i32 0 %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer @@ -76,8 +75,7 @@ ; CHECK-NEXT: vsrl.vi v26, v25, 7 ; CHECK-NEXT: vadd.vv v25, v25, v26 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x i8> undef, i8 -7, i32 0 %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer @@ -118,8 +116,7 @@ ; CHECK-NEXT: vsrl.vi v26, v25, 7 ; CHECK-NEXT: vadd.vv v25, v25, v26 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x i8> undef, i8 -7, i32 0 %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer @@ -160,8 +157,7 @@ ; CHECK-NEXT: vsrl.vi v26, v25, 7 ; CHECK-NEXT: vadd.vv v25, v25, v26 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x i8> undef, i8 -7, i32 0 %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer @@ -202,8 +198,7 @@ ; CHECK-NEXT: vsrl.vi v28, v26, 7 ; CHECK-NEXT: vadd.vv v26, v26, v28 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v26, v26, a0 -; CHECK-NEXT: vsub.vv v8, v8, v26 +; 
CHECK-NEXT: vnmsac.vx v8, a0, v26 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x i8> undef, i8 -7, i32 0 %splat = shufflevector <vscale x 16 x i8> %head, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer @@ -244,8 +239,7 @@ ; CHECK-NEXT: vsrl.vi v12, v28, 7 ; CHECK-NEXT: vadd.vv v28, v28, v12 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v28, v28, a0 -; CHECK-NEXT: vsub.vv v8, v8, v28 +; CHECK-NEXT: vnmsac.vx v8, a0, v28 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x i8> undef, i8 -7, i32 0 %splat = shufflevector <vscale x 32 x i8> %head, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer @@ -286,8 +280,7 @@ ; CHECK-NEXT: vsrl.vi v24, v16, 7 ; CHECK-NEXT: vadd.vv v16, v16, v24 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v16, v16, a0 -; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: vnmsac.vx v8, a0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 64 x i8> undef, i8 -7, i32 0 %splat = shufflevector <vscale x 64 x i8> %head, <vscale x 64 x i8> undef, <vscale x 64 x i32> zeroinitializer @@ -328,8 +321,7 @@ ; CHECK-NEXT: vsrl.vi v26, v25, 15 ; CHECK-NEXT: vadd.vv v25, v25, v26 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 1 x i16> undef, i16 -7, i32 0 %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer @@ -370,8 +362,7 @@ ; CHECK-NEXT: vsrl.vi v26, v25, 15 ; CHECK-NEXT: vadd.vv v25, v25, v26 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x i16> undef, i16 -7, i32 0 %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer @@ -412,8 +403,7 @@ ; CHECK-NEXT: vsrl.vi v26, v25, 15 ; CHECK-NEXT: vadd.vv v25, v25, v26 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x i16> undef, i16 -7, i32 0 %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer @@ -454,8 +444,7 @@ ; CHECK-NEXT: vsrl.vi v28, v26, 15 ; CHECK-NEXT: vadd.vv v26, v26, v28 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v26, v26, a0 -; CHECK-NEXT: vsub.vv v8, v8, v26 +; CHECK-NEXT: vnmsac.vx v8, a0, v26 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x i16> undef, i16 -7, i32 0 %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer @@ -496,8 +485,7 @@ ; CHECK-NEXT: vsrl.vi v12, v28, 15 ; CHECK-NEXT: vadd.vv v28, v28, v12 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v28, v28, a0 -; CHECK-NEXT: vsub.vv v8, v8, v28 +; CHECK-NEXT: vnmsac.vx v8, a0, v28 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x i16> undef, i16 -7, i32 0 %splat = shufflevector <vscale x 16 x i16> %head, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer @@ -538,8 +526,7 @@ ; CHECK-NEXT: vsrl.vi v24, v16, 15 ; CHECK-NEXT: vadd.vv v16, v16, v24 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v16, v16, a0 -; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: vnmsac.vx v8, a0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x i16> undef, i16 -7, i32 0 %splat = shufflevector <vscale x 32 x i16> %head, <vscale x 32 x i16> 
undef, <vscale x 32 x i32> zeroinitializer @@ -581,8 +568,7 @@ ; CHECK-NEXT: vsra.vi v25, v25, 2 ; CHECK-NEXT: vadd.vv v25, v25, v26 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 1 x i32> undef, i32 -7, i32 0 %splat = shufflevector <vscale x 1 x i32> %head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer @@ -624,8 +610,7 @@ ; CHECK-NEXT: vsra.vi v25, v25, 2 ; CHECK-NEXT: vadd.vv v25, v25, v26 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x i32> undef, i32 -7, i32 0 %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer @@ -667,8 +652,7 @@ ; CHECK-NEXT: vsra.vi v26, v26, 2 ; CHECK-NEXT: vadd.vv v26, v26, v28 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v26, v26, a0 -; CHECK-NEXT: vsub.vv v8, v8, v26 +; CHECK-NEXT: vnmsac.vx v8, a0, v26 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x i32> undef, i32 -7, i32 0 %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer @@ -710,8 +694,7 @@ ; CHECK-NEXT: vsra.vi v28, v28, 2 ; CHECK-NEXT: vadd.vv v28, v28, v12 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v28, v28, a0 -; CHECK-NEXT: vsub.vv v8, v8, v28 +; CHECK-NEXT: vnmsac.vx v8, a0, v28 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x i32> undef, i32 -7, i32 0 %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer @@ -753,8 +736,7 @@ ; CHECK-NEXT: vsra.vi v16, v16, 2 ; CHECK-NEXT: vadd.vv v16, v16, v24 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v16, v16, a0 -; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: vnmsac.vx v8, a0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x i32> undef, i32 -7, i32 0 %splat = shufflevector <vscale x 16 x i32> %head, <vscale x 16 x i32> undef, <vscale x 16 x i32> zeroinitializer @@ -811,8 +793,7 @@ ; CHECK-NEXT: vsra.vi v25, v25, 1 ; CHECK-NEXT: vadd.vv v25, v25, v26 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement <vscale x 1 x i64> undef, i64 -7, i32 0 @@ -870,8 +851,7 @@ ; CHECK-NEXT: vsra.vi v26, v26, 1 ; CHECK-NEXT: vadd.vv v26, v26, v28 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v26, v26, a0 -; CHECK-NEXT: vsub.vv v8, v8, v26 +; CHECK-NEXT: vnmsac.vx v8, a0, v26 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x i64> undef, i64 -7, i32 0 @@ -929,8 +909,7 @@ ; CHECK-NEXT: vsra.vi v28, v28, 1 ; CHECK-NEXT: vadd.vv v28, v28, v12 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v28, v28, a0 -; CHECK-NEXT: vsub.vv v8, v8, v28 +; CHECK-NEXT: vnmsac.vx v8, a0, v28 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x i64> undef, i64 -7, i32 0 @@ -988,8 +967,7 @@ ; CHECK-NEXT: vsra.vi v16, v16, 1 ; CHECK-NEXT: vadd.vv v16, v16, v24 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v16, v16, a0 -; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: vnmsac.vx v8, a0, v16 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x i64> undef, i64 -7, i32 0 diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode-rv64.ll @@ -34,8 +34,7 @@ ; CHECK-NEXT: vsrl.vi v26, v25, 7 ; CHECK-NEXT: vadd.vv v25, v25, v26 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 1 x i8> undef, i8 -7, i32 0 %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer @@ -76,8 +75,7 @@ ; CHECK-NEXT: vsrl.vi v26, v25, 7 ; CHECK-NEXT: vadd.vv v25, v25, v26 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x i8> undef, i8 -7, i32 0 %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer @@ -118,8 +116,7 @@ ; CHECK-NEXT: vsrl.vi v26, v25, 7 ; CHECK-NEXT: vadd.vv v25, v25, v26 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x i8> undef, i8 -7, i32 0 %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer @@ -160,8 +157,7 @@ ; CHECK-NEXT: vsrl.vi v26, v25, 7 ; CHECK-NEXT: vadd.vv v25, v25, v26 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x i8> undef, i8 -7, i32 0 %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer @@ -202,8 +198,7 @@ ; CHECK-NEXT: vsrl.vi v28, v26, 7 ; CHECK-NEXT: vadd.vv v26, v26, v28 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v26, v26, a0 -; CHECK-NEXT: vsub.vv v8, v8, v26 +; CHECK-NEXT: vnmsac.vx v8, a0, v26 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x i8> undef, i8 -7, i32 0 %splat = shufflevector <vscale x 16 x i8> %head, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer @@ -244,8 +239,7 @@ ; CHECK-NEXT: vsrl.vi v12, v28, 7 ; CHECK-NEXT: vadd.vv v28, v28, v12 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v28, v28, a0 -; CHECK-NEXT: vsub.vv v8, v8, v28 +; CHECK-NEXT: vnmsac.vx v8, a0, v28 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x i8> undef, i8 -7, i32 0 %splat = shufflevector <vscale x 32 x i8> %head, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer @@ -286,8 +280,7 @@ ; CHECK-NEXT: vsrl.vi v24, v16, 7 ; CHECK-NEXT: vadd.vv v16, v16, v24 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v16, v16, a0 -; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: vnmsac.vx v8, a0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 64 x i8> undef, i8 -7, i32 0 %splat = shufflevector <vscale x 64 x i8> %head, <vscale x 64 x i8> undef, <vscale x 64 x i32> zeroinitializer @@ -328,8 +321,7 @@ ; CHECK-NEXT: vsrl.vi v26, v25, 15 ; CHECK-NEXT: vadd.vv v25, v25, v26 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 1 x i16> undef, i16 -7, i32 0 %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer @@ 
-370,8 +362,7 @@ ; CHECK-NEXT: vsrl.vi v26, v25, 15 ; CHECK-NEXT: vadd.vv v25, v25, v26 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x i16> undef, i16 -7, i32 0 %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer @@ -412,8 +403,7 @@ ; CHECK-NEXT: vsrl.vi v26, v25, 15 ; CHECK-NEXT: vadd.vv v25, v25, v26 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x i16> undef, i16 -7, i32 0 %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer @@ -454,8 +444,7 @@ ; CHECK-NEXT: vsrl.vi v28, v26, 15 ; CHECK-NEXT: vadd.vv v26, v26, v28 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v26, v26, a0 -; CHECK-NEXT: vsub.vv v8, v8, v26 +; CHECK-NEXT: vnmsac.vx v8, a0, v26 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x i16> undef, i16 -7, i32 0 %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer @@ -496,8 +485,7 @@ ; CHECK-NEXT: vsrl.vi v12, v28, 15 ; CHECK-NEXT: vadd.vv v28, v28, v12 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v28, v28, a0 -; CHECK-NEXT: vsub.vv v8, v8, v28 +; CHECK-NEXT: vnmsac.vx v8, a0, v28 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x i16> undef, i16 -7, i32 0 %splat = shufflevector <vscale x 16 x i16> %head, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer @@ -538,8 +526,7 @@ ; CHECK-NEXT: vsrl.vi v24, v16, 15 ; CHECK-NEXT: vadd.vv v16, v16, v24 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v16, v16, a0 -; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: vnmsac.vx v8, a0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x i16> undef, i16 -7, i32 0 %splat = shufflevector <vscale x 32 x i16> %head, <vscale x 32 x i16> undef, <vscale x 32 x i32> zeroinitializer @@ -581,8 +568,7 @@ ; CHECK-NEXT: vsrl.vi v26, v25, 31 ; CHECK-NEXT: vadd.vv v25, v25, v26 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 1 x i32> undef, i32 -7, i32 0 %splat = shufflevector <vscale x 1 x i32> %head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer @@ -624,8 +610,7 @@ ; CHECK-NEXT: vsrl.vi v26, v25, 31 ; CHECK-NEXT: vadd.vv v25, v25, v26 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x i32> undef, i32 -7, i32 0 %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer @@ -667,8 +652,7 @@ ; CHECK-NEXT: vsrl.vi v28, v26, 31 ; CHECK-NEXT: vadd.vv v26, v26, v28 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v26, v26, a0 -; CHECK-NEXT: vsub.vv v8, v8, v26 +; CHECK-NEXT: vnmsac.vx v8, a0, v26 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x i32> undef, i32 -7, i32 0 %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer @@ -710,8 +694,7 @@ ; CHECK-NEXT: vsrl.vi v12, v28, 31 ; CHECK-NEXT: vadd.vv v28, v28, v12 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v28, v28, a0 -; CHECK-NEXT: vsub.vv 
v8, v8, v28 +; CHECK-NEXT: vnmsac.vx v8, a0, v28 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x i32> undef, i32 -7, i32 0 %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer @@ -753,8 +736,7 @@ ; CHECK-NEXT: vsrl.vi v24, v16, 31 ; CHECK-NEXT: vadd.vv v16, v16, v24 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v16, v16, a0 -; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: vnmsac.vx v8, a0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x i32> undef, i32 -7, i32 0 %splat = shufflevector <vscale x 16 x i32> %head, <vscale x 16 x i32> undef, <vscale x 16 x i32> zeroinitializer @@ -802,8 +784,7 @@ ; CHECK-NEXT: vsra.vi v25, v25, 1 ; CHECK-NEXT: vadd.vv v25, v25, v26 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 1 x i64> undef, i64 -7, i32 0 %splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer @@ -851,8 +832,7 @@ ; CHECK-NEXT: vsra.vi v26, v26, 1 ; CHECK-NEXT: vadd.vv v26, v26, v28 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v26, v26, a0 -; CHECK-NEXT: vsub.vv v8, v8, v26 +; CHECK-NEXT: vnmsac.vx v8, a0, v26 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x i64> undef, i64 -7, i32 0 %splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer @@ -900,8 +880,7 @@ ; CHECK-NEXT: vsra.vi v28, v28, 1 ; CHECK-NEXT: vadd.vv v28, v28, v12 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v28, v28, a0 -; CHECK-NEXT: vsub.vv v8, v8, v28 +; CHECK-NEXT: vnmsac.vx v8, a0, v28 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x i64> undef, i64 -7, i32 0 %splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer @@ -949,8 +928,7 @@ ; CHECK-NEXT: vsra.vi v16, v16, 1 ; CHECK-NEXT: vadd.vv v16, v16, v24 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v16, v16, a0 -; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: vnmsac.vx v8, a0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x i64> undef, i64 -7, i32 0 %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode-rv32.ll @@ -31,8 +31,7 @@ ; CHECK-NEXT: vmulhu.vx v25, v8, a0 ; CHECK-NEXT: vsrl.vi v25, v25, 5 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 1 x i8> undef, i8 -7, i32 0 %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer @@ -70,8 +69,7 @@ ; CHECK-NEXT: vmulhu.vx v25, v8, a0 ; CHECK-NEXT: vsrl.vi v25, v25, 5 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x i8> undef, i8 -7, i32 0 %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer @@ -109,8 +107,7 @@ ; CHECK-NEXT: vmulhu.vx v25, v8, a0 ; CHECK-NEXT: vsrl.vi v25, v25, 5 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 
-; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x i8> undef, i8 -7, i32 0 %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer @@ -148,8 +145,7 @@ ; CHECK-NEXT: vmulhu.vx v25, v8, a0 ; CHECK-NEXT: vsrl.vi v25, v25, 5 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x i8> undef, i8 -7, i32 0 %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer @@ -187,8 +183,7 @@ ; CHECK-NEXT: vmulhu.vx v26, v8, a0 ; CHECK-NEXT: vsrl.vi v26, v26, 5 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v26, v26, a0 -; CHECK-NEXT: vsub.vv v8, v8, v26 +; CHECK-NEXT: vnmsac.vx v8, a0, v26 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x i8> undef, i8 -7, i32 0 %splat = shufflevector <vscale x 16 x i8> %head, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer @@ -226,8 +221,7 @@ ; CHECK-NEXT: vmulhu.vx v28, v8, a0 ; CHECK-NEXT: vsrl.vi v28, v28, 5 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v28, v28, a0 -; CHECK-NEXT: vsub.vv v8, v8, v28 +; CHECK-NEXT: vnmsac.vx v8, a0, v28 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x i8> undef, i8 -7, i32 0 %splat = shufflevector <vscale x 32 x i8> %head, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer @@ -265,8 +259,7 @@ ; CHECK-NEXT: vmulhu.vx v16, v8, a0 ; CHECK-NEXT: vsrl.vi v16, v16, 5 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v16, v16, a0 -; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: vnmsac.vx v8, a0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 64 x i8> undef, i8 -7, i32 0 %splat = shufflevector <vscale x 64 x i8> %head, <vscale x 64 x i8> undef, <vscale x 64 x i32> zeroinitializer @@ -305,8 +298,7 @@ ; CHECK-NEXT: vmulhu.vx v25, v8, a0 ; CHECK-NEXT: vsrl.vi v25, v25, 13 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 1 x i16> undef, i16 -7, i32 0 %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer @@ -345,8 +337,7 @@ ; CHECK-NEXT: vmulhu.vx v25, v8, a0 ; CHECK-NEXT: vsrl.vi v25, v25, 13 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x i16> undef, i16 -7, i32 0 %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer @@ -385,8 +376,7 @@ ; CHECK-NEXT: vmulhu.vx v25, v8, a0 ; CHECK-NEXT: vsrl.vi v25, v25, 13 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x i16> undef, i16 -7, i32 0 %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer @@ -425,8 +415,7 @@ ; CHECK-NEXT: vmulhu.vx v26, v8, a0 ; CHECK-NEXT: vsrl.vi v26, v26, 13 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v26, v26, a0 -; CHECK-NEXT: vsub.vv v8, v8, v26 +; CHECK-NEXT: vnmsac.vx v8, a0, v26 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x i16> undef, i16 -7, i32 0 %splat = shufflevector <vscale x 8 x i16> 
%head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer @@ -465,8 +454,7 @@ ; CHECK-NEXT: vmulhu.vx v28, v8, a0 ; CHECK-NEXT: vsrl.vi v28, v28, 13 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v28, v28, a0 -; CHECK-NEXT: vsub.vv v8, v8, v28 +; CHECK-NEXT: vnmsac.vx v8, a0, v28 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x i16> undef, i16 -7, i32 0 %splat = shufflevector <vscale x 16 x i16> %head, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer @@ -505,8 +493,7 @@ ; CHECK-NEXT: vmulhu.vx v16, v8, a0 ; CHECK-NEXT: vsrl.vi v16, v16, 13 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v16, v16, a0 -; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: vnmsac.vx v8, a0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x i16> undef, i16 -7, i32 0 %splat = shufflevector <vscale x 32 x i16> %head, <vscale x 32 x i16> undef, <vscale x 32 x i32> zeroinitializer @@ -545,8 +532,7 @@ ; CHECK-NEXT: vmulhu.vx v25, v8, a0 ; CHECK-NEXT: vsrl.vi v25, v25, 29 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 1 x i32> undef, i32 -7, i32 0 %splat = shufflevector <vscale x 1 x i32> %head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer @@ -585,8 +571,7 @@ ; CHECK-NEXT: vmulhu.vx v25, v8, a0 ; CHECK-NEXT: vsrl.vi v25, v25, 29 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x i32> undef, i32 -7, i32 0 %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer @@ -625,8 +610,7 @@ ; CHECK-NEXT: vmulhu.vx v26, v8, a0 ; CHECK-NEXT: vsrl.vi v26, v26, 29 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v26, v26, a0 -; CHECK-NEXT: vsub.vv v8, v8, v26 +; CHECK-NEXT: vnmsac.vx v8, a0, v26 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x i32> undef, i32 -7, i32 0 %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer @@ -665,8 +649,7 @@ ; CHECK-NEXT: vmulhu.vx v28, v8, a0 ; CHECK-NEXT: vsrl.vi v28, v28, 29 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v28, v28, a0 -; CHECK-NEXT: vsub.vv v8, v8, v28 +; CHECK-NEXT: vnmsac.vx v8, a0, v28 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x i32> undef, i32 -7, i32 0 %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer @@ -705,8 +688,7 @@ ; CHECK-NEXT: vmulhu.vx v16, v8, a0 ; CHECK-NEXT: vsrl.vi v16, v16, 29 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v16, v16, a0 -; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: vnmsac.vx v8, a0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x i32> undef, i32 -7, i32 0 %splat = shufflevector <vscale x 16 x i32> %head, <vscale x 16 x i32> undef, <vscale x 16 x i32> zeroinitializer @@ -759,8 +741,7 @@ ; CHECK-NEXT: addi a0, zero, 61 ; CHECK-NEXT: vsrl.vx v25, v25, a0 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement <vscale x 1 x i64> undef, i64 -7, i32 0 @@ -814,8 +795,7 @@ ; CHECK-NEXT: addi a0, zero, 61 ; CHECK-NEXT: vsrl.vx v26, v26, a0 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v26, v26, a0 -; CHECK-NEXT: vsub.vv v8, v8, v26 
+; CHECK-NEXT: vnmsac.vx v8, a0, v26 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x i64> undef, i64 -7, i32 0 @@ -869,8 +849,7 @@ ; CHECK-NEXT: addi a0, zero, 61 ; CHECK-NEXT: vsrl.vx v28, v28, a0 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v28, v28, a0 -; CHECK-NEXT: vsub.vv v8, v8, v28 +; CHECK-NEXT: vnmsac.vx v8, a0, v28 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x i64> undef, i64 -7, i32 0 @@ -924,8 +903,7 @@ ; CHECK-NEXT: addi a0, zero, 61 ; CHECK-NEXT: vsrl.vx v16, v16, a0 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v16, v16, a0 -; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: vnmsac.vx v8, a0, v16 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x i64> undef, i64 -7, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode-rv64.ll @@ -31,8 +31,7 @@ ; CHECK-NEXT: vmulhu.vx v25, v8, a0 ; CHECK-NEXT: vsrl.vi v25, v25, 5 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 1 x i8> undef, i8 -7, i32 0 %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer @@ -70,8 +69,7 @@ ; CHECK-NEXT: vmulhu.vx v25, v8, a0 ; CHECK-NEXT: vsrl.vi v25, v25, 5 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x i8> undef, i8 -7, i32 0 %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer @@ -109,8 +107,7 @@ ; CHECK-NEXT: vmulhu.vx v25, v8, a0 ; CHECK-NEXT: vsrl.vi v25, v25, 5 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x i8> undef, i8 -7, i32 0 %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer @@ -148,8 +145,7 @@ ; CHECK-NEXT: vmulhu.vx v25, v8, a0 ; CHECK-NEXT: vsrl.vi v25, v25, 5 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x i8> undef, i8 -7, i32 0 %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer @@ -187,8 +183,7 @@ ; CHECK-NEXT: vmulhu.vx v26, v8, a0 ; CHECK-NEXT: vsrl.vi v26, v26, 5 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v26, v26, a0 -; CHECK-NEXT: vsub.vv v8, v8, v26 +; CHECK-NEXT: vnmsac.vx v8, a0, v26 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x i8> undef, i8 -7, i32 0 %splat = shufflevector <vscale x 16 x i8> %head, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer @@ -226,8 +221,7 @@ ; CHECK-NEXT: vmulhu.vx v28, v8, a0 ; CHECK-NEXT: vsrl.vi v28, v28, 5 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v28, v28, a0 -; CHECK-NEXT: vsub.vv v8, v8, v28 +; CHECK-NEXT: vnmsac.vx v8, a0, v28 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x i8> undef, i8 -7, i32 0 %splat = shufflevector <vscale x 32 x i8> %head, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer @@ -265,8 
+259,7 @@ ; CHECK-NEXT: vmulhu.vx v16, v8, a0 ; CHECK-NEXT: vsrl.vi v16, v16, 5 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v16, v16, a0 -; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: vnmsac.vx v8, a0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 64 x i8> undef, i8 -7, i32 0 %splat = shufflevector <vscale x 64 x i8> %head, <vscale x 64 x i8> undef, <vscale x 64 x i32> zeroinitializer @@ -305,8 +298,7 @@ ; CHECK-NEXT: vmulhu.vx v25, v8, a0 ; CHECK-NEXT: vsrl.vi v25, v25, 13 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 1 x i16> undef, i16 -7, i32 0 %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer @@ -345,8 +337,7 @@ ; CHECK-NEXT: vmulhu.vx v25, v8, a0 ; CHECK-NEXT: vsrl.vi v25, v25, 13 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x i16> undef, i16 -7, i32 0 %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer @@ -385,8 +376,7 @@ ; CHECK-NEXT: vmulhu.vx v25, v8, a0 ; CHECK-NEXT: vsrl.vi v25, v25, 13 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x i16> undef, i16 -7, i32 0 %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer @@ -425,8 +415,7 @@ ; CHECK-NEXT: vmulhu.vx v26, v8, a0 ; CHECK-NEXT: vsrl.vi v26, v26, 13 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v26, v26, a0 -; CHECK-NEXT: vsub.vv v8, v8, v26 +; CHECK-NEXT: vnmsac.vx v8, a0, v26 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x i16> undef, i16 -7, i32 0 %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer @@ -465,8 +454,7 @@ ; CHECK-NEXT: vmulhu.vx v28, v8, a0 ; CHECK-NEXT: vsrl.vi v28, v28, 13 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v28, v28, a0 -; CHECK-NEXT: vsub.vv v8, v8, v28 +; CHECK-NEXT: vnmsac.vx v8, a0, v28 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x i16> undef, i16 -7, i32 0 %splat = shufflevector <vscale x 16 x i16> %head, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer @@ -505,8 +493,7 @@ ; CHECK-NEXT: vmulhu.vx v16, v8, a0 ; CHECK-NEXT: vsrl.vi v16, v16, 13 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v16, v16, a0 -; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: vnmsac.vx v8, a0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x i16> undef, i16 -7, i32 0 %splat = shufflevector <vscale x 32 x i16> %head, <vscale x 32 x i16> undef, <vscale x 32 x i32> zeroinitializer @@ -545,8 +532,7 @@ ; CHECK-NEXT: vmulhu.vx v25, v8, a0 ; CHECK-NEXT: vsrl.vi v25, v25, 29 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 1 x i32> undef, i32 -7, i32 0 %splat = shufflevector <vscale x 1 x i32> %head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer @@ -585,8 +571,7 @@ ; CHECK-NEXT: vmulhu.vx v25, v8, a0 ; CHECK-NEXT: vsrl.vi v25, v25, 29 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, 
v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x i32> undef, i32 -7, i32 0 %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer @@ -625,8 +610,7 @@ ; CHECK-NEXT: vmulhu.vx v26, v8, a0 ; CHECK-NEXT: vsrl.vi v26, v26, 29 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v26, v26, a0 -; CHECK-NEXT: vsub.vv v8, v8, v26 +; CHECK-NEXT: vnmsac.vx v8, a0, v26 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x i32> undef, i32 -7, i32 0 %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer @@ -665,8 +649,7 @@ ; CHECK-NEXT: vmulhu.vx v28, v8, a0 ; CHECK-NEXT: vsrl.vi v28, v28, 29 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v28, v28, a0 -; CHECK-NEXT: vsub.vv v8, v8, v28 +; CHECK-NEXT: vnmsac.vx v8, a0, v28 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x i32> undef, i32 -7, i32 0 %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer @@ -705,8 +688,7 @@ ; CHECK-NEXT: vmulhu.vx v16, v8, a0 ; CHECK-NEXT: vsrl.vi v16, v16, 29 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v16, v16, a0 -; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: vnmsac.vx v8, a0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x i32> undef, i32 -7, i32 0 %splat = shufflevector <vscale x 16 x i32> %head, <vscale x 16 x i32> undef, <vscale x 16 x i32> zeroinitializer @@ -747,8 +729,7 @@ ; CHECK-NEXT: addi a0, zero, 61 ; CHECK-NEXT: vsrl.vx v25, v25, a0 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement <vscale x 1 x i64> undef, i64 -7, i32 0 %splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer @@ -789,8 +770,7 @@ ; CHECK-NEXT: addi a0, zero, 61 ; CHECK-NEXT: vsrl.vx v26, v26, a0 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v26, v26, a0 -; CHECK-NEXT: vsub.vv v8, v8, v26 +; CHECK-NEXT: vnmsac.vx v8, a0, v26 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x i64> undef, i64 -7, i32 0 %splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer @@ -831,8 +811,7 @@ ; CHECK-NEXT: addi a0, zero, 61 ; CHECK-NEXT: vsrl.vx v28, v28, a0 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v28, v28, a0 -; CHECK-NEXT: vsub.vv v8, v8, v28 +; CHECK-NEXT: vnmsac.vx v8, a0, v28 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x i64> undef, i64 -7, i32 0 %splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer @@ -873,8 +852,7 @@ ; CHECK-NEXT: addi a0, zero, 61 ; CHECK-NEXT: vsrl.vx v16, v16, a0 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v16, v16, a0 -; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: vnmsac.vx v8, a0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x i64> undef, i64 -7, i32 0 %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
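
All of the regenerated checks above exercise the same fold: a multiply whose only use feeds an add or subtract is now selected as a single vmadd/vmacc/vnmsub/vnmsac instead of a separate vmul followed by vadd/vsub. As a minimal sketch of why the vrem/vremu tests change (illustrative only; the function name below is made up and is not taken from the test files), an srem-by-constant case of the kind in vrem-sdnode-rv32.ll reduces to exactly that mul+sub pattern once the division is expanded, so its tail is expected to become the vnmsac.vx seen in the updated CHECK lines rather than vmul.vx followed by vsub.vv:

; Illustrative IR, not part of the patch.
define <vscale x 1 x i32> @vrem_vi_nxv1i32_example(<vscale x 1 x i32> %va) {
  %head = insertelement <vscale x 1 x i32> undef, i32 -7, i32 0
  %splat = shufflevector <vscale x 1 x i32> %head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer
  ; The remainder lowers to a multiply-high/shift sequence computing the
  ; quotient, then "remainder = %va - quotient * -7", which the new
  ; patterns select as vnmsac.vx.
  %r = srem <vscale x 1 x i32> %va, %splat
  ret <vscale x 1 x i32> %r
}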