diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1170,7 +1170,13 @@ case CASE_VFMA_OPCODE_LMULS(FMACC, VV): case CASE_VFMA_OPCODE_LMULS(FMSAC, VV): case CASE_VFMA_OPCODE_LMULS(FNMACC, VV): - case CASE_VFMA_OPCODE_LMULS(FNMSAC, VV): { + case CASE_VFMA_OPCODE_LMULS(FNMSAC, VV): + case CASE_VFMA_OPCODE_LMULS(MADD, VX): + case CASE_VFMA_OPCODE_LMULS(NMSUB, VX): + case CASE_VFMA_OPCODE_LMULS(MACC, VX): + case CASE_VFMA_OPCODE_LMULS(NMSAC, VX): + case CASE_VFMA_OPCODE_LMULS(MACC, VV): + case CASE_VFMA_OPCODE_LMULS(NMSAC, VV): { // For these instructions we can only swap operand 1 and operand 3 by // changing the opcode. unsigned CommutableOpIdx1 = 1; @@ -1183,7 +1189,9 @@ case CASE_VFMA_OPCODE_LMULS(FMADD, VV): case CASE_VFMA_OPCODE_LMULS(FMSUB, VV): case CASE_VFMA_OPCODE_LMULS(FNMADD, VV): - case CASE_VFMA_OPCODE_LMULS(FNMSUB, VV): { + case CASE_VFMA_OPCODE_LMULS(FNMSUB, VV): + case CASE_VFMA_OPCODE_LMULS(MADD, VV): + case CASE_VFMA_OPCODE_LMULS(NMSUB, VV): { // For these instructions we have more freedom. We can commute with the // other multiplicand or with the addend/subtrahend/minuend. @@ -1288,7 +1296,13 @@ case CASE_VFMA_OPCODE_LMULS(FMACC, VV): case CASE_VFMA_OPCODE_LMULS(FMSAC, VV): case CASE_VFMA_OPCODE_LMULS(FNMACC, VV): - case CASE_VFMA_OPCODE_LMULS(FNMSAC, VV): { + case CASE_VFMA_OPCODE_LMULS(FNMSAC, VV): + case CASE_VFMA_OPCODE_LMULS(MADD, VX): + case CASE_VFMA_OPCODE_LMULS(NMSUB, VX): + case CASE_VFMA_OPCODE_LMULS(MACC, VX): + case CASE_VFMA_OPCODE_LMULS(NMSAC, VX): + case CASE_VFMA_OPCODE_LMULS(MACC, VV): + case CASE_VFMA_OPCODE_LMULS(NMSAC, VV): { // It only make sense to toggle these between clobbering the // addend/subtrahend/minuend one of the multiplicands. assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index"); @@ -1309,6 +1323,12 @@ CASE_VFMA_CHANGE_OPCODE_LMULS(FMSAC, FMSUB, VV) CASE_VFMA_CHANGE_OPCODE_LMULS(FNMACC, FNMADD, VV) CASE_VFMA_CHANGE_OPCODE_LMULS(FNMSAC, FNMSUB, VV) + CASE_VFMA_CHANGE_OPCODE_LMULS(MACC, MADD, VX) + CASE_VFMA_CHANGE_OPCODE_LMULS(MADD, MACC, VX) + CASE_VFMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VX) + CASE_VFMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VX) + CASE_VFMA_CHANGE_OPCODE_LMULS(MACC, MADD, VV) + CASE_VFMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VV) } auto &WorkingMI = cloneIfNew(MI); @@ -1319,7 +1339,9 @@ case CASE_VFMA_OPCODE_LMULS(FMADD, VV): case CASE_VFMA_OPCODE_LMULS(FMSUB, VV): case CASE_VFMA_OPCODE_LMULS(FNMADD, VV): - case CASE_VFMA_OPCODE_LMULS(FNMSUB, VV): { + case CASE_VFMA_OPCODE_LMULS(FNMSUB, VV): + case CASE_VFMA_OPCODE_LMULS(MADD, VV): + case CASE_VFMA_OPCODE_LMULS(NMSUB, VV): { assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index"); // If one of the operands, is the addend we need to change opcode. // Otherwise we're just swapping 2 of the multiplicands. 
@@ -1332,6 +1354,8 @@ CASE_VFMA_CHANGE_OPCODE_LMULS(FMSUB, FMSAC, VV) CASE_VFMA_CHANGE_OPCODE_LMULS(FNMADD, FNMACC, VV) CASE_VFMA_CHANGE_OPCODE_LMULS(FNMSUB, FNMSAC, VV) + CASE_VFMA_CHANGE_OPCODE_LMULS(MADD, MACC, VV) + CASE_VFMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VV) } auto &WorkingMI = cloneIfNew(MI); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -1936,6 +1936,22 @@ multiclass VPseudoTernaryV_VV_VX_AAXA { defm "" : VPseudoTernaryV_VV; defm "" : VPseudoTernaryV_VX_AAXA; + + foreach m = MxList.m in { + // Add a commutable version for use by IR fma. + // NOTE: We need this because we use a tail undisturbed policy on the + // intrinsic version so we can't commute those instructions since it would + // change which input operand is tied to the destination. That would + // remove user control of the tail elements. + let isCommutable = 1, ForceTailAgnostic = true, VLMul = m.value in { + def "_VV_" # m.MX # "_COMMUTABLE" : VPseudoTernaryNoMask; + def "_VX_" # m.MX # "_COMMUTABLE" : + VPseudoTernaryNoMask; + } + } } multiclass VPseudoTernaryV_VV_VF_AAXA { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -472,6 +472,36 @@ defm : VPatBinarySDNode_VV_VX; defm : VPatBinarySDNode_VV_VX; +// 12.13 Vector Single-Width Integer Multiply-Add Instructions. +foreach vti = AllIntegerVectors in { + // NOTE: We choose VMADD because it has the most commuting freedom. So it + // works best with how TwoAddressInstructionPass tries commuting. + defvar suffix = vti.LMul.MX # "_COMMUTABLE"; + def : Pat<(vti.Vector (add vti.RegClass:$rs2, + (mul vti.RegClass:$rs1, vti.RegClass:$rd))), + (!cast("PseudoVMADD_VV_"# suffix) + vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, + vti.AVL, vti.Log2SEW)>; + def : Pat<(vti.Vector (sub vti.RegClass:$rs2, + (mul vti.RegClass:$rs1, vti.RegClass:$rd))), + (!cast("PseudoVNMSUB_VV_"# suffix) + vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, + vti.AVL, vti.Log2SEW)>; + + // The choice of VMADD here is arbitrary, vmadd.vx and vmacc.vx are equally + // commutable. + def : Pat<(vti.Vector (add vti.RegClass:$rs2, + (mul (SplatPat XLenVT:$rs1), vti.RegClass:$rd))), + (!cast("PseudoVMADD_VX_" # suffix) + vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, + vti.AVL, vti.Log2SEW)>; + def : Pat<(vti.Vector (sub vti.RegClass:$rs2, + (mul (SplatPat XLenVT:$rs1), vti.RegClass:$rd))), + (!cast("PseudoVNMSUB_VX_" # suffix) + vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, + vti.AVL, vti.Log2SEW)>; +} + // 12.15. Vector Integer Merge Instructions foreach vti = AllIntegerVectors in { def : Pat<(vti.Vector (vselect (vti.Mask VMV0:$vm), vti.RegClass:$rs1, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -731,6 +731,50 @@ defm : VPatBinaryVL_VV_VX; defm : VPatBinaryVL_VV_VX; +// 12.13 Vector Single-Width Integer Multiply-Add Instructions +foreach vti = AllIntegerVectors in { + // NOTE: We choose VMADD because it has the most commuting freedom. So it + // works best with how TwoAddressInstructionPass tries commuting. 
+ defvar suffix = vti.LMul.MX # "_COMMUTABLE"; + def : Pat<(vti.Vector + (riscv_add_vl vti.RegClass:$rs2, + (riscv_mul_vl vti.RegClass:$rs1, vti.RegClass:$rd, + (vti.Mask true_mask), VLOpFrag), + (vti.Mask true_mask), VLOpFrag)), + (!cast("PseudoVMADD_VV_"# suffix) + vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, + GPR:$vl, vti.Log2SEW)>; + def : Pat<(vti.Vector + (riscv_sub_vl vti.RegClass:$rs2, + (riscv_mul_vl vti.RegClass:$rs1, vti.RegClass:$rd, + (vti.Mask true_mask), VLOpFrag), + (vti.Mask true_mask), VLOpFrag)), + (!cast("PseudoVNMSUB_VV_"# suffix) + vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, + GPR:$vl, vti.Log2SEW)>; + + // The choice of VMADD here is arbitrary, vmadd.vx and vmacc.vx are equally + // commutable. + def : Pat<(vti.Vector + (riscv_add_vl vti.RegClass:$rs2, + (riscv_mul_vl (SplatPat XLenVT:$rs1), + vti.RegClass:$rd, (vti.Mask true_mask), + VLOpFrag), + (vti.Mask true_mask), VLOpFrag)), + (!cast("PseudoVMADD_VX_" # suffix) + vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, + GPR:$vl, vti.Log2SEW)>; + def : Pat<(vti.Vector + (riscv_sub_vl vti.RegClass:$rs2, + (riscv_mul_vl (SplatPat XLenVT:$rs1), + vti.RegClass:$rd, (vti.Mask true_mask), + VLOpFrag), + (vti.Mask true_mask), VLOpFrag)), + (!cast("PseudoVNMSUB_VX_" # suffix) + vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, + GPR:$vl, vti.Log2SEW)>; +} + // 12.15. Vector Integer Merge Instructions foreach vti = AllIntegerVectors in { def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask VMV0:$vm), diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -1233,19 +1233,18 @@ ; RV32-NEXT: vmv.v.i v27, -1 ; RV32-NEXT: vmerge.vim v27, v27, 0, v0 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; RV32-NEXT: vmul.vv v25, v25, v27 -; RV32-NEXT: vadd.vv v25, v26, v25 +; RV32-NEXT: vmadd.vv v27, v25, v26 ; RV32-NEXT: addi a1, zero, 63 -; RV32-NEXT: vsrl.vx v26, v25, a1 +; RV32-NEXT: vsrl.vx v25, v27, a1 ; RV32-NEXT: addi a1, zero, 1 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; RV32-NEXT: vmv.s.x v27, a1 +; RV32-NEXT: vmv.s.x v26, a1 ; RV32-NEXT: vmv.v.i v28, 0 ; RV32-NEXT: vsetivli zero, 3, e32, m1, tu, mu -; RV32-NEXT: vslideup.vi v28, v27, 2 +; RV32-NEXT: vslideup.vi v28, v26, 2 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; RV32-NEXT: vsra.vv v25, v25, v28 -; RV32-NEXT: vadd.vv v25, v25, v26 +; RV32-NEXT: vsra.vv v26, v27, v28 +; RV32-NEXT: vadd.vv v25, v26, v25 ; RV32-NEXT: vse64.v v25, (a0) ; RV32-NEXT: ret ; @@ -1256,8 +1255,6 @@ ; RV64-NEXT: vmv.v.i v26, -1 ; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu ; RV64-NEXT: vmv.s.x v26, zero -; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; RV64-NEXT: vmul.vv v26, v25, v26 ; RV64-NEXT: lui a1, 21845 ; RV64-NEXT: addiw a1, a1, 1365 ; RV64-NEXT: slli a1, a1, 12 @@ -1266,18 +1263,19 @@ ; RV64-NEXT: addi a1, a1, 1365 ; RV64-NEXT: slli a1, a1, 12 ; RV64-NEXT: addi a2, a1, 1365 +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; RV64-NEXT: vmv.v.x v27, a2 ; RV64-NEXT: addi a1, a1, 1366 ; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu ; RV64-NEXT: vmv.s.x v27, a1 ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; RV64-NEXT: vmulh.vv v25, v25, v27 -; RV64-NEXT: vadd.vv v25, v25, v26 +; RV64-NEXT: vmulh.vv v27, v25, v27 +; RV64-NEXT: vmacc.vv v27, v25, v26 ; RV64-NEXT: addi a1, zero, 63 -; RV64-NEXT: vsrl.vx v26, v25, a1 -; RV64-NEXT: vid.v v27 -; RV64-NEXT: vsra.vv 
v25, v25, v27 -; RV64-NEXT: vadd.vv v25, v25, v26 +; RV64-NEXT: vsrl.vx v25, v27, a1 +; RV64-NEXT: vid.v v26 +; RV64-NEXT: vsra.vv v26, v27, v26 +; RV64-NEXT: vadd.vv v25, v26, v25 ; RV64-NEXT: vse64.v v25, (a0) ; RV64-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x @@ -4709,37 +4707,36 @@ ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; LMULMAX2-RV32-NEXT: vle64.v v26, (a0) -; LMULMAX2-RV32-NEXT: addi a1, zero, 51 -; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; LMULMAX2-RV32-NEXT: vmv.v.i v28, -1 -; LMULMAX2-RV32-NEXT: vmerge.vim v28, v28, 0, v0 -; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; LMULMAX2-RV32-NEXT: vmul.vv v28, v26, v28 ; LMULMAX2-RV32-NEXT: addi a1, zero, 17 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu ; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 ; LMULMAX2-RV32-NEXT: lui a1, 349525 ; LMULMAX2-RV32-NEXT: addi a2, a1, 1365 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; LMULMAX2-RV32-NEXT: vmv.v.x v30, a2 +; LMULMAX2-RV32-NEXT: vmv.v.x v28, a2 ; LMULMAX2-RV32-NEXT: addi a1, a1, 1366 -; LMULMAX2-RV32-NEXT: vmerge.vxm v30, v30, a1, v0 +; LMULMAX2-RV32-NEXT: vmerge.vxm v28, v28, a1, v0 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; LMULMAX2-RV32-NEXT: vmulh.vv v26, v26, v30 -; LMULMAX2-RV32-NEXT: vadd.vv v26, v26, v28 +; LMULMAX2-RV32-NEXT: vmulh.vv v28, v26, v28 +; LMULMAX2-RV32-NEXT: addi a1, zero, 51 +; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu +; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 +; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; LMULMAX2-RV32-NEXT: vmv.v.i v30, -1 +; LMULMAX2-RV32-NEXT: vmerge.vim v30, v30, 0, v0 +; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; LMULMAX2-RV32-NEXT: vmadd.vv v30, v26, v28 ; LMULMAX2-RV32-NEXT: addi a1, zero, 63 -; LMULMAX2-RV32-NEXT: vsrl.vx v28, v26, a1 +; LMULMAX2-RV32-NEXT: vsrl.vx v26, v30, a1 ; LMULMAX2-RV32-NEXT: addi a1, zero, 68 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, mu ; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; LMULMAX2-RV32-NEXT: vmv.v.i v30, 0 -; LMULMAX2-RV32-NEXT: vmerge.vim v30, v30, 1, v0 +; LMULMAX2-RV32-NEXT: vmv.v.i v28, 0 +; LMULMAX2-RV32-NEXT: vmerge.vim v28, v28, 1, v0 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; LMULMAX2-RV32-NEXT: vsra.vv v26, v26, v30 -; LMULMAX2-RV32-NEXT: vadd.vv v26, v26, v28 +; LMULMAX2-RV32-NEXT: vsra.vv v28, v30, v28 +; LMULMAX2-RV32-NEXT: vadd.vv v26, v28, v26 ; LMULMAX2-RV32-NEXT: vse64.v v26, (a0) ; LMULMAX2-RV32-NEXT: ret ; @@ -4753,7 +4750,6 @@ ; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; LMULMAX2-RV64-NEXT: vmv.v.i v28, -1 ; LMULMAX2-RV64-NEXT: vmerge.vim v28, v28, 0, v0 -; LMULMAX2-RV64-NEXT: vmul.vv v28, v26, v28 ; LMULMAX2-RV64-NEXT: lui a1, 21845 ; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 ; LMULMAX2-RV64-NEXT: slli a1, a1, 12 @@ -4765,14 +4761,14 @@ ; LMULMAX2-RV64-NEXT: vmv.v.x v30, a2 ; LMULMAX2-RV64-NEXT: addi a1, a1, 1366 ; LMULMAX2-RV64-NEXT: vmerge.vxm v30, v30, a1, v0 -; LMULMAX2-RV64-NEXT: vmulh.vv v26, v26, v30 -; LMULMAX2-RV64-NEXT: vadd.vv v26, v26, v28 +; LMULMAX2-RV64-NEXT: vmulh.vv v30, v26, v30 +; LMULMAX2-RV64-NEXT: vmacc.vv v30, v26, v28 ; LMULMAX2-RV64-NEXT: addi a1, zero, 63 -; LMULMAX2-RV64-NEXT: vsrl.vx v28, v26, a1 -; LMULMAX2-RV64-NEXT: vmv.v.i v30, 1 -; LMULMAX2-RV64-NEXT: vmerge.vim v30, v30, 0, v0 -; LMULMAX2-RV64-NEXT: vsra.vv v26, v26, v30 -; LMULMAX2-RV64-NEXT: 
vadd.vv v26, v26, v28 +; LMULMAX2-RV64-NEXT: vsrl.vx v26, v30, a1 +; LMULMAX2-RV64-NEXT: vmv.v.i v28, 1 +; LMULMAX2-RV64-NEXT: vmerge.vim v28, v28, 0, v0 +; LMULMAX2-RV64-NEXT: vsra.vv v28, v30, v28 +; LMULMAX2-RV64-NEXT: vadd.vv v26, v28, v26 ; LMULMAX2-RV64-NEXT: vse64.v v26, (a0) ; LMULMAX2-RV64-NEXT: ret ; @@ -4802,8 +4798,6 @@ ; LMULMAX1-RV64-NEXT: vmv.v.i v27, -1 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu ; LMULMAX1-RV64-NEXT: vmv.s.x v27, zero -; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; LMULMAX1-RV64-NEXT: vmul.vv v28, v26, v27 ; LMULMAX1-RV64-NEXT: lui a2, 21845 ; LMULMAX1-RV64-NEXT: addiw a2, a2, 1365 ; LMULMAX1-RV64-NEXT: slli a2, a2, 12 @@ -4812,24 +4806,24 @@ ; LMULMAX1-RV64-NEXT: addi a2, a2, 1365 ; LMULMAX1-RV64-NEXT: slli a2, a2, 12 ; LMULMAX1-RV64-NEXT: addi a3, a2, 1365 -; LMULMAX1-RV64-NEXT: vmv.v.x v29, a3 +; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; LMULMAX1-RV64-NEXT: vmv.v.x v28, a3 ; LMULMAX1-RV64-NEXT: addi a2, a2, 1366 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu -; LMULMAX1-RV64-NEXT: vmv.s.x v29, a2 +; LMULMAX1-RV64-NEXT: vmv.s.x v28, a2 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; LMULMAX1-RV64-NEXT: vmulh.vv v26, v26, v29 -; LMULMAX1-RV64-NEXT: vadd.vv v26, v26, v28 +; LMULMAX1-RV64-NEXT: vmulh.vv v29, v26, v28 +; LMULMAX1-RV64-NEXT: vmacc.vv v29, v27, v26 ; LMULMAX1-RV64-NEXT: addi a2, zero, 63 -; LMULMAX1-RV64-NEXT: vsrl.vx v28, v26, a2 +; LMULMAX1-RV64-NEXT: vsrl.vx v26, v29, a2 ; LMULMAX1-RV64-NEXT: vid.v v30 -; LMULMAX1-RV64-NEXT: vsra.vv v26, v26, v30 -; LMULMAX1-RV64-NEXT: vadd.vv v26, v26, v28 -; LMULMAX1-RV64-NEXT: vmul.vv v27, v25, v27 -; LMULMAX1-RV64-NEXT: vmulh.vv v25, v25, v29 -; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v27 -; LMULMAX1-RV64-NEXT: vsrl.vx v27, v25, a2 -; LMULMAX1-RV64-NEXT: vsra.vv v25, v25, v30 -; LMULMAX1-RV64-NEXT: vadd.vv v25, v25, v27 +; LMULMAX1-RV64-NEXT: vsra.vv v29, v29, v30 +; LMULMAX1-RV64-NEXT: vadd.vv v26, v29, v26 +; LMULMAX1-RV64-NEXT: vmulh.vv v28, v25, v28 +; LMULMAX1-RV64-NEXT: vmacc.vv v28, v25, v27 +; LMULMAX1-RV64-NEXT: vsrl.vx v25, v28, a2 +; LMULMAX1-RV64-NEXT: vsra.vv v27, v28, v30 +; LMULMAX1-RV64-NEXT: vadd.vv v25, v27, v25 ; LMULMAX1-RV64-NEXT: vse64.v v25, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v26, (a1) ; LMULMAX1-RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll --- a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll @@ -480,15 +480,16 @@ define @mul_stepvector_nxv16i64() { ; CHECK-LABEL: mul_stepvector_nxv16i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: addi a0, zero, 3 -; CHECK-NEXT: vmul.vx v8, v8, a0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: addi a1, zero, 24 ; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: vadd.vx v16, v8, a0 +; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; CHECK-NEXT: vmv.v.x v16, a0 +; CHECK-NEXT: vid.v v8 +; CHECK-NEXT: addi a0, zero, 3 +; CHECK-NEXT: vmacc.vx v16, a0, v8 +; CHECK-NEXT: vmul.vx v8, v8, a0 ; CHECK-NEXT: ret entry: %0 = insertelement poison, i64 3, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmadd-sdnode.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmadd-sdnode.ll @@ -0,0 +1,592 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v 
-target-abi=ilp32 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -target-abi=lp64 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 + +; This tests a mix of vmacc and vmadd by using different operand orders to +; trigger commuting in TwoAddressInstructionPass. + +define @vmadd_vv_nxv1i8( %va, %vb, %vc) { +; CHECK-LABEL: vmadd_vv_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10 +; CHECK-NEXT: ret + %x = mul %va, %vb + %y = add %x, %vc + ret %y +} + +define @vmadd_vx_nxv1i8( %va, %vb, i8 %c) { +; CHECK-LABEL: vmadd_vx_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i8 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %va, %splat + %y = add %x, %vb + ret %y +} + +define @vmadd_vv_nxv2i8( %va, %vb, %vc) { +; CHECK-LABEL: vmadd_vv_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu +; CHECK-NEXT: vmadd.vv v8, v10, v9 +; CHECK-NEXT: ret + %x = mul %va, %vc + %y = add %x, %vb + ret %y +} + +define @vmadd_vx_nxv2i8( %va, %vb, i8 %c) { +; CHECK-LABEL: vmadd_vx_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, mu +; CHECK-NEXT: vmacc.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i8 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %vb, %splat + %y = add %x, %va + ret %y +} + +define @vmadd_vv_nxv4i8( %va, %vb, %vc) { +; CHECK-LABEL: vmadd_vv_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10 +; CHECK-NEXT: ret + %x = mul %vb, %va + %y = add %x, %vc + ret %y +} + +define @vmadd_vx_nxv4i8( %va, %vb, i8 %c) { +; CHECK-LABEL: vmadd_vx_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i8 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %va, %splat + %y = add %x, %vb + ret %y +} + +define @vmadd_vv_nxv8i8( %va, %vb, %vc) { +; CHECK-LABEL: vmadd_vv_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, mu +; CHECK-NEXT: vmacc.vv v8, v10, v9 +; CHECK-NEXT: ret + %x = mul %vb, %vc + %y = add %x, %va + ret %y +} + +define @vmadd_vx_nxv8i8( %va, %vb, i8 %c) { +; CHECK-LABEL: vmadd_vx_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, mu +; CHECK-NEXT: vmacc.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i8 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %vb, %splat + %y = add %x, %va + ret %y +} + +define @vmadd_vv_nxv16i8( %va, %vb, %vc) { +; CHECK-LABEL: vmadd_vv_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, mu +; CHECK-NEXT: vmadd.vv v8, v12, v10 +; CHECK-NEXT: ret + %x = mul %vc, %va + %y = add %x, %vb + ret %y +} + +define @vmadd_vx_nxv16i8( %va, %vb, i8 %c) { +; CHECK-LABEL: vmadd_vx_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v10 +; CHECK-NEXT: ret + %head = insertelement undef, i8 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %va, %splat + %y = add %x, %vb + ret %y +} + +define @vmadd_vv_nxv32i8( %va, %vb, %vc) { +; CHECK-LABEL: vmadd_vv_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m4, 
ta, mu +; CHECK-NEXT: vmacc.vv v8, v16, v12 +; CHECK-NEXT: ret + %x = mul %vc, %vb + %y = add %x, %va + ret %y +} + +define @vmadd_vx_nxv32i8( %va, %vb, i8 %c) { +; CHECK-LABEL: vmadd_vx_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, mu +; CHECK-NEXT: vmacc.vx v8, a0, v12 +; CHECK-NEXT: ret + %head = insertelement undef, i8 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %vb, %splat + %y = add %x, %va + ret %y +} + +define @vmadd_vv_nxv64i8( %va, %vb, %vc) { +; CHECK-LABEL: vmadd_vv_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8r.v v24, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, mu +; CHECK-NEXT: vmacc.vv v8, v16, v24 +; CHECK-NEXT: ret + %x = mul %vc, %vb + %y = add %x, %va + ret %y +} + +define @vmadd_vx_nxv64i8( %va, %vb, i8 %c) { +; CHECK-LABEL: vmadd_vx_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, mu +; CHECK-NEXT: vmacc.vx v8, a0, v16 +; CHECK-NEXT: ret + %head = insertelement undef, i8 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %vb, %splat + %y = add %x, %va + ret %y +} + +define @vmadd_vv_nxv1i16( %va, %vb, %vc) { +; CHECK-LABEL: vmadd_vv_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10 +; CHECK-NEXT: ret + %x = mul %va, %vb + %y = add %x, %vc + ret %y +} + +define @vmadd_vx_nxv1i16( %va, %vb, i16 %c) { +; CHECK-LABEL: vmadd_vx_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i16 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %va, %splat + %y = add %x, %vb + ret %y +} + +define @vmadd_vv_nxv2i16( %va, %vb, %vc) { +; CHECK-LABEL: vmadd_vv_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmadd.vv v8, v10, v9 +; CHECK-NEXT: ret + %x = mul %va, %vc + %y = add %x, %vb + ret %y +} + +define @vmadd_vx_nxv2i16( %va, %vb, i16 %c) { +; CHECK-LABEL: vmadd_vx_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, mu +; CHECK-NEXT: vmacc.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i16 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %vb, %splat + %y = add %x, %va + ret %y +} + +define @vmadd_vv_nxv4i16( %va, %vb, %vc) { +; CHECK-LABEL: vmadd_vv_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10 +; CHECK-NEXT: ret + %x = mul %vb, %va + %y = add %x, %vc + ret %y +} + +define @vmadd_vx_nxv4i16( %va, %vb, i16 %c) { +; CHECK-LABEL: vmadd_vx_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i16 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %va, %splat + %y = add %x, %vb + ret %y +} + +define @vmadd_vv_nxv8i16( %va, %vb, %vc) { +; CHECK-LABEL: vmadd_vv_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK-NEXT: vmacc.vv v8, v12, v10 +; CHECK-NEXT: ret + %x = mul %vb, %vc + %y = add %x, %va + ret %y +} + +define @vmadd_vx_nxv8i16( %va, %vb, i16 %c) { +; CHECK-LABEL: vmadd_vx_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; CHECK-NEXT: vmacc.vx v8, a0, v10 +; CHECK-NEXT: ret + %head = insertelement undef, i16 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %vb, %splat + %y = add 
%x, %va + ret %y +} + +define @vmadd_vv_nxv16i16( %va, %vb, %vc) { +; CHECK-LABEL: vmadd_vv_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; CHECK-NEXT: vmadd.vv v8, v16, v12 +; CHECK-NEXT: ret + %x = mul %vc, %va + %y = add %x, %vb + ret %y +} + +define @vmadd_vx_nxv16i16( %va, %vb, i16 %c) { +; CHECK-LABEL: vmadd_vx_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v12 +; CHECK-NEXT: ret + %head = insertelement undef, i16 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %va, %splat + %y = add %x, %vb + ret %y +} + +define @vmadd_vv_nxv32i16( %va, %vb, %vc) { +; CHECK-LABEL: vmadd_vv_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vmacc.vv v8, v16, v24 +; CHECK-NEXT: ret + %x = mul %vc, %vb + %y = add %x, %va + ret %y +} + +define @vmadd_vx_nxv32i16( %va, %vb, i16 %c) { +; CHECK-LABEL: vmadd_vx_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; CHECK-NEXT: vmacc.vx v8, a0, v16 +; CHECK-NEXT: ret + %head = insertelement undef, i16 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %vb, %splat + %y = add %x, %va + ret %y +} + +define @vmadd_vv_nxv1i32( %va, %vb, %vc) { +; CHECK-LABEL: vmadd_vv_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10 +; CHECK-NEXT: ret + %x = mul %va, %vb + %y = add %x, %vc + ret %y +} + +define @vmadd_vx_nxv1i32( %va, %vb, i32 %c) { +; CHECK-LABEL: vmadd_vx_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i32 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %va, %splat + %y = add %x, %vb + ret %y +} + +define @vmadd_vv_nxv2i32( %va, %vb, %vc) { +; CHECK-LABEL: vmadd_vv_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK-NEXT: vmadd.vv v8, v10, v9 +; CHECK-NEXT: ret + %x = mul %va, %vc + %y = add %x, %vb + ret %y +} + +define @vmadd_vx_nxv2i32( %va, %vb, i32 %c) { +; CHECK-LABEL: vmadd_vx_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vmacc.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i32 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %vb, %splat + %y = add %x, %va + ret %y +} + +define @vmadd_vv_nxv4i32( %va, %vb, %vc) { +; CHECK-LABEL: vmadd_vv_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK-NEXT: vmadd.vv v8, v10, v12 +; CHECK-NEXT: ret + %x = mul %vb, %va + %y = add %x, %vc + ret %y +} + +define @vmadd_vx_nxv4i32( %va, %vb, i32 %c) { +; CHECK-LABEL: vmadd_vx_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v10 +; CHECK-NEXT: ret + %head = insertelement undef, i32 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %va, %splat + %y = add %x, %vb + ret %y +} + +define @vmadd_vv_nxv8i32( %va, %vb, %vc) { +; CHECK-LABEL: vmadd_vv_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK-NEXT: vmacc.vv v8, v16, v12 +; CHECK-NEXT: ret + %x = mul %vb, %vc + %y = add %x, %va + ret %y +} + +define @vmadd_vx_nxv8i32( %va, %vb, i32 %c) { +; CHECK-LABEL: vmadd_vx_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; 
CHECK-NEXT: vmacc.vx v8, a0, v12 +; CHECK-NEXT: ret + %head = insertelement undef, i32 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %vb, %splat + %y = add %x, %va + ret %y +} + +define @vmadd_vv_nxv16i32( %va, %vb, %vc) { +; CHECK-LABEL: vmadd_vv_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu +; CHECK-NEXT: vmadd.vv v8, v24, v16 +; CHECK-NEXT: ret + %x = mul %vc, %va + %y = add %x, %vb + ret %y +} + +define @vmadd_vx_nxv16i32( %va, %vb, i32 %c) { +; CHECK-LABEL: vmadd_vx_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, mu +; CHECK-NEXT: vmadd.vx v8, a0, v16 +; CHECK-NEXT: ret + %head = insertelement undef, i32 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %va, %splat + %y = add %x, %vb + ret %y +} + +define @vmadd_vv_nxv1i64( %va, %vb, %vc) { +; CHECK-LABEL: vmadd_vv_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; CHECK-NEXT: vmadd.vv v8, v9, v10 +; CHECK-NEXT: ret + %x = mul %va, %vb + %y = add %x, %vc + ret %y +} + +define @vmadd_vx_nxv1i64( %va, %vb, i64 %c) { +; RV32-LABEL: vmadd_vx_nxv1i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: vmadd.vv v8, v25, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmadd_vx_nxv1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; RV64-NEXT: vmadd.vx v8, a0, v9 +; RV64-NEXT: ret + %head = insertelement undef, i64 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %va, %splat + %y = add %x, %vb + ret %y +} + +define @vmadd_vv_nxv2i64( %va, %vb, %vc) { +; CHECK-LABEL: vmadd_vv_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK-NEXT: vmadd.vv v8, v12, v10 +; CHECK-NEXT: ret + %x = mul %va, %vc + %y = add %x, %vb + ret %y +} + +define @vmadd_vx_nxv2i64( %va, %vb, i64 %c) { +; RV32-LABEL: vmadd_vx_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vmacc.vv v8, v10, v26 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmadd_vx_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, mu +; RV64-NEXT: vmacc.vx v8, a0, v10 +; RV64-NEXT: ret + %head = insertelement undef, i64 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %vb, %splat + %y = add %x, %va + ret %y +} + +define @vmadd_vv_nxv4i64( %va, %vb, %vc) { +; CHECK-LABEL: vmadd_vv_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK-NEXT: vmadd.vv v8, v12, v16 +; CHECK-NEXT: ret + %x = mul %vb, %va + %y = add %x, %vc + ret %y +} + +define @vmadd_vx_nxv4i64( %va, %vb, i64 %c) { +; RV32-LABEL: vmadd_vx_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vmadd.vv v8, v28, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmadd_vx_nxv4i64: +; RV64: 
# %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, mu +; RV64-NEXT: vmadd.vx v8, a0, v12 +; RV64-NEXT: ret + %head = insertelement undef, i64 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %va, %splat + %y = add %x, %vb + ret %y +} + +define @vmadd_vv_nxv8i64( %va, %vb, %vc) { +; CHECK-LABEL: vmadd_vv_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK-NEXT: vmacc.vv v8, v16, v24 +; CHECK-NEXT: ret + %x = mul %vb, %vc + %y = add %x, %va + ret %y +} + +define @vmadd_vx_nxv8i64( %va, %vb, i64 %c) { +; RV32-LABEL: vmadd_vx_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vmacc.vv v8, v16, v24 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vmadd_vx_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; RV64-NEXT: vmacc.vx v8, a0, v16 +; RV64-NEXT: ret + %head = insertelement undef, i64 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %vb, %splat + %y = add %x, %va + ret %y +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode-rv32.ll @@ -6,11 +6,11 @@ define @srem_eq_fold_nxv4i8( %va) { ; CHECK-LABEL: srem_eq_fold_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, zero, -85 -; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu -; CHECK-NEXT: vmul.vx v25, v8, a0 ; CHECK-NEXT: addi a0, zero, 42 -; CHECK-NEXT: vadd.vx v25, v25, a0 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu +; CHECK-NEXT: vmv.v.x v25, a0 +; CHECK-NEXT: addi a1, zero, -85 +; CHECK-NEXT: vmacc.vx v25, a1, v8 ; CHECK-NEXT: vsll.vi v26, v25, 7 ; CHECK-NEXT: vsrl.vi v25, v25, 1 ; CHECK-NEXT: vor.vv v25, v25, v26 diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vnmsub-sdnode.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vnmsub-sdnode.ll @@ -0,0 +1,592 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -target-abi=ilp32 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -target-abi=lp64 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 + +; This tests a mix of vnmsac and vnmsub by using different operand orders to +; trigger commuting in TwoAddressInstructionPass. 
+ +define @vnmsub_vv_nxv1i8( %va, %vb, %vc) { +; CHECK-LABEL: vnmsub_vv_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, mu +; CHECK-NEXT: vnmsub.vv v8, v9, v10 +; CHECK-NEXT: ret + %x = mul %va, %vb + %y = sub %vc, %x + ret %y +} + +define @vnmsub_vx_nxv1i8( %va, %vb, i8 %c) { +; CHECK-LABEL: vnmsub_vx_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, mu +; CHECK-NEXT: vnmsub.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i8 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %va, %splat + %y = sub %vb, %x + ret %y +} + +define @vnmsub_vv_nxv2i8( %va, %vb, %vc) { +; CHECK-LABEL: vnmsub_vv_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu +; CHECK-NEXT: vnmsub.vv v8, v10, v9 +; CHECK-NEXT: ret + %x = mul %va, %vc + %y = sub %vb, %x + ret %y +} + +define @vnmsub_vx_nxv2i8( %va, %vb, i8 %c) { +; CHECK-LABEL: vnmsub_vx_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, mu +; CHECK-NEXT: vnmsac.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i8 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %vb, %splat + %y = sub %va, %x + ret %y +} + +define @vnmsub_vv_nxv4i8( %va, %vb, %vc) { +; CHECK-LABEL: vnmsub_vv_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, mu +; CHECK-NEXT: vnmsub.vv v8, v9, v10 +; CHECK-NEXT: ret + %x = mul %vb, %va + %y = sub %vc, %x + ret %y +} + +define @vnmsub_vx_nxv4i8( %va, %vb, i8 %c) { +; CHECK-LABEL: vnmsub_vx_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu +; CHECK-NEXT: vnmsub.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i8 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %va, %splat + %y = sub %vb, %x + ret %y +} + +define @vnmsub_vv_nxv8i8( %va, %vb, %vc) { +; CHECK-LABEL: vnmsub_vv_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, mu +; CHECK-NEXT: vnmsac.vv v8, v10, v9 +; CHECK-NEXT: ret + %x = mul %vb, %vc + %y = sub %va, %x + ret %y +} + +define @vnmsub_vx_nxv8i8( %va, %vb, i8 %c) { +; CHECK-LABEL: vnmsub_vx_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, mu +; CHECK-NEXT: vnmsac.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i8 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %vb, %splat + %y = sub %va, %x + ret %y +} + +define @vnmsub_vv_nxv16i8( %va, %vb, %vc) { +; CHECK-LABEL: vnmsub_vv_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, mu +; CHECK-NEXT: vnmsub.vv v8, v12, v10 +; CHECK-NEXT: ret + %x = mul %vc, %va + %y = sub %vb, %x + ret %y +} + +define @vnmsub_vx_nxv16i8( %va, %vb, i8 %c) { +; CHECK-LABEL: vnmsub_vx_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, mu +; CHECK-NEXT: vnmsub.vx v8, a0, v10 +; CHECK-NEXT: ret + %head = insertelement undef, i8 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %va, %splat + %y = sub %vb, %x + ret %y +} + +define @vnmsub_vv_nxv32i8( %va, %vb, %vc) { +; CHECK-LABEL: vnmsub_vv_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, mu +; CHECK-NEXT: vnmsac.vv v8, v16, v12 +; CHECK-NEXT: ret + %x = mul %vc, %vb + %y = sub %va, %x + ret %y +} + +define @vnmsub_vx_nxv32i8( %va, %vb, i8 %c) { +; CHECK-LABEL: vnmsub_vx_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, mu +; CHECK-NEXT: vnmsac.vx v8, a0, v12 +; CHECK-NEXT: ret + %head = insertelement 
undef, i8 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %vb, %splat + %y = sub %va, %x + ret %y +} + +define @vnmsub_vv_nxv64i8( %va, %vb, %vc) { +; CHECK-LABEL: vnmsub_vv_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8r.v v24, (a0) +; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, mu +; CHECK-NEXT: vnmsac.vv v8, v16, v24 +; CHECK-NEXT: ret + %x = mul %vc, %vb + %y = sub %va, %x + ret %y +} + +define @vnmsub_vx_nxv64i8( %va, %vb, i8 %c) { +; CHECK-LABEL: vnmsub_vx_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, mu +; CHECK-NEXT: vnmsac.vx v8, a0, v16 +; CHECK-NEXT: ret + %head = insertelement undef, i8 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %vb, %splat + %y = sub %va, %x + ret %y +} + +define @vnmsub_vv_nxv1i16( %va, %vb, %vc) { +; CHECK-LABEL: vnmsub_vv_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; CHECK-NEXT: vnmsub.vv v8, v9, v10 +; CHECK-NEXT: ret + %x = mul %va, %vb + %y = sub %vc, %x + ret %y +} + +define @vnmsub_vx_nxv1i16( %va, %vb, i16 %c) { +; CHECK-LABEL: vnmsub_vx_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, mu +; CHECK-NEXT: vnmsub.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i16 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %va, %splat + %y = sub %vb, %x + ret %y +} + +define @vnmsub_vv_nxv2i16( %va, %vb, %vc) { +; CHECK-LABEL: vnmsub_vv_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu +; CHECK-NEXT: vnmsub.vv v8, v10, v9 +; CHECK-NEXT: ret + %x = mul %va, %vc + %y = sub %vb, %x + ret %y +} + +define @vnmsub_vx_nxv2i16( %va, %vb, i16 %c) { +; CHECK-LABEL: vnmsub_vx_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, mu +; CHECK-NEXT: vnmsac.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i16 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %vb, %splat + %y = sub %va, %x + ret %y +} + +define @vnmsub_vv_nxv4i16( %va, %vb, %vc) { +; CHECK-LABEL: vnmsub_vv_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu +; CHECK-NEXT: vnmsub.vv v8, v9, v10 +; CHECK-NEXT: ret + %x = mul %vb, %va + %y = sub %vc, %x + ret %y +} + +define @vnmsub_vx_nxv4i16( %va, %vb, i16 %c) { +; CHECK-LABEL: vnmsub_vx_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu +; CHECK-NEXT: vnmsub.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i16 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %va, %splat + %y = sub %vb, %x + ret %y +} + +define @vnmsub_vv_nxv8i16( %va, %vb, %vc) { +; CHECK-LABEL: vnmsub_vv_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu +; CHECK-NEXT: vnmsac.vv v8, v12, v10 +; CHECK-NEXT: ret + %x = mul %vb, %vc + %y = sub %va, %x + ret %y +} + +define @vnmsub_vx_nxv8i16( %va, %vb, i16 %c) { +; CHECK-LABEL: vnmsub_vx_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, mu +; CHECK-NEXT: vnmsac.vx v8, a0, v10 +; CHECK-NEXT: ret + %head = insertelement undef, i16 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %vb, %splat + %y = sub %va, %x + ret %y +} + +define @vnmsub_vv_nxv16i16( %va, %vb, %vc) { +; CHECK-LABEL: vnmsub_vv_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu +; CHECK-NEXT: vnmsub.vv v8, v16, v12 +; CHECK-NEXT: ret + %x = mul %vc, %va + %y = sub %vb, %x + ret %y +} + +define @vnmsub_vx_nxv16i16( 
%va, %vb, i16 %c) { +; CHECK-LABEL: vnmsub_vx_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, mu +; CHECK-NEXT: vnmsub.vx v8, a0, v12 +; CHECK-NEXT: ret + %head = insertelement undef, i16 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %va, %splat + %y = sub %vb, %x + ret %y +} + +define @vnmsub_vv_nxv32i16( %va, %vb, %vc) { +; CHECK-LABEL: vnmsub_vv_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu +; CHECK-NEXT: vnmsac.vv v8, v16, v24 +; CHECK-NEXT: ret + %x = mul %vc, %vb + %y = sub %va, %x + ret %y +} + +define @vnmsub_vx_nxv32i16( %va, %vb, i16 %c) { +; CHECK-LABEL: vnmsub_vx_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, mu +; CHECK-NEXT: vnmsac.vx v8, a0, v16 +; CHECK-NEXT: ret + %head = insertelement undef, i16 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %vb, %splat + %y = sub %va, %x + ret %y +} + +define @vnmsub_vv_nxv1i32( %va, %vb, %vc) { +; CHECK-LABEL: vnmsub_vv_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu +; CHECK-NEXT: vnmsub.vv v8, v9, v10 +; CHECK-NEXT: ret + %x = mul %va, %vb + %y = sub %vc, %x + ret %y +} + +define @vnmsub_vx_nxv1i32( %va, %vb, i32 %c) { +; CHECK-LABEL: vnmsub_vx_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, mu +; CHECK-NEXT: vnmsub.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i32 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %va, %splat + %y = sub %vb, %x + ret %y +} + +define @vnmsub_vv_nxv2i32( %va, %vb, %vc) { +; CHECK-LABEL: vnmsub_vv_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK-NEXT: vnmsub.vv v8, v10, v9 +; CHECK-NEXT: ret + %x = mul %va, %vc + %y = sub %vb, %x + ret %y +} + +define @vnmsub_vx_nxv2i32( %va, %vb, i32 %c) { +; CHECK-LABEL: vnmsub_vx_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vnmsac.vx v8, a0, v9 +; CHECK-NEXT: ret + %head = insertelement undef, i32 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %vb, %splat + %y = sub %va, %x + ret %y +} + +define @vnmsub_vv_nxv4i32( %va, %vb, %vc) { +; CHECK-LABEL: vnmsub_vv_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK-NEXT: vnmsub.vv v8, v10, v12 +; CHECK-NEXT: ret + %x = mul %vb, %va + %y = sub %vc, %x + ret %y +} + +define @vnmsub_vx_nxv4i32( %va, %vb, i32 %c) { +; CHECK-LABEL: vnmsub_vx_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu +; CHECK-NEXT: vnmsub.vx v8, a0, v10 +; CHECK-NEXT: ret + %head = insertelement undef, i32 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %va, %splat + %y = sub %vb, %x + ret %y +} + +define @vnmsub_vv_nxv8i32( %va, %vb, %vc) { +; CHECK-LABEL: vnmsub_vv_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK-NEXT: vnmsac.vv v8, v16, v12 +; CHECK-NEXT: ret + %x = mul %vb, %vc + %y = sub %va, %x + ret %y +} + +define @vnmsub_vx_nxv8i32( %va, %vb, i32 %c) { +; CHECK-LABEL: vnmsub_vx_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; CHECK-NEXT: vnmsac.vx v8, a0, v12 +; CHECK-NEXT: ret + %head = insertelement undef, i32 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %vb, %splat + %y = sub %va, %x + ret %y +} + +define @vnmsub_vv_nxv16i32( %va, %vb, %vc) { +; CHECK-LABEL: 
vnmsub_vv_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu +; CHECK-NEXT: vnmsub.vv v8, v24, v16 +; CHECK-NEXT: ret + %x = mul %vc, %va + %y = sub %vb, %x + ret %y +} + +define @vnmsub_vx_nxv16i32( %va, %vb, i32 %c) { +; CHECK-LABEL: vnmsub_vx_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, mu +; CHECK-NEXT: vnmsub.vx v8, a0, v16 +; CHECK-NEXT: ret + %head = insertelement undef, i32 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %va, %splat + %y = sub %vb, %x + ret %y +} + +define @vnmsub_vv_nxv1i64( %va, %vb, %vc) { +; CHECK-LABEL: vnmsub_vv_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; CHECK-NEXT: vnmsub.vv v8, v9, v10 +; CHECK-NEXT: ret + %x = mul %va, %vb + %y = sub %vc, %x + ret %y +} + +define @vnmsub_vx_nxv1i64( %va, %vb, i64 %c) { +; RV32-LABEL: vnmsub_vx_nxv1i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v25, (a0), zero +; RV32-NEXT: vnmsub.vv v8, v25, v9 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vnmsub_vx_nxv1i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, mu +; RV64-NEXT: vnmsub.vx v8, a0, v9 +; RV64-NEXT: ret + %head = insertelement undef, i64 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %va, %splat + %y = sub %vb, %x + ret %y +} + +define @vnmsub_vv_nxv2i64( %va, %vb, %vc) { +; CHECK-LABEL: vnmsub_vv_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; CHECK-NEXT: vnmsub.vv v8, v12, v10 +; CHECK-NEXT: ret + %x = mul %va, %vc + %y = sub %vb, %x + ret %y +} + +define @vnmsub_vx_nxv2i64( %va, %vb, i64 %c) { +; RV32-LABEL: vnmsub_vx_nxv2i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v26, (a0), zero +; RV32-NEXT: vnmsac.vv v8, v10, v26 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vnmsub_vx_nxv2i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, mu +; RV64-NEXT: vnmsac.vx v8, a0, v10 +; RV64-NEXT: ret + %head = insertelement undef, i64 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %vb, %splat + %y = sub %va, %x + ret %y +} + +define @vnmsub_vv_nxv4i64( %va, %vb, %vc) { +; CHECK-LABEL: vnmsub_vv_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; CHECK-NEXT: vnmsub.vv v8, v12, v16 +; CHECK-NEXT: ret + %x = mul %vb, %va + %y = sub %vc, %x + ret %y +} + +define @vnmsub_vx_nxv4i64( %va, %vb, i64 %c) { +; RV32-LABEL: vnmsub_vx_nxv4i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v28, (a0), zero +; RV32-NEXT: vnmsub.vv v8, v28, v12 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vnmsub_vx_nxv4i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, mu +; RV64-NEXT: vnmsub.vx v8, a0, v12 +; RV64-NEXT: ret + %head = insertelement undef, i64 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %va, %splat + %y = 
sub %vb, %x + ret %y +} + +define @vnmsub_vv_nxv8i64( %va, %vb, %vc) { +; CHECK-LABEL: vnmsub_vv_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; CHECK-NEXT: vnmsac.vv v8, v16, v24 +; CHECK-NEXT: ret + %x = mul %vb, %vc + %y = sub %va, %x + ret %y +} + +define @vnmsub_vx_nxv8i64( %va, %vb, i64 %c) { +; RV32-LABEL: vnmsub_vx_nxv8i64: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vnmsac.vv v8, v16, v24 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vnmsub_vx_nxv8i64: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu +; RV64-NEXT: vnmsac.vx v8, a0, v16 +; RV64-NEXT: ret + %head = insertelement undef, i64 %c, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %x = mul %vb, %splat + %y = sub %va, %x + ret %y +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode-rv32.ll @@ -34,8 +34,7 @@ ; CHECK-NEXT: vsrl.vi v26, v25, 7 ; CHECK-NEXT: vadd.vv v25, v25, v26 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement undef, i8 -7, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -76,8 +75,7 @@ ; CHECK-NEXT: vsrl.vi v26, v25, 7 ; CHECK-NEXT: vadd.vv v25, v25, v26 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement undef, i8 -7, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -118,8 +116,7 @@ ; CHECK-NEXT: vsrl.vi v26, v25, 7 ; CHECK-NEXT: vadd.vv v25, v25, v26 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement undef, i8 -7, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -160,8 +157,7 @@ ; CHECK-NEXT: vsrl.vi v26, v25, 7 ; CHECK-NEXT: vadd.vv v25, v25, v26 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v25, v25, a0 -; CHECK-NEXT: vsub.vv v8, v8, v25 +; CHECK-NEXT: vnmsac.vx v8, a0, v25 ; CHECK-NEXT: ret %head = insertelement undef, i8 -7, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -202,8 +198,7 @@ ; CHECK-NEXT: vsrl.vi v28, v26, 7 ; CHECK-NEXT: vadd.vv v26, v26, v28 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v26, v26, a0 -; CHECK-NEXT: vsub.vv v8, v8, v26 +; CHECK-NEXT: vnmsac.vx v8, a0, v26 ; CHECK-NEXT: ret %head = insertelement undef, i8 -7, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -244,8 +239,7 @@ ; CHECK-NEXT: vsrl.vi v12, v28, 7 ; CHECK-NEXT: vadd.vv v28, v28, v12 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v28, v28, a0 -; CHECK-NEXT: vsub.vv v8, v8, v28 +; CHECK-NEXT: vnmsac.vx v8, a0, v28 ; CHECK-NEXT: ret %head = insertelement undef, i8 -7, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -286,8 +280,7 @@ ; CHECK-NEXT: vsrl.vi v24, v16, 7 ; CHECK-NEXT: vadd.vv v16, v16, v24 ; CHECK-NEXT: addi a0, zero, -7 -; CHECK-NEXT: vmul.vx v16, v16, a0 -; CHECK-NEXT: vsub.vv v8, v8, v16 +; CHECK-NEXT: vnmsac.vx v8, a0, 
v16
 ; CHECK-NEXT: ret
 %head = insertelement undef, i8 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -328,8 +321,7 @@
 ; CHECK-NEXT: vsrl.vi v26, v25, 15
 ; CHECK-NEXT: vadd.vv v25, v25, v26
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i16 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -370,8 +362,7 @@
 ; CHECK-NEXT: vsrl.vi v26, v25, 15
 ; CHECK-NEXT: vadd.vv v25, v25, v26
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i16 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -412,8 +403,7 @@
 ; CHECK-NEXT: vsrl.vi v26, v25, 15
 ; CHECK-NEXT: vadd.vv v25, v25, v26
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i16 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -454,8 +444,7 @@
 ; CHECK-NEXT: vsrl.vi v28, v26, 15
 ; CHECK-NEXT: vadd.vv v26, v26, v28
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v26, v26, a0
-; CHECK-NEXT: vsub.vv v8, v8, v26
+; CHECK-NEXT: vnmsac.vx v8, a0, v26
 ; CHECK-NEXT: ret
 %head = insertelement undef, i16 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -496,8 +485,7 @@
 ; CHECK-NEXT: vsrl.vi v12, v28, 15
 ; CHECK-NEXT: vadd.vv v28, v28, v12
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v28, v28, a0
-; CHECK-NEXT: vsub.vv v8, v8, v28
+; CHECK-NEXT: vnmsac.vx v8, a0, v28
 ; CHECK-NEXT: ret
 %head = insertelement undef, i16 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -538,8 +526,7 @@
 ; CHECK-NEXT: vsrl.vi v24, v16, 15
 ; CHECK-NEXT: vadd.vv v16, v16, v24
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v16, v16, a0
-; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: vnmsac.vx v8, a0, v16
 ; CHECK-NEXT: ret
 %head = insertelement undef, i16 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -581,8 +568,7 @@
 ; CHECK-NEXT: vsra.vi v25, v25, 2
 ; CHECK-NEXT: vadd.vv v25, v25, v26
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i32 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -624,8 +610,7 @@
 ; CHECK-NEXT: vsra.vi v25, v25, 2
 ; CHECK-NEXT: vadd.vv v25, v25, v26
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i32 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -667,8 +652,7 @@
 ; CHECK-NEXT: vsra.vi v26, v26, 2
 ; CHECK-NEXT: vadd.vv v26, v26, v28
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v26, v26, a0
-; CHECK-NEXT: vsub.vv v8, v8, v26
+; CHECK-NEXT: vnmsac.vx v8, a0, v26
 ; CHECK-NEXT: ret
 %head = insertelement undef, i32 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -710,8 +694,7 @@
 ; CHECK-NEXT: vsra.vi v28, v28, 2
 ; CHECK-NEXT: vadd.vv v28, v28, v12
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v28, v28, a0
-; CHECK-NEXT: vsub.vv v8, v8, v28
+; CHECK-NEXT: vnmsac.vx v8, a0, v28
 ; CHECK-NEXT: ret
 %head = insertelement undef, i32 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -753,8 +736,7 @@
 ; CHECK-NEXT: vsra.vi v16, v16, 2
 ; CHECK-NEXT: vadd.vv v16, v16, v24
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v16, v16, a0
-; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: vnmsac.vx v8, a0, v16
 ; CHECK-NEXT: ret
 %head = insertelement undef, i32 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -811,8 +793,7 @@
 ; CHECK-NEXT: vsra.vi v25, v25, 1
 ; CHECK-NEXT: vadd.vv v25, v25, v26
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %head = insertelement undef, i64 -7, i32 0
@@ -870,8 +851,7 @@
 ; CHECK-NEXT: vsra.vi v26, v26, 1
 ; CHECK-NEXT: vadd.vv v26, v26, v28
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v26, v26, a0
-; CHECK-NEXT: vsub.vv v8, v8, v26
+; CHECK-NEXT: vnmsac.vx v8, a0, v26
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %head = insertelement undef, i64 -7, i32 0
@@ -929,8 +909,7 @@
 ; CHECK-NEXT: vsra.vi v28, v28, 1
 ; CHECK-NEXT: vadd.vv v28, v28, v12
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v28, v28, a0
-; CHECK-NEXT: vsub.vv v8, v8, v28
+; CHECK-NEXT: vnmsac.vx v8, a0, v28
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %head = insertelement undef, i64 -7, i32 0
@@ -988,8 +967,7 @@
 ; CHECK-NEXT: vsra.vi v16, v16, 1
 ; CHECK-NEXT: vadd.vv v16, v16, v24
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v16, v16, a0
-; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: vnmsac.vx v8, a0, v16
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %head = insertelement undef, i64 -7, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode-rv64.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode-rv64.ll
@@ -34,8 +34,7 @@
 ; CHECK-NEXT: vsrl.vi v26, v25, 7
 ; CHECK-NEXT: vadd.vv v25, v25, v26
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i8 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -76,8 +75,7 @@
 ; CHECK-NEXT: vsrl.vi v26, v25, 7
 ; CHECK-NEXT: vadd.vv v25, v25, v26
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i8 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -118,8 +116,7 @@
 ; CHECK-NEXT: vsrl.vi v26, v25, 7
 ; CHECK-NEXT: vadd.vv v25, v25, v26
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i8 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -160,8 +157,7 @@
 ; CHECK-NEXT: vsrl.vi v26, v25, 7
 ; CHECK-NEXT: vadd.vv v25, v25, v26
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i8 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -202,8 +198,7 @@
 ; CHECK-NEXT: vsrl.vi v28, v26, 7
 ; CHECK-NEXT: vadd.vv v26, v26, v28
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v26, v26, a0
-; CHECK-NEXT: vsub.vv v8, v8, v26
+; CHECK-NEXT: vnmsac.vx v8, a0, v26
 ; CHECK-NEXT: ret
 %head = insertelement undef, i8 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -244,8 +239,7 @@
 ; CHECK-NEXT: vsrl.vi v12, v28, 7
 ; CHECK-NEXT: vadd.vv v28, v28, v12
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v28, v28, a0
-; CHECK-NEXT: vsub.vv v8, v8, v28
+; CHECK-NEXT: vnmsac.vx v8, a0, v28
 ; CHECK-NEXT: ret
 %head = insertelement undef, i8 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -286,8 +280,7 @@
 ; CHECK-NEXT: vsrl.vi v24, v16, 7
 ; CHECK-NEXT: vadd.vv v16, v16, v24
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v16, v16, a0
-; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: vnmsac.vx v8, a0, v16
 ; CHECK-NEXT: ret
 %head = insertelement undef, i8 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -328,8 +321,7 @@
 ; CHECK-NEXT: vsrl.vi v26, v25, 15
 ; CHECK-NEXT: vadd.vv v25, v25, v26
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i16 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -370,8 +362,7 @@
 ; CHECK-NEXT: vsrl.vi v26, v25, 15
 ; CHECK-NEXT: vadd.vv v25, v25, v26
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i16 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -412,8 +403,7 @@
 ; CHECK-NEXT: vsrl.vi v26, v25, 15
 ; CHECK-NEXT: vadd.vv v25, v25, v26
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i16 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -454,8 +444,7 @@
 ; CHECK-NEXT: vsrl.vi v28, v26, 15
 ; CHECK-NEXT: vadd.vv v26, v26, v28
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v26, v26, a0
-; CHECK-NEXT: vsub.vv v8, v8, v26
+; CHECK-NEXT: vnmsac.vx v8, a0, v26
 ; CHECK-NEXT: ret
 %head = insertelement undef, i16 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -496,8 +485,7 @@
 ; CHECK-NEXT: vsrl.vi v12, v28, 15
 ; CHECK-NEXT: vadd.vv v28, v28, v12
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v28, v28, a0
-; CHECK-NEXT: vsub.vv v8, v8, v28
+; CHECK-NEXT: vnmsac.vx v8, a0, v28
 ; CHECK-NEXT: ret
 %head = insertelement undef, i16 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -538,8 +526,7 @@
 ; CHECK-NEXT: vsrl.vi v24, v16, 15
 ; CHECK-NEXT: vadd.vv v16, v16, v24
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v16, v16, a0
-; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: vnmsac.vx v8, a0, v16
 ; CHECK-NEXT: ret
 %head = insertelement undef, i16 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -581,8 +568,7 @@
 ; CHECK-NEXT: vsrl.vi v26, v25, 31
 ; CHECK-NEXT: vadd.vv v25, v25, v26
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i32 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -624,8 +610,7 @@
 ; CHECK-NEXT: vsrl.vi v26, v25, 31
 ; CHECK-NEXT: vadd.vv v25, v25, v26
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i32 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -667,8 +652,7 @@
 ; CHECK-NEXT: vsrl.vi v28, v26, 31
 ; CHECK-NEXT: vadd.vv v26, v26, v28
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v26, v26, a0
-; CHECK-NEXT: vsub.vv v8, v8, v26
+; CHECK-NEXT: vnmsac.vx v8, a0, v26
 ; CHECK-NEXT: ret
 %head = insertelement undef, i32 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -710,8 +694,7 @@
 ; CHECK-NEXT: vsrl.vi v12, v28, 31
 ; CHECK-NEXT: vadd.vv v28, v28, v12
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v28, v28, a0
-; CHECK-NEXT: vsub.vv v8, v8, v28
+; CHECK-NEXT: vnmsac.vx v8, a0, v28
 ; CHECK-NEXT: ret
 %head = insertelement undef, i32 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -753,8 +736,7 @@
 ; CHECK-NEXT: vsrl.vi v24, v16, 31
 ; CHECK-NEXT: vadd.vv v16, v16, v24
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v16, v16, a0
-; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: vnmsac.vx v8, a0, v16
 ; CHECK-NEXT: ret
 %head = insertelement undef, i32 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -802,8 +784,7 @@
 ; CHECK-NEXT: vsra.vi v25, v25, 1
 ; CHECK-NEXT: vadd.vv v25, v25, v26
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i64 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -851,8 +832,7 @@
 ; CHECK-NEXT: vsra.vi v26, v26, 1
 ; CHECK-NEXT: vadd.vv v26, v26, v28
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v26, v26, a0
-; CHECK-NEXT: vsub.vv v8, v8, v26
+; CHECK-NEXT: vnmsac.vx v8, a0, v26
 ; CHECK-NEXT: ret
 %head = insertelement undef, i64 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -900,8 +880,7 @@
 ; CHECK-NEXT: vsra.vi v28, v28, 1
 ; CHECK-NEXT: vadd.vv v28, v28, v12
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v28, v28, a0
-; CHECK-NEXT: vsub.vv v8, v8, v28
+; CHECK-NEXT: vnmsac.vx v8, a0, v28
 ; CHECK-NEXT: ret
 %head = insertelement undef, i64 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -949,8 +928,7 @@
 ; CHECK-NEXT: vsra.vi v16, v16, 1
 ; CHECK-NEXT: vadd.vv v16, v16, v24
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v16, v16, a0
-; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: vnmsac.vx v8, a0, v16
 ; CHECK-NEXT: ret
 %head = insertelement undef, i64 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode-rv32.ll
@@ -31,8 +31,7 @@
 ; CHECK-NEXT: vmulhu.vx v25, v8, a0
 ; CHECK-NEXT: vsrl.vi v25, v25, 5
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i8 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -70,8 +69,7 @@
 ; CHECK-NEXT: vmulhu.vx v25, v8, a0
 ; CHECK-NEXT: vsrl.vi v25, v25, 5
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i8 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -109,8 +107,7 @@
 ; CHECK-NEXT: vmulhu.vx v25, v8, a0
 ; CHECK-NEXT: vsrl.vi v25, v25, 5
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i8 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -148,8 +145,7 @@
 ; CHECK-NEXT: vmulhu.vx v25, v8, a0
 ; CHECK-NEXT: vsrl.vi v25, v25, 5
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i8 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -187,8 +183,7 @@
 ; CHECK-NEXT: vmulhu.vx v26, v8, a0
 ; CHECK-NEXT: vsrl.vi v26, v26, 5
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v26, v26, a0
-; CHECK-NEXT: vsub.vv v8, v8, v26
+; CHECK-NEXT: vnmsac.vx v8, a0, v26
 ; CHECK-NEXT: ret
 %head = insertelement undef, i8 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -226,8 +221,7 @@
 ; CHECK-NEXT: vmulhu.vx v28, v8, a0
 ; CHECK-NEXT: vsrl.vi v28, v28, 5
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v28, v28, a0
-; CHECK-NEXT: vsub.vv v8, v8, v28
+; CHECK-NEXT: vnmsac.vx v8, a0, v28
 ; CHECK-NEXT: ret
 %head = insertelement undef, i8 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -265,8 +259,7 @@
 ; CHECK-NEXT: vmulhu.vx v16, v8, a0
 ; CHECK-NEXT: vsrl.vi v16, v16, 5
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v16, v16, a0
-; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: vnmsac.vx v8, a0, v16
 ; CHECK-NEXT: ret
 %head = insertelement undef, i8 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -305,8 +298,7 @@
 ; CHECK-NEXT: vmulhu.vx v25, v8, a0
 ; CHECK-NEXT: vsrl.vi v25, v25, 13
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i16 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -345,8 +337,7 @@
 ; CHECK-NEXT: vmulhu.vx v25, v8, a0
 ; CHECK-NEXT: vsrl.vi v25, v25, 13
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i16 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -385,8 +376,7 @@
 ; CHECK-NEXT: vmulhu.vx v25, v8, a0
 ; CHECK-NEXT: vsrl.vi v25, v25, 13
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i16 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -425,8 +415,7 @@
 ; CHECK-NEXT: vmulhu.vx v26, v8, a0
 ; CHECK-NEXT: vsrl.vi v26, v26, 13
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v26, v26, a0
-; CHECK-NEXT: vsub.vv v8, v8, v26
+; CHECK-NEXT: vnmsac.vx v8, a0, v26
 ; CHECK-NEXT: ret
 %head = insertelement undef, i16 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -465,8 +454,7 @@
 ; CHECK-NEXT: vmulhu.vx v28, v8, a0
 ; CHECK-NEXT: vsrl.vi v28, v28, 13
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v28, v28, a0
-; CHECK-NEXT: vsub.vv v8, v8, v28
+; CHECK-NEXT: vnmsac.vx v8, a0, v28
 ; CHECK-NEXT: ret
 %head = insertelement undef, i16 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -505,8 +493,7 @@
 ; CHECK-NEXT: vmulhu.vx v16, v8, a0
 ; CHECK-NEXT: vsrl.vi v16, v16, 13
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v16, v16, a0
-; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: vnmsac.vx v8, a0, v16
 ; CHECK-NEXT: ret
 %head = insertelement undef, i16 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -545,8 +532,7 @@
 ; CHECK-NEXT: vmulhu.vx v25, v8, a0
 ; CHECK-NEXT: vsrl.vi v25, v25, 29
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i32 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -585,8 +571,7 @@
 ; CHECK-NEXT: vmulhu.vx v25, v8, a0
 ; CHECK-NEXT: vsrl.vi v25, v25, 29
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i32 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -625,8 +610,7 @@
 ; CHECK-NEXT: vmulhu.vx v26, v8, a0
 ; CHECK-NEXT: vsrl.vi v26, v26, 29
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v26, v26, a0
-; CHECK-NEXT: vsub.vv v8, v8, v26
+; CHECK-NEXT: vnmsac.vx v8, a0, v26
 ; CHECK-NEXT: ret
 %head = insertelement undef, i32 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -665,8 +649,7 @@
 ; CHECK-NEXT: vmulhu.vx v28, v8, a0
 ; CHECK-NEXT: vsrl.vi v28, v28, 29
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v28, v28, a0
-; CHECK-NEXT: vsub.vv v8, v8, v28
+; CHECK-NEXT: vnmsac.vx v8, a0, v28
 ; CHECK-NEXT: ret
 %head = insertelement undef, i32 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -705,8 +688,7 @@
 ; CHECK-NEXT: vmulhu.vx v16, v8, a0
 ; CHECK-NEXT: vsrl.vi v16, v16, 29
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v16, v16, a0
-; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: vnmsac.vx v8, a0, v16
 ; CHECK-NEXT: ret
 %head = insertelement undef, i32 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -759,8 +741,7 @@
 ; CHECK-NEXT: addi a0, zero, 61
 ; CHECK-NEXT: vsrl.vx v25, v25, a0
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %head = insertelement undef, i64 -7, i32 0
@@ -814,8 +795,7 @@
 ; CHECK-NEXT: addi a0, zero, 61
 ; CHECK-NEXT: vsrl.vx v26, v26, a0
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v26, v26, a0
-; CHECK-NEXT: vsub.vv v8, v8, v26
+; CHECK-NEXT: vnmsac.vx v8, a0, v26
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %head = insertelement undef, i64 -7, i32 0
@@ -869,8 +849,7 @@
 ; CHECK-NEXT: addi a0, zero, 61
 ; CHECK-NEXT: vsrl.vx v28, v28, a0
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v28, v28, a0
-; CHECK-NEXT: vsub.vv v8, v8, v28
+; CHECK-NEXT: vnmsac.vx v8, a0, v28
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %head = insertelement undef, i64 -7, i32 0
@@ -924,8 +903,7 @@
 ; CHECK-NEXT: addi a0, zero, 61
 ; CHECK-NEXT: vsrl.vx v16, v16, a0
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v16, v16, a0
-; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: vnmsac.vx v8, a0, v16
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %head = insertelement undef, i64 -7, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode-rv64.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode-rv64.ll
@@ -31,8 +31,7 @@
 ; CHECK-NEXT: vmulhu.vx v25, v8, a0
 ; CHECK-NEXT: vsrl.vi v25, v25, 5
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i8 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -70,8 +69,7 @@
 ; CHECK-NEXT: vmulhu.vx v25, v8, a0
 ; CHECK-NEXT: vsrl.vi v25, v25, 5
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i8 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -109,8 +107,7 @@
 ; CHECK-NEXT: vmulhu.vx v25, v8, a0
 ; CHECK-NEXT: vsrl.vi v25, v25, 5
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i8 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -148,8 +145,7 @@
 ; CHECK-NEXT: vmulhu.vx v25, v8, a0
 ; CHECK-NEXT: vsrl.vi v25, v25, 5
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i8 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -187,8 +183,7 @@
 ; CHECK-NEXT: vmulhu.vx v26, v8, a0
 ; CHECK-NEXT: vsrl.vi v26, v26, 5
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v26, v26, a0
-; CHECK-NEXT: vsub.vv v8, v8, v26
+; CHECK-NEXT: vnmsac.vx v8, a0, v26
 ; CHECK-NEXT: ret
 %head = insertelement undef, i8 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -226,8 +221,7 @@
 ; CHECK-NEXT: vmulhu.vx v28, v8, a0
 ; CHECK-NEXT: vsrl.vi v28, v28, 5
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v28, v28, a0
-; CHECK-NEXT: vsub.vv v8, v8, v28
+; CHECK-NEXT: vnmsac.vx v8, a0, v28
 ; CHECK-NEXT: ret
 %head = insertelement undef, i8 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -265,8 +259,7 @@
 ; CHECK-NEXT: vmulhu.vx v16, v8, a0
 ; CHECK-NEXT: vsrl.vi v16, v16, 5
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v16, v16, a0
-; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: vnmsac.vx v8, a0, v16
 ; CHECK-NEXT: ret
 %head = insertelement undef, i8 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -305,8 +298,7 @@
 ; CHECK-NEXT: vmulhu.vx v25, v8, a0
 ; CHECK-NEXT: vsrl.vi v25, v25, 13
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i16 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -345,8 +337,7 @@
 ; CHECK-NEXT: vmulhu.vx v25, v8, a0
 ; CHECK-NEXT: vsrl.vi v25, v25, 13
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i16 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -385,8 +376,7 @@
 ; CHECK-NEXT: vmulhu.vx v25, v8, a0
 ; CHECK-NEXT: vsrl.vi v25, v25, 13
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i16 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -425,8 +415,7 @@
 ; CHECK-NEXT: vmulhu.vx v26, v8, a0
 ; CHECK-NEXT: vsrl.vi v26, v26, 13
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v26, v26, a0
-; CHECK-NEXT: vsub.vv v8, v8, v26
+; CHECK-NEXT: vnmsac.vx v8, a0, v26
 ; CHECK-NEXT: ret
 %head = insertelement undef, i16 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -465,8 +454,7 @@
 ; CHECK-NEXT: vmulhu.vx v28, v8, a0
 ; CHECK-NEXT: vsrl.vi v28, v28, 13
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v28, v28, a0
-; CHECK-NEXT: vsub.vv v8, v8, v28
+; CHECK-NEXT: vnmsac.vx v8, a0, v28
 ; CHECK-NEXT: ret
 %head = insertelement undef, i16 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -505,8 +493,7 @@
 ; CHECK-NEXT: vmulhu.vx v16, v8, a0
 ; CHECK-NEXT: vsrl.vi v16, v16, 13
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v16, v16, a0
-; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: vnmsac.vx v8, a0, v16
 ; CHECK-NEXT: ret
 %head = insertelement undef, i16 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -545,8 +532,7 @@
 ; CHECK-NEXT: vmulhu.vx v25, v8, a0
 ; CHECK-NEXT: vsrl.vi v25, v25, 29
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i32 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -585,8 +571,7 @@
 ; CHECK-NEXT: vmulhu.vx v25, v8, a0
 ; CHECK-NEXT: vsrl.vi v25, v25, 29
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i32 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -625,8 +610,7 @@
 ; CHECK-NEXT: vmulhu.vx v26, v8, a0
 ; CHECK-NEXT: vsrl.vi v26, v26, 29
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v26, v26, a0
-; CHECK-NEXT: vsub.vv v8, v8, v26
+; CHECK-NEXT: vnmsac.vx v8, a0, v26
 ; CHECK-NEXT: ret
 %head = insertelement undef, i32 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -665,8 +649,7 @@
 ; CHECK-NEXT: vmulhu.vx v28, v8, a0
 ; CHECK-NEXT: vsrl.vi v28, v28, 29
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v28, v28, a0
-; CHECK-NEXT: vsub.vv v8, v8, v28
+; CHECK-NEXT: vnmsac.vx v8, a0, v28
 ; CHECK-NEXT: ret
 %head = insertelement undef, i32 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -705,8 +688,7 @@
 ; CHECK-NEXT: vmulhu.vx v16, v8, a0
 ; CHECK-NEXT: vsrl.vi v16, v16, 29
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v16, v16, a0
-; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: vnmsac.vx v8, a0, v16
 ; CHECK-NEXT: ret
 %head = insertelement undef, i32 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -747,8 +729,7 @@
 ; CHECK-NEXT: addi a0, zero, 61
 ; CHECK-NEXT: vsrl.vx v25, v25, a0
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v25, v25, a0
-; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: vnmsac.vx v8, a0, v25
 ; CHECK-NEXT: ret
 %head = insertelement undef, i64 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -789,8 +770,7 @@
 ; CHECK-NEXT: addi a0, zero, 61
 ; CHECK-NEXT: vsrl.vx v26, v26, a0
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v26, v26, a0
-; CHECK-NEXT: vsub.vv v8, v8, v26
+; CHECK-NEXT: vnmsac.vx v8, a0, v26
 ; CHECK-NEXT: ret
 %head = insertelement undef, i64 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -831,8 +811,7 @@
 ; CHECK-NEXT: addi a0, zero, 61
 ; CHECK-NEXT: vsrl.vx v28, v28, a0
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v28, v28, a0
-; CHECK-NEXT: vsub.vv v8, v8, v28
+; CHECK-NEXT: vnmsac.vx v8, a0, v28
 ; CHECK-NEXT: ret
 %head = insertelement undef, i64 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer
@@ -873,8 +852,7 @@
 ; CHECK-NEXT: addi a0, zero, 61
 ; CHECK-NEXT: vsrl.vx v16, v16, a0
 ; CHECK-NEXT: addi a0, zero, -7
-; CHECK-NEXT: vmul.vx v16, v16, a0
-; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: vnmsac.vx v8, a0, v16
 ; CHECK-NEXT: ret
 %head = insertelement undef, i64 -7, i32 0
 %splat = shufflevector %head, undef, zeroinitializer