diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -239,7 +239,10 @@
   FNEG_VL,
   FABS_VL,
   FSQRT_VL,
-  FMA_VL,
+  VFMADD_VL,
+  VFNMADD_VL,
+  VFMSUB_VL,
+  VFNMSUB_VL,
   FCOPYSIGN_VL,
   SMIN_VL,
   SMAX_VL,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3470,7 +3470,7 @@
   case ISD::FSQRT:
     return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL);
   case ISD::FMA:
-    return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
+    return lowerToScalableOp(Op, DAG, RISCVISD::VFMADD_VL);
   case ISD::SMIN:
     return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL);
   case ISD::SMAX:
@@ -3545,7 +3545,7 @@
   case ISD::VP_FNEG:
     return lowerVPOp(Op, DAG, RISCVISD::FNEG_VL);
   case ISD::VP_FMA:
-    return lowerVPOp(Op, DAG, RISCVISD::FMA_VL);
+    return lowerVPOp(Op, DAG, RISCVISD::VFMADD_VL);
   case ISD::VP_SIGN_EXTEND:
   case ISD::VP_ZERO_EXTEND:
     if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
@@ -8473,6 +8473,36 @@
                      DAG.getConstant(7, DL, VT));
 }
 
+// Convert from one FMA opcode to another based on whether we are negating the
+// multiply result or the accumulator.
+// NOTE: Only supports RVV operations with VL.
+static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
+  if (NegMul) {
+    // clang-format off
+    switch (Opcode) {
+    default: llvm_unreachable("Unexpected opcode");
+    case RISCVISD::VFMADD_VL:  Opcode = RISCVISD::VFNMSUB_VL; break;
+    case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL;  break;
+    case RISCVISD::VFMSUB_VL:  Opcode = RISCVISD::VFNMADD_VL; break;
+    case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL;  break;
+    }
+    // clang-format on
+  }
+
+  if (NegAcc) {
+    // clang-format off
+    switch (Opcode) {
+    default: llvm_unreachable("Unexpected opcode");
+    case RISCVISD::VFMADD_VL:  Opcode = RISCVISD::VFMSUB_VL;  break;
+    case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
+    case RISCVISD::VFMSUB_VL:  Opcode = RISCVISD::VFMADD_VL;  break;
+    case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
+    }
+    // clang-format on
+  }
+
+  return Opcode;
+}
 
 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -8979,6 +9009,40 @@
       return V;
     // Mul is commutative.
     return combineMUL_VLToVWMUL_VL(N, DAG, /*Commute*/ true);
+  case RISCVISD::VFMADD_VL:
+  case RISCVISD::VFNMADD_VL:
+  case RISCVISD::VFMSUB_VL:
+  case RISCVISD::VFNMSUB_VL: {
+    // Fold FNEG_VL into FMA opcodes.
+    SDValue A = N->getOperand(0);
+    SDValue B = N->getOperand(1);
+    SDValue C = N->getOperand(2);
+    SDValue Mask = N->getOperand(3);
+    SDValue VL = N->getOperand(4);
+
+    auto invertIfNegative = [&Mask, &VL](SDValue &V) {
+      if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
+          V.getOperand(2) == VL) {
+        // Return the negated input.
+        V = V.getOperand(0);
+        return true;
+      }
+
+      return false;
+    };
+
+    bool NegA = invertIfNegative(A);
+    bool NegB = invertIfNegative(B);
+    bool NegC = invertIfNegative(C);
+
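+    // Negating both A and B cancels out, so the multiply result is effectively
+    // negated only when exactly one of them was negated; the accumulator
+    // negation (NegC) is handled independently.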
+    // If no operands are negated, we're done.
+    if (!NegA && !NegB && !NegC)
+      return SDValue();
+
+    unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
+    return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
+                       VL);
+  }
   case ISD::STORE: {
     auto *Store = cast<StoreSDNode>(N);
     SDValue Val = Store->getValue();
@@ -11275,7 +11339,10 @@
   NODE_NAME_CASE(FNEG_VL)
   NODE_NAME_CASE(FABS_VL)
   NODE_NAME_CASE(FSQRT_VL)
-  NODE_NAME_CASE(FMA_VL)
+  NODE_NAME_CASE(VFMADD_VL)
+  NODE_NAME_CASE(VFNMADD_VL)
+  NODE_NAME_CASE(VFMSUB_VL)
+  NODE_NAME_CASE(VFNMSUB_VL)
   NODE_NAME_CASE(FCOPYSIGN_VL)
   NODE_NAME_CASE(SMIN_VL)
   NODE_NAME_CASE(SMAX_VL)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -104,7 +104,10 @@
                                              SDTCVecEltisVT<4, i1>,
                                              SDTCisSameNumEltsAs<0, 4>,
                                              SDTCisVT<5, XLenVT>]>;
-def riscv_fma_vl : SDNode<"RISCVISD::FMA_VL", SDT_RISCVVecFMA_VL, [SDNPCommutative]>;
+def riscv_vfmadd_vl  : SDNode<"RISCVISD::VFMADD_VL",  SDT_RISCVVecFMA_VL, [SDNPCommutative]>;
+def riscv_vfnmadd_vl : SDNode<"RISCVISD::VFNMADD_VL", SDT_RISCVVecFMA_VL, [SDNPCommutative]>;
+def riscv_vfmsub_vl  : SDNode<"RISCVISD::VFMSUB_VL",  SDT_RISCVVecFMA_VL, [SDNPCommutative]>;
+def riscv_vfnmsub_vl : SDNode<"RISCVISD::VFNMSUB_VL", SDT_RISCVVecFMA_VL, [SDNPCommutative]>;
 
 def SDT_RISCVFPRoundOp_VL  : SDTypeProfile<1, 3, [
   SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<0, 1>, SDTCisSameNumEltsAs<0, 1>,
@@ -903,11 +906,11 @@
   }
 }
 
-multiclass VPatWidenFPMulAccVL_VV_VF<string instruction_name> {
+multiclass VPatWidenFPMulAccVL_VV_VF<SDNode vop, string instruction_name> {
   foreach vtiToWti = AllWidenableFloatVectors in {
     defvar vti = vtiToWti.Vti;
     defvar wti = vtiToWti.Wti;
-    def : Pat<(riscv_fma_vl
+    def : Pat<(vop
                    (wti.Vector (riscv_fpextend_vl_oneuse
                                     (vti.Vector vti.RegClass:$rs1),
                                     (vti.Mask true_mask), VLOpFrag)),
@@ -919,7 +922,7 @@
               (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
                  wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
                  GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-    def : Pat<(riscv_fma_vl
+    def : Pat<(vop
                    (wti.Vector (riscv_fpextend_vl_oneuse
                                     (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)),
                                     (vti.Mask true_mask), VLOpFrag)),
@@ -934,131 +937,6 @@
   }
 }
 
-multiclass VPatWidenFPNegMulAccVL_VV_VF<string instruction_name> {
-  foreach vtiToWti = AllWidenableFloatVectors in {
-    defvar vti = vtiToWti.Vti;
-    defvar wti = vtiToWti.Wti;
-    def : Pat<(riscv_fma_vl
-                   (riscv_fneg_vl
-                       (wti.Vector (riscv_fpextend_vl_oneuse
-                                        (vti.Vector vti.RegClass:$rs1),
-                                        (vti.Mask true_mask), VLOpFrag)),
-                       (wti.Mask true_mask), VLOpFrag),
-                   (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
-                                             (vti.Mask true_mask), VLOpFrag),
-                   (riscv_fneg_vl wti.RegClass:$rd, (wti.Mask true_mask),
-                                  VLOpFrag),
-                   (vti.Mask true_mask), VLOpFrag),
-              (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
-                 wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
-                 GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-    def : Pat<(riscv_fma_vl
-                   (riscv_fpextend_vl_oneuse
-                       (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)),
-                       (vti.Mask true_mask), VLOpFrag),
-                   (riscv_fneg_vl
-                       (wti.Vector (riscv_fpextend_vl_oneuse
-                                        (vti.Vector vti.RegClass:$rs2),
-                                        (vti.Mask true_mask), VLOpFrag)),
-                       (vti.Mask true_mask), VLOpFrag),
-                   (riscv_fneg_vl wti.RegClass:$rd, (wti.Mask true_mask),
-                                  VLOpFrag),
-                   (vti.Mask true_mask), VLOpFrag),
-              (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
-                 wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
-                 GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-    def : Pat<(riscv_fma_vl
-                   (riscv_fneg_vl
-                       (wti.Vector (riscv_fpextend_vl_oneuse
-                                        (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)),
-                                        (vti.Mask true_mask), VLOpFrag)),
-                       (vti.Mask true_mask), VLOpFrag),
-                   (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
-                                             (vti.Mask true_mask), VLOpFrag),
-                   (riscv_fneg_vl wti.RegClass:$rd, (wti.Mask true_mask),
-                                  VLOpFrag),
-                   (vti.Mask true_mask), VLOpFrag),
-              (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
-                 wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
-                 GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-  }
-}
-
-multiclass VPatWidenFPMulSacVL_VV_VF<string instruction_name> {
-  foreach vtiToWti = AllWidenableFloatVectors in {
-    defvar vti = vtiToWti.Vti;
-    defvar wti = vtiToWti.Wti;
-    def : Pat<(riscv_fma_vl
-                   (wti.Vector (riscv_fpextend_vl_oneuse
-                                    (vti.Vector vti.RegClass:$rs1),
-                                    (vti.Mask true_mask), VLOpFrag)),
-                   (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
-                                             (vti.Mask true_mask), VLOpFrag),
-                   (riscv_fneg_vl wti.RegClass:$rd, (vti.Mask true_mask),
-                                  VLOpFrag),
-                   (vti.Mask true_mask), VLOpFrag),
-              (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
-                 wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
-                 GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-    def : Pat<(riscv_fma_vl
-                   (wti.Vector (riscv_fpextend_vl_oneuse
-                                    (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)),
-                                    (vti.Mask true_mask), VLOpFrag)),
-                   (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
-                                             (vti.Mask true_mask), VLOpFrag),
-                   (riscv_fneg_vl wti.RegClass:$rd, (vti.Mask true_mask),
-                                  VLOpFrag),
-                   (vti.Mask true_mask), VLOpFrag),
-              (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
-                 wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
-                 GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-  }
-}
-
-multiclass VPatWidenFPNegMulSacVL_VV_VF<string instruction_name> {
-  foreach vtiToWti = AllWidenableFloatVectors in {
-    defvar vti = vtiToWti.Vti;
-    defvar wti = vtiToWti.Wti;
-    def : Pat<(riscv_fma_vl
-                   (riscv_fneg_vl
-                       (wti.Vector (riscv_fpextend_vl_oneuse
-                                        (vti.Vector vti.RegClass:$rs1),
-                                        (vti.Mask true_mask), VLOpFrag)),
-                       (vti.Mask true_mask), VLOpFrag),
-                   (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
-                                             (vti.Mask true_mask), VLOpFrag),
-                   wti.RegClass:$rd, (wti.Mask true_mask), VLOpFrag),
-              (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
-                 wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
-                 GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-    def : Pat<(riscv_fma_vl
-                   (wti.Vector (riscv_fpextend_vl_oneuse
-                                    (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)),
-                                    (vti.Mask true_mask), VLOpFrag)),
-                   (riscv_fneg_vl
-                       (wti.Vector (riscv_fpextend_vl_oneuse
-                                        (vti.Vector vti.RegClass:$rs2),
-                                        (vti.Mask true_mask), VLOpFrag)),
-                       (wti.Mask true_mask), VLOpFrag),
-                   wti.RegClass:$rd, (wti.Mask true_mask), VLOpFrag),
-              (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
-                 wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
-                 GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-    def : Pat<(riscv_fma_vl
-                   (riscv_fneg_vl
-                       (wti.Vector (riscv_fpextend_vl_oneuse
-                                        (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)),
-                                        (vti.Mask true_mask), VLOpFrag)),
-                       (vti.Mask true_mask), VLOpFrag),
-                   (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
-                                             (vti.Mask true_mask), VLOpFrag),
-                   wti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag),
-              (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
-                 wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
-                 GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-  }
-}
-
 //===----------------------------------------------------------------------===//
 // Patterns.
 //===----------------------------------------------------------------------===//
@@ -1361,217 +1239,126 @@
     // NOTE: We choose VFMADD because it has the most commuting freedom. So it
     // works best with how TwoAddressInstructionPass tries commuting.
     defvar suffix = vti.LMul.MX;
-    def : Pat<(vti.Vector (riscv_fma_vl vti.RegClass:$rs1, vti.RegClass:$rd,
-                                        vti.RegClass:$rs2, (vti.Mask true_mask),
-                                        VLOpFrag)),
+    def : Pat<(vti.Vector (riscv_vfmadd_vl vti.RegClass:$rs1, vti.RegClass:$rd,
+                                           vti.RegClass:$rs2, (vti.Mask true_mask),
+                                           VLOpFrag)),
              (!cast<Instruction>("PseudoVFMADD_VV_"# suffix)
                 vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
                 GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-    def : Pat<(vti.Vector (riscv_fma_vl vti.RegClass:$rs1, vti.RegClass:$rd,
-                                        vti.RegClass:$rs2, (vti.Mask V0),
-                                        VLOpFrag)),
+    def : Pat<(vti.Vector (riscv_vfmadd_vl vti.RegClass:$rs1, vti.RegClass:$rd,
+                                           vti.RegClass:$rs2, (vti.Mask V0),
+                                           VLOpFrag)),
              (!cast<Instruction>("PseudoVFMADD_VV_"# suffix #"_MASK")
                 vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
                 (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-    def : Pat<(vti.Vector (riscv_fma_vl vti.RegClass:$rs1, vti.RegClass:$rd,
-                                        (riscv_fneg_vl vti.RegClass:$rs2,
-                                                       (vti.Mask true_mask),
-                                                       VLOpFrag),
-                                        (vti.Mask true_mask),
-                                        VLOpFrag)),
+    def : Pat<(vti.Vector (riscv_vfmsub_vl vti.RegClass:$rs1, vti.RegClass:$rd,
+                                           vti.RegClass:$rs2, (vti.Mask true_mask),
+                                           VLOpFrag)),
              (!cast<Instruction>("PseudoVFMSUB_VV_"# suffix)
                 vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
                 GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-    def : Pat<(vti.Vector (riscv_fma_vl vti.RegClass:$rs1, vti.RegClass:$rd,
-                                        (riscv_fneg_vl vti.RegClass:$rs2,
-                                                       (vti.Mask srcvalue),
-                                                       VLOpFrag),
-                                        (vti.Mask V0),
-                                        VLOpFrag)),
+    def : Pat<(vti.Vector (riscv_vfmsub_vl vti.RegClass:$rs1, vti.RegClass:$rd,
+                                           vti.RegClass:$rs2, (vti.Mask V0),
+                                           VLOpFrag)),
              (!cast<Instruction>("PseudoVFMSUB_VV_"# suffix #"_MASK")
                 vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
                 (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-    def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl vti.RegClass:$rs1,
-                                                       (vti.Mask srcvalue),
-                                                       VLOpFrag),
-                                        vti.RegClass:$rd,
-                                        (riscv_fneg_vl vti.RegClass:$rs2,
-                                                       (vti.Mask srcvalue),
-                                                       VLOpFrag),
-                                        (vti.Mask true_mask),
-                                        VLOpFrag)),
+    def : Pat<(vti.Vector (riscv_vfnmadd_vl vti.RegClass:$rs1, vti.RegClass:$rd,
+                                            vti.RegClass:$rs2, (vti.Mask true_mask),
+                                            VLOpFrag)),
             (!cast<Instruction>("PseudoVFNMADD_VV_"# suffix)
                vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
                GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-    def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl vti.RegClass:$rs1,
-                                                       (vti.Mask srcvalue),
-                                                       VLOpFrag),
-                                        vti.RegClass:$rd,
-                                        (riscv_fneg_vl vti.RegClass:$rs2,
-                                                       (vti.Mask srcvalue),
-                                                       VLOpFrag),
-                                        (vti.Mask V0),
-                                        VLOpFrag)),
+    def : Pat<(vti.Vector (riscv_vfnmadd_vl vti.RegClass:$rs1, vti.RegClass:$rd,
+                                            vti.RegClass:$rs2, (vti.Mask V0),
+                                            VLOpFrag)),
            (!cast<Instruction>("PseudoVFNMADD_VV_"# suffix #"_MASK")
               vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
               (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-    def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl vti.RegClass:$rs1,
-                                                       (vti.Mask srcvalue),
-                                                       VLOpFrag),
-                                        vti.RegClass:$rd, vti.RegClass:$rs2,
-                                        (vti.Mask true_mask),
-                                        VLOpFrag)),
+    def : Pat<(vti.Vector (riscv_vfnmsub_vl vti.RegClass:$rs1, vti.RegClass:$rd,
+                                            vti.RegClass:$rs2, (vti.Mask true_mask),
+                                            VLOpFrag)),
            (!cast<Instruction>("PseudoVFNMSUB_VV_"# suffix)
              vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
              GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-    def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl vti.RegClass:$rs1,
-                                                       (vti.Mask srcvalue),
-                                                       VLOpFrag),
-                                        vti.RegClass:$rd, vti.RegClass:$rs2,
-                                        (vti.Mask V0),
-                                        VLOpFrag)),
+    def : Pat<(vti.Vector (riscv_vfnmsub_vl vti.RegClass:$rs1, vti.RegClass:$rd,
+                                            vti.RegClass:$rs2, (vti.Mask V0),
+                                            VLOpFrag)),
           (!cast<Instruction>("PseudoVFNMSUB_VV_"# suffix #"_MASK")
             vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
             (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
 
     // The choice of VFMADD here is arbitrary, vfmadd.vf and vfmacc.vf are equally
     // commutable.
-    def : Pat<(vti.Vector (riscv_fma_vl (SplatFPOp vti.ScalarRegClass:$rs1),
-                                        vti.RegClass:$rd, vti.RegClass:$rs2,
-                                        (vti.Mask true_mask),
-                                        VLOpFrag)),
+    def : Pat<(vti.Vector (riscv_vfmadd_vl (SplatFPOp vti.ScalarRegClass:$rs1),
+                                           vti.RegClass:$rd, vti.RegClass:$rs2,
+                                           (vti.Mask true_mask),
+                                           VLOpFrag)),
            (!cast<Instruction>("PseudoVFMADD_V" # vti.ScalarSuffix # "_" # suffix)
              vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
              GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-    def : Pat<(vti.Vector (riscv_fma_vl (SplatFPOp vti.ScalarRegClass:$rs1),
-                                        vti.RegClass:$rd, vti.RegClass:$rs2,
-                                        (vti.Mask V0),
-                                        VLOpFrag)),
+    def : Pat<(vti.Vector (riscv_vfmadd_vl (SplatFPOp vti.ScalarRegClass:$rs1),
+                                           vti.RegClass:$rd, vti.RegClass:$rs2,
+                                           (vti.Mask V0),
+                                           VLOpFrag)),
           (!cast<Instruction>("PseudoVFMADD_V" # vti.ScalarSuffix # "_" # suffix # "_MASK")
             vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
             (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-    def : Pat<(vti.Vector (riscv_fma_vl (SplatFPOp vti.ScalarRegClass:$rs1),
-                                        vti.RegClass:$rd,
-                                        (riscv_fneg_vl vti.RegClass:$rs2,
-                                                       (vti.Mask srcvalue),
-                                                       VLOpFrag),
-                                        (vti.Mask true_mask),
-                                        VLOpFrag)),
+    def : Pat<(vti.Vector (riscv_vfmsub_vl (SplatFPOp vti.ScalarRegClass:$rs1),
+                                           vti.RegClass:$rd, vti.RegClass:$rs2,
+                                           (vti.Mask true_mask),
+                                           VLOpFrag)),
           (!cast<Instruction>("PseudoVFMSUB_V" # vti.ScalarSuffix # "_" # suffix)
             vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
             GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-    def : Pat<(vti.Vector (riscv_fma_vl (SplatFPOp vti.ScalarRegClass:$rs1),
-                                        vti.RegClass:$rd,
-                                        (riscv_fneg_vl vti.RegClass:$rs2,
-                                                       (vti.Mask srcvalue),
-                                                       VLOpFrag),
-                                        (vti.Mask V0),
-                                        VLOpFrag)),
+    def : Pat<(vti.Vector (riscv_vfmsub_vl (SplatFPOp vti.ScalarRegClass:$rs1),
+                                           vti.RegClass:$rd, vti.RegClass:$rs2,
+                                           (vti.Mask V0),
+                                           VLOpFrag)),
          (!cast<Instruction>("PseudoVFMSUB_V" # vti.ScalarSuffix # "_" # suffix # "_MASK")
            vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
            (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-    def : Pat<(vti.Vector (riscv_fma_vl (SplatFPOp vti.ScalarRegClass:$rs1),
-                                        (riscv_fneg_vl vti.RegClass:$rd,
-                                                       (vti.Mask srcvalue),
-                                                       VLOpFrag),
-                                        (riscv_fneg_vl vti.RegClass:$rs2,
-                                                       (vti.Mask srcvalue),
-                                                       VLOpFrag),
-                                        (vti.Mask true_mask),
-                                        VLOpFrag)),
-         (!cast<Instruction>("PseudoVFNMADD_V" # vti.ScalarSuffix # "_" # suffix)
-           vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
-           GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-    def : Pat<(vti.Vector (riscv_fma_vl (SplatFPOp vti.ScalarRegClass:$rs1),
-                                        (riscv_fneg_vl vti.RegClass:$rd,
-                                                       (vti.Mask srcvalue),
-                                                       VLOpFrag),
-                                        (riscv_fneg_vl vti.RegClass:$rs2,
-                                                       (vti.Mask srcvalue),
-                                                       VLOpFrag),
-                                        (vti.Mask V0),
-                                        VLOpFrag)),
-         (!cast<Instruction>("PseudoVFNMADD_V" # vti.ScalarSuffix # "_" # suffix # "_MASK")
-           vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
-           (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
-
-    def : Pat<(vti.Vector (riscv_fma_vl (SplatFPOp vti.ScalarRegClass:$rs1),
-                                        (riscv_fneg_vl vti.RegClass:$rd,
-                                                       (vti.Mask srcvalue),
-                                                       VLOpFrag),
-                                        vti.RegClass:$rs2,
-                                        (vti.Mask true_mask),
-                                        VLOpFrag)),
(!cast("PseudoVFNMSUB_V" # vti.ScalarSuffix # "_" # suffix) - vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(vti.Vector (riscv_fma_vl (SplatFPOp vti.ScalarRegClass:$rs1), - (riscv_fneg_vl vti.RegClass:$rd, - (vti.Mask srcvalue), - VLOpFrag), - vti.RegClass:$rs2, - (vti.Mask V0), - VLOpFrag)), - (!cast("PseudoVFNMSUB_V" # vti.ScalarSuffix # "_" # suffix # "_MASK") - vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - - // The splat might be negated. - def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl (SplatFPOp vti.ScalarRegClass:$rs1), - (vti.Mask srcvalue), - VLOpFrag), - vti.RegClass:$rd, - (riscv_fneg_vl vti.RegClass:$rs2, - (vti.Mask srcvalue), - VLOpFrag), - (vti.Mask true_mask), - VLOpFrag)), + def : Pat<(vti.Vector (riscv_vfnmadd_vl (SplatFPOp vti.ScalarRegClass:$rs1), + vti.RegClass:$rd, vti.RegClass:$rs2, + (vti.Mask true_mask), + VLOpFrag)), (!cast("PseudoVFNMADD_V" # vti.ScalarSuffix # "_" # suffix) vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl (SplatFPOp vti.ScalarRegClass:$rs1), - (vti.Mask srcvalue), - VLOpFrag), - vti.RegClass:$rd, - (riscv_fneg_vl vti.RegClass:$rs2, - (vti.Mask srcvalue), - VLOpFrag), - (vti.Mask V0), - VLOpFrag)), + def : Pat<(vti.Vector (riscv_vfnmadd_vl (SplatFPOp vti.ScalarRegClass:$rs1), + vti.RegClass:$rd, vti.RegClass:$rs2, + (vti.Mask V0), + VLOpFrag)), (!cast("PseudoVFNMADD_V" # vti.ScalarSuffix # "_" # suffix # "_MASK") vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl (SplatFPOp vti.ScalarRegClass:$rs1), - (vti.Mask srcvalue), - VLOpFrag), - vti.RegClass:$rd, vti.RegClass:$rs2, - (vti.Mask true_mask), - VLOpFrag)), + def : Pat<(vti.Vector (riscv_vfnmsub_vl (SplatFPOp vti.ScalarRegClass:$rs1), + vti.RegClass:$rd, vti.RegClass:$rs2, + (vti.Mask true_mask), + VLOpFrag)), (!cast("PseudoVFNMSUB_V" # vti.ScalarSuffix # "_" # suffix) vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl (SplatFPOp vti.ScalarRegClass:$rs1), - (vti.Mask srcvalue), - VLOpFrag), - vti.RegClass:$rd, vti.RegClass:$rs2, - (vti.Mask V0), - VLOpFrag)), + def : Pat<(vti.Vector (riscv_vfnmsub_vl (SplatFPOp vti.ScalarRegClass:$rs1), + vti.RegClass:$rd, vti.RegClass:$rs2, + (vti.Mask V0), + VLOpFrag)), (!cast("PseudoVFNMSUB_V" # vti.ScalarSuffix # "_" # suffix # "_MASK") vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } // 14.7. Vector Widening Floating-Point Fused Multiply-Add Instructions -defm : VPatWidenFPMulAccVL_VV_VF<"PseudoVFWMACC">; -defm : VPatWidenFPNegMulAccVL_VV_VF<"PseudoVFWNMACC">; -defm : VPatWidenFPMulSacVL_VV_VF<"PseudoVFWMSAC">; -defm : VPatWidenFPNegMulSacVL_VV_VF<"PseudoVFWNMSAC">; +defm : VPatWidenFPMulAccVL_VV_VF; +defm : VPatWidenFPMulAccVL_VV_VF; +defm : VPatWidenFPMulAccVL_VV_VF; +defm : VPatWidenFPMulAccVL_VV_VF; // 14.11. 
 
 // 14.11. Vector Floating-Point MIN/MAX Instructions
 defm : VPatBinaryFPVL_VV_VF<riscv_fminnum_vl, "PseudoVFMIN">;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
@@ -1466,7 +1466,8 @@
 ; CHECK-LABEL: vfnmadd_vv_nxv1f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vfnmadd.vv v9, v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 %evl)
   %negc = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %c, <vscale x 1 x i1> %m, i32 %evl)
@@ -1638,7 +1639,8 @@
 ; CHECK-LABEL: vfnmsub_vv_nxv1f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vfnmadd.vv v9, v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 %evl)
   %negc = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %c, <vscale x 1 x i1> %m, i32 %evl)
@@ -1885,7 +1887,8 @@
 ; CHECK-LABEL: vfnmadd_vv_nxv2f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vfnmadd.vv v9, v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 2 x half> @llvm.vp.fneg.nxv2f16(<vscale x 2 x half> %b, <vscale x 2 x i1> %m, i32 %evl)
   %negc = call <vscale x 2 x half> @llvm.vp.fneg.nxv2f16(<vscale x 2 x half> %c, <vscale x 2 x i1> %m, i32 %evl)
@@ -2057,7 +2060,8 @@
 ; CHECK-LABEL: vfnmsub_vv_nxv2f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vfnmadd.vv v9, v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 2 x half> @llvm.vp.fneg.nxv2f16(<vscale x 2 x half> %b, <vscale x 2 x i1> %m, i32 %evl)
   %negc = call <vscale x 2 x half> @llvm.vp.fneg.nxv2f16(<vscale x 2 x half> %c, <vscale x 2 x i1> %m, i32 %evl)
@@ -2304,7 +2308,8 @@
 ; CHECK-LABEL: vfnmadd_vv_nxv4f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vfnmadd.vv v9, v8, v10, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
   %negc = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> %c, <vscale x 4 x i1> %m, i32 %evl)
@@ -2476,7 +2481,8 @@
 ; CHECK-LABEL: vfnmsub_vv_nxv4f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vfnmadd.vv v9, v8, v10, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
   %negc = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> %c, <vscale x 4 x i1> %m, i32 %evl)
@@ -2723,7 +2729,8 @@
 ; CHECK-LABEL: vfnmadd_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    vfnmadd.vv v10, v8, v12, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
   %negc = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> %c, <vscale x 8 x i1> %m, i32 %evl)
@@ -2895,7 +2902,8 @@
 ; CHECK-LABEL: vfnmsub_vv_nxv8f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    vfnmadd.vv v10, v8, v12, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
   %negc = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> %c, <vscale x 8 x i1> %m, i32 %evl)
@@ -3142,7 +3150,8 @@
 ; CHECK-LABEL: vfnmadd_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v12, v16, v0.t
+; CHECK-NEXT:    vfnmadd.vv v12, v8, v16, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half> %b, <vscale x 16 x i1> %m, i32 %evl)
   %negc = call <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half> %c, <vscale x 16 x i1> %m, i32 %evl)
@@ -3314,7 +3323,8 @@
 ; CHECK-LABEL: vfnmsub_vv_nxv16f16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v12, v16, v0.t
+; CHECK-NEXT:    vfnmadd.vv v12, v8, v16, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half> %b, <vscale x 16 x i1> %m, i32 %evl)
   %negc = call <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half> %c, <vscale x 16 x i1> %m, i32 %evl)
@@ -3564,7 +3574,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl8re16.v v24, (a0)
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v16, v24, v0.t
+; CHECK-NEXT:    vfnmadd.vv v16, v8, v24, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %b, <vscale x 32 x i1> %m, i32 %evl)
   %negc = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %c, <vscale x 32 x i1> %m, i32 %evl)
@@ -3740,7 +3751,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl8re16.v v24, (a0)
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v16, v24, v0.t
+; CHECK-NEXT:    vfnmadd.vv v16, v8, v24, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %b, <vscale x 32 x i1> %m, i32 %evl)
   %negc = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %c, <vscale x 32 x i1> %m, i32 %evl)
@@ -3990,7 +4002,8 @@
 ; CHECK-LABEL: vfnmadd_vv_nxv1f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vfnmadd.vv v9, v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float> %b, <vscale x 1 x i1> %m, i32 %evl)
   %negc = call <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 %evl)
@@ -4162,7 +4175,8 @@
 ; CHECK-LABEL: vfnmsub_vv_nxv1f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vfnmadd.vv v9, v8, v10, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float> %b, <vscale x 1 x i1> %m, i32 %evl)
   %negc = call <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 %evl)
@@ -4409,7 +4423,8 @@
 ; CHECK-LABEL: vfnmadd_vv_nxv2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vfnmadd.vv v9, v8, v10, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 2 x float> @llvm.vp.fneg.nxv2f32(<vscale x 2 x float> %b, <vscale x 2 x i1> %m, i32 %evl)
   %negc = call <vscale x 2 x float> @llvm.vp.fneg.nxv2f32(<vscale x 2 x float> %c, <vscale x 2 x i1> %m, i32 %evl)
@@ -4581,7 +4596,8 @@
 ; CHECK-LABEL: vfnmsub_vv_nxv2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vfnmadd.vv v9, v8, v10, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 2 x float> @llvm.vp.fneg.nxv2f32(<vscale x 2 x float> %b, <vscale x 2 x i1> %m, i32 %evl)
   %negc = call <vscale x 2 x float> @llvm.vp.fneg.nxv2f32(<vscale x 2 x float> %c, <vscale x 2 x i1> %m, i32 %evl)
@@ -4828,7 +4844,8 @@
 ; CHECK-LABEL: vfnmadd_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    vfnmadd.vv v10, v8, v12, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> %b, <vscale x 4 x i1> %m, i32 %evl)
   %negc = call <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> %c, <vscale x 4 x i1> %m, i32 %evl)
@@ -5000,7 +5017,8 @@
 ; CHECK-LABEL: vfnmsub_vv_nxv4f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    vfnmadd.vv v10, v8, v12, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> %b, <vscale x 4 x i1> %m, i32 %evl)
   %negc = call <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> %c, <vscale x 4 x i1> %m, i32 %evl)
@@ -5247,7 +5265,8 @@
 ; CHECK-LABEL: vfnmadd_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v12, v16, v0.t
+; CHECK-NEXT:    vfnmadd.vv v12, v8, v16, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 8 x float> @llvm.vp.fneg.nxv8f32(<vscale x 8 x float> %b, <vscale x 8 x i1> %m, i32 %evl)
   %negc = call <vscale x 8 x float> @llvm.vp.fneg.nxv8f32(<vscale x 8 x float> %c, <vscale x 8 x i1> %m, i32 %evl)
@@ -5419,7 +5438,8 @@
 ; CHECK-LABEL: vfnmsub_vv_nxv8f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v12, v16, v0.t
+; CHECK-NEXT:    vfnmadd.vv v12, v8, v16, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 8 x float> @llvm.vp.fneg.nxv8f32(<vscale x 8 x float> %b, <vscale x 8 x i1> %m, i32 %evl)
   %negc = call <vscale x 8 x float> @llvm.vp.fneg.nxv8f32(<vscale x 8 x float> %c, <vscale x 8 x i1> %m, i32 %evl)
@@ -5669,7 +5689,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl8re32.v v24, (a0)
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v16, v24, v0.t
+; CHECK-NEXT:    vfnmadd.vv v16, v8, v24, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float> %b, <vscale x 16 x i1> %m, i32 %evl)
   %negc = call <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float> %c, <vscale x 16 x i1> %m, i32 %evl)
@@ -5845,7 +5866,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl8re32.v v24, (a0)
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v16, v24, v0.t
+; CHECK-NEXT:    vfnmadd.vv v16, v8, v24, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float> %b, <vscale x 16 x i1> %m, i32 %evl)
   %negc = call <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float> %c, <vscale x 16 x i1> %m, i32 %evl)
@@ -6095,7 +6117,8 @@
 ; CHECK-LABEL: vfnmadd_vv_nxv1f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vfnmadd.vv v9, v8, v10, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 1 x double> @llvm.vp.fneg.nxv1f64(<vscale x 1 x double> %b, <vscale x 1 x i1> %m, i32 %evl)
   %negc = call <vscale x 1 x double> @llvm.vp.fneg.nxv1f64(<vscale x 1 x double> %c, <vscale x 1 x i1> %m, i32 %evl)
@@ -6267,7 +6290,8 @@
 ; CHECK-LABEL: vfnmsub_vv_nxv1f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vfnmadd.vv v9, v8, v10, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 1 x double> @llvm.vp.fneg.nxv1f64(<vscale x 1 x double> %b, <vscale x 1 x i1> %m, i32 %evl)
   %negc = call <vscale x 1 x double> @llvm.vp.fneg.nxv1f64(<vscale x 1 x double> %c, <vscale x 1 x i1> %m, i32 %evl)
@@ -6514,7 +6538,8 @@
 ; CHECK-LABEL: vfnmadd_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    vfnmadd.vv v10, v8, v12, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 2 x double> @llvm.vp.fneg.nxv2f64(<vscale x 2 x double> %b, <vscale x 2 x i1> %m, i32 %evl)
   %negc = call <vscale x 2 x double> @llvm.vp.fneg.nxv2f64(<vscale x 2 x double> %c, <vscale x 2 x i1> %m, i32 %evl)
@@ -6686,7 +6711,8 @@
 ; CHECK-LABEL: vfnmsub_vv_nxv2f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v10, v12, v0.t
+; CHECK-NEXT:    vfnmadd.vv v10, v8, v12, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 2 x double> @llvm.vp.fneg.nxv2f64(<vscale x 2 x double> %b, <vscale x 2 x i1> %m, i32 %evl)
   %negc = call <vscale x 2 x double> @llvm.vp.fneg.nxv2f64(<vscale x 2 x double> %c, <vscale x 2 x i1> %m, i32 %evl)
@@ -6933,7 +6959,8 @@
 ; CHECK-LABEL: vfnmadd_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v12, v16, v0.t
+; CHECK-NEXT:    vfnmadd.vv v12, v8, v16, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 4 x double> @llvm.vp.fneg.nxv4f64(<vscale x 4 x double> %b, <vscale x 4 x i1> %m, i32 %evl)
   %negc = call <vscale x 4 x double> @llvm.vp.fneg.nxv4f64(<vscale x 4 x double> %c, <vscale x 4 x i1> %m, i32 %evl)
@@ -7105,7 +7132,8 @@
 ; CHECK-LABEL: vfnmsub_vv_nxv4f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v12, v16, v0.t
+; CHECK-NEXT:    vfnmadd.vv v12, v8, v16, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 4 x double> @llvm.vp.fneg.nxv4f64(<vscale x 4 x double> %b, <vscale x 4 x i1> %m, i32 %evl)
   %negc = call <vscale x 4 x double> @llvm.vp.fneg.nxv4f64(<vscale x 4 x double> %c, <vscale x 4 x i1> %m, i32 %evl)
@@ -7355,7 +7383,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl8re64.v v24, (a0)
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v16, v24, v0.t
+; CHECK-NEXT:    vfnmadd.vv v16, v8, v24, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 8 x double> @llvm.vp.fneg.nxv8f64(<vscale x 8 x double> %b, <vscale x 8 x i1> %m, i32 %evl)
   %negc = call <vscale x 8 x double> @llvm.vp.fneg.nxv8f64(<vscale x 8 x double> %c, <vscale x 8 x i1> %m, i32 %evl)
@@ -7531,7 +7560,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vl8re64.v v24, (a0)
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v16, v24, v0.t
+; CHECK-NEXT:    vfnmadd.vv v16, v8, v24, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
   %negb = call <vscale x 8 x double> @llvm.vp.fneg.nxv8f64(<vscale x 8 x double> %b, <vscale x 8 x i1> %m, i32 %evl)
   %negc = call <vscale x 8 x double> @llvm.vp.fneg.nxv8f64(<vscale x 8 x double> %c, <vscale x 8 x i1> %m, i32 %evl)
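
For reference only (not part of the patch): with the VP intrinsics exercised by the tests above, a vp.fma whose multiplicand and addend both come from vp.fneg with the same mask and EVL should now fold into a single vfnmadd.vv through the new combine, instead of materializing the negations. A minimal sketch, assuming <vscale x 1 x double> elements and a made-up function name:

declare <vscale x 1 x double> @llvm.vp.fneg.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x i1>, i32)

; fma(-a, b, -c) == -(a * b) - c, which is exactly what vfnmadd.vv computes.
define <vscale x 1 x double> @vfnmadd_fold_sketch(<vscale x 1 x double> %a, <vscale x 1 x double> %b, <vscale x 1 x double> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
  %nega = call <vscale x 1 x double> @llvm.vp.fneg.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x i1> %m, i32 %evl)
  %negc = call <vscale x 1 x double> @llvm.vp.fneg.nxv1f64(<vscale x 1 x double> %c, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %nega, <vscale x 1 x double> %b, <vscale x 1 x double> %negc, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x double> %v
}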