Index: lib/Target/RISCV/RISCVISelLowering.cpp =================================================================== --- lib/Target/RISCV/RISCVISelLowering.cpp +++ lib/Target/RISCV/RISCVISelLowering.cpp @@ -111,10 +111,8 @@ ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT, ISD::SETGE, ISD::SETNE}; - // TODO: add proper support for the various FMA variants - // (FMADD.S, FMSUB.S, FNMSUB.S, FNMADD.S). ISD::NodeType FPOpToExtend[] = { - ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FMA, ISD::FREM}; + ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM}; if (Subtarget.hasStdExtF()) { setOperationAction(ISD::FMINNUM, MVT::f32, Legal); Index: lib/Target/RISCV/RISCVInstrInfoD.td =================================================================== --- lib/Target/RISCV/RISCVInstrInfoD.td +++ lib/Target/RISCV/RISCVInstrInfoD.td @@ -230,6 +230,22 @@ def : PatFpr64Fpr64; def : Pat<(fcopysign FPR64:$rs1, (fneg FPR64:$rs2)), (FSGNJN_D $rs1, $rs2)>; +// fmadd: rs1 * rs2 + rs3 +def : Pat<(fma FPR64:$rs1, FPR64:$rs2, FPR64:$rs3), + (FMADD_D $rs1, $rs2, $rs3, 0b111)>; + +// fmsub: rs1 * rs2 - rs3 +def : Pat<(fma FPR64:$rs1, FPR64:$rs2, (fneg FPR64:$rs3)), + (FMSUB_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>; + +// fnmsub: -rs1 * rs2 + rs3 +def : Pat<(fma (fneg FPR64:$rs1), FPR64:$rs2, FPR64:$rs3), + (FNMSUB_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>; + +// fnmadd: -rs1 * rs2 - rs3 +def : Pat<(fma (fneg FPR64:$rs1), FPR64:$rs2, (fneg FPR64:$rs3)), + (FNMADD_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>; + // The RISC-V 2.2 user-level ISA spec defines fmin and fmax as returning the // canonical NaN when giving a signaling NaN. This doesn't match the LLVM // behaviour (see https://bugs.llvm.org/show_bug.cgi?id=27363). 
However, the Index: lib/Target/RISCV/RISCVInstrInfoF.td =================================================================== --- lib/Target/RISCV/RISCVInstrInfoF.td +++ lib/Target/RISCV/RISCVInstrInfoF.td @@ -270,6 +270,22 @@ def : PatFpr32Fpr32; def : Pat<(fcopysign FPR32:$rs1, (fneg FPR32:$rs2)), (FSGNJN_S $rs1, $rs2)>; +// fmadd: rs1 * rs2 + rs3 +def : Pat<(fma FPR32:$rs1, FPR32:$rs2, FPR32:$rs3), + (FMADD_S $rs1, $rs2, $rs3, 0b111)>; + +// fmsub: rs1 * rs2 - rs3 +def : Pat<(fma FPR32:$rs1, FPR32:$rs2, (fneg FPR32:$rs3)), + (FMSUB_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>; + +// fnmsub: -rs1 * rs2 + rs3 +def : Pat<(fma (fneg FPR32:$rs1), FPR32:$rs2, FPR32:$rs3), + (FNMSUB_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>; + +// fnmadd: -rs1 * rs2 - rs3 +def : Pat<(fma (fneg FPR32:$rs1), FPR32:$rs2, (fneg FPR32:$rs3)), + (FNMADD_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>; + // The RISC-V 2.2 user-level ISA spec defines fmin and fmax as returning the // canonical NaN when given a signaling NaN. This doesn't match the LLVM // behaviour (see https://bugs.llvm.org/show_bug.cgi?id=27363). However, the Index: test/CodeGen/RISCV/alu32.ll =================================================================== --- test/CodeGen/RISCV/alu32.ll +++ test/CodeGen/RISCV/alu32.ll @@ -2,9 +2,9 @@ ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I -; These tests are each targeted at a particular RISC-V ALU instruction. Other -; files in this folder exercise LLVM IR instructions that don't directly match a -; RISC-V instruction +; These tests are each targeted at a particular RISC-V ALU instruction. Most +; other files in this folder exercise LLVM IR instructions that don't directly +; match a RISC-V instruction. 
; Register-immediate instructions Index: test/CodeGen/RISCV/double-arith.ll =================================================================== --- test/CodeGen/RISCV/double-arith.ll +++ test/CodeGen/RISCV/double-arith.ll @@ -2,6 +2,10 @@ ; RUN: llc -mtriple=riscv32 -mattr=+d -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32IFD %s +; These tests are each targeted at a particular RISC-V FPU instruction. Most +; other files in this folder exercise LLVM IR instructions that don't directly +; match a RISC-V instruction. + define double @fadd_d(double %a, double %b) nounwind { ; RV32IFD-LABEL: fadd_d: ; RV32IFD: # %bb.0: @@ -277,3 +281,118 @@ %2 = zext i1 %1 to i32 ret i32 %2 } + +declare double @llvm.fma.f64(double, double, double) + +define double @fmadd_d(double %a, double %b, double %c) { +; RV32IFD-LABEL: fmadd_d: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw a4, 8(sp) +; RV32IFD-NEXT: sw a5, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: sw a2, 8(sp) +; RV32IFD-NEXT: sw a3, 12(sp) +; RV32IFD-NEXT: fld ft1, 8(sp) +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft2, 8(sp) +; RV32IFD-NEXT: fmadd.d ft0, ft2, ft1, ft0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret + %1 = call double @llvm.fma.f64(double %a, double %b, double %c) + ret double %1 +} + +define double @fmsub_d(double %a, double %b, double %c) { +; RV32IFD-LABEL: fmsub_d: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw a2, 8(sp) +; RV32IFD-NEXT: sw a3, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft1, 8(sp) +; RV32IFD-NEXT: sw a4, 8(sp) +; RV32IFD-NEXT: sw a5, 12(sp) +; RV32IFD-NEXT: fld ft2, 8(sp) +; RV32IFD-NEXT: lui a0, %hi(.LCPI15_0) +; RV32IFD-NEXT: addi a0, a0, %lo(.LCPI15_0) +; RV32IFD-NEXT: fld ft3, 0(a0) +; 
RV32IFD-NEXT: fadd.d ft2, ft2, ft3 +; RV32IFD-NEXT: fmsub.d ft0, ft1, ft0, ft2 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret + %c_ = fadd double 0.0, %c ; avoid negation using xor + %negc = fsub double -0.0, %c_ + %1 = tail call double @llvm.fma.f64(double %a, double %b, double %negc) + ret double %1 +} + +define double @fnmadd_d(double %a, double %b, double %c) { +; RV32IFD-LABEL: fnmadd_d: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw a2, 8(sp) +; RV32IFD-NEXT: sw a3, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft1, 8(sp) +; RV32IFD-NEXT: sw a4, 8(sp) +; RV32IFD-NEXT: sw a5, 12(sp) +; RV32IFD-NEXT: fld ft2, 8(sp) +; RV32IFD-NEXT: lui a0, %hi(.LCPI16_0) +; RV32IFD-NEXT: addi a0, a0, %lo(.LCPI16_0) +; RV32IFD-NEXT: fld ft3, 0(a0) +; RV32IFD-NEXT: fadd.d ft2, ft2, ft3 +; RV32IFD-NEXT: fadd.d ft1, ft1, ft3 +; RV32IFD-NEXT: fnmadd.d ft0, ft1, ft0, ft2 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret + %a_ = fadd double 0.0, %a + %c_ = fadd double 0.0, %c + %nega = fsub double -0.0, %a_ + %negc = fsub double -0.0, %c_ + %1 = tail call double @llvm.fma.f64(double %nega, double %b, double %negc) + ret double %1 +} + +define double @fnmsub_d(double %a, double %b, double %c) { +; RV32IFD-LABEL: fnmsub_d: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw a4, 8(sp) +; RV32IFD-NEXT: sw a5, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: sw a2, 8(sp) +; RV32IFD-NEXT: sw a3, 12(sp) +; RV32IFD-NEXT: fld ft1, 8(sp) +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft2, 8(sp) +; RV32IFD-NEXT: lui a0, %hi(.LCPI17_0) +; RV32IFD-NEXT: addi a0, a0, %lo(.LCPI17_0) +; RV32IFD-NEXT: fld ft3, 0(a0) +; RV32IFD-NEXT: fadd.d 
ft2, ft2, ft3 +; RV32IFD-NEXT: fnmsub.d ft0, ft2, ft1, ft0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret + %a_ = fadd double 0.0, %a + %nega = fsub double -0.0, %a_ + %1 = tail call double @llvm.fma.f64(double %nega, double %b, double %c) + ret double %1 +} Index: test/CodeGen/RISCV/double-intrinsics.ll =================================================================== --- test/CodeGen/RISCV/double-intrinsics.ll +++ test/CodeGen/RISCV/double-intrinsics.ll @@ -197,20 +197,31 @@ ret double %1 } -declare double @llvm.fma.f64(double, double, double) +declare double @llvm.fmuladd.f64(double, double, double) -; TODO: Select RISC-V FMA instruction. -define double @fma_f64(double %a, double %b, double %c) { -; RV32IFD-LABEL: fma_f64: +define double @fmuladd_f64(double %a, double %b, double %c) { +; Use of fmadd depends on TargetLowering::isFMAFasterThanFMulAndFAdd +; RV32IFD-LABEL: fmuladd_f64: ; RV32IFD: # %bb.0: ; RV32IFD-NEXT: addi sp, sp, -16 -; RV32IFD-NEXT: sw ra, 12(sp) -; RV32IFD-NEXT: call fma -; RV32IFD-NEXT: lw ra, 12(sp) +; RV32IFD-NEXT: sw a2, 8(sp) +; RV32IFD-NEXT: sw a3, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft1, 8(sp) +; RV32IFD-NEXT: fmul.d ft0, ft1, ft0 +; RV32IFD-NEXT: sw a4, 8(sp) +; RV32IFD-NEXT: sw a5, 12(sp) +; RV32IFD-NEXT: fld ft1, 8(sp) +; RV32IFD-NEXT: fadd.d ft0, ft0, ft1 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) ; RV32IFD-NEXT: addi sp, sp, 16 ; RV32IFD-NEXT: ret - %1 = call double @llvm.fma.f64(double %a, double %b, double %c) - ret double %1 + %1 = call double @llvm.fmuladd.f64(double %a, double %b, double %c) + ret double %1 } declare double @llvm.fabs.f64(double) Index: test/CodeGen/RISCV/float-arith.ll =================================================================== --- test/CodeGen/RISCV/float-arith.ll
+++ test/CodeGen/RISCV/float-arith.ll @@ -2,6 +2,10 @@ ; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32IF %s +; These tests are each targeted at a particular RISC-V FPU instruction. Most +; other files in this folder exercise LLVM IR instructions that don't directly +; match a RISC-V instruction. + define float @fadd_s(float %a, float %b) nounwind { ; RV32IF-LABEL: fadd_s: ; RV32IF: # %bb.0: @@ -186,3 +190,78 @@ %2 = zext i1 %1 to i32 ret i32 %2 } + +declare float @llvm.fma.f32(float, float, float) + +define float @fmadd_s(float %a, float %b, float %c) { +; RV32IF-LABEL: fmadd_s: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fmv.w.x ft0, a2 +; RV32IF-NEXT: fmv.w.x ft1, a1 +; RV32IF-NEXT: fmv.w.x ft2, a0 +; RV32IF-NEXT: fmadd.s ft0, ft2, ft1, ft0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret + %1 = call float @llvm.fma.f32(float %a, float %b, float %c) + ret float %1 +} + +define float @fmsub_s(float %a, float %b, float %c) { +; RV32IF-LABEL: fmsub_s: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fmv.w.x ft0, a2 +; RV32IF-NEXT: lui a2, %hi(.LCPI15_0) +; RV32IF-NEXT: addi a2, a2, %lo(.LCPI15_0) +; RV32IF-NEXT: flw ft1, 0(a2) +; RV32IF-NEXT: fadd.s ft0, ft0, ft1 +; RV32IF-NEXT: fmv.w.x ft1, a1 +; RV32IF-NEXT: fmv.w.x ft2, a0 +; RV32IF-NEXT: fmsub.s ft0, ft2, ft1, ft0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret + %c_ = fadd float 0.0, %c ; avoid negation using xor + %negc = fsub float -0.0, %c_ + %1 = tail call float @llvm.fma.f32(float %a, float %b, float %negc) + ret float %1 +} + +define float @fnmadd_s(float %a, float %b, float %c) { +; RV32IF-LABEL: fnmadd_s: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fmv.w.x ft0, a2 +; RV32IF-NEXT: lui a2, %hi(.LCPI16_0) +; RV32IF-NEXT: addi a2, a2, %lo(.LCPI16_0) +; RV32IF-NEXT: flw ft1, 0(a2) +; RV32IF-NEXT: fadd.s ft0, ft0, ft1 +; RV32IF-NEXT: fmv.w.x ft2, a0 +; RV32IF-NEXT: fadd.s ft1, ft2, ft1 +; RV32IF-NEXT: fmv.w.x ft2, a1 +; RV32IF-NEXT: fnmadd.s ft0, ft1, ft2, ft0 +; 
RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret + %a_ = fadd float 0.0, %a + %c_ = fadd float 0.0, %c + %nega = fsub float -0.0, %a_ + %negc = fsub float -0.0, %c_ + %1 = tail call float @llvm.fma.f32(float %nega, float %b, float %negc) + ret float %1 +} + +define float @fnmsub_s(float %a, float %b, float %c) { +; RV32IF-LABEL: fnmsub_s: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fmv.w.x ft0, a0 +; RV32IF-NEXT: lui a0, %hi(.LCPI17_0) +; RV32IF-NEXT: addi a0, a0, %lo(.LCPI17_0) +; RV32IF-NEXT: flw ft1, 0(a0) +; RV32IF-NEXT: fadd.s ft0, ft0, ft1 +; RV32IF-NEXT: fmv.w.x ft1, a2 +; RV32IF-NEXT: fmv.w.x ft2, a1 +; RV32IF-NEXT: fnmsub.s ft0, ft0, ft2, ft1 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret + %a_ = fadd float 0.0, %a + %nega = fsub float -0.0, %a_ + %1 = tail call float @llvm.fma.f32(float %nega, float %b, float %c) + ret float %1 +} Index: test/CodeGen/RISCV/float-intrinsics.ll =================================================================== --- test/CodeGen/RISCV/float-intrinsics.ll +++ test/CodeGen/RISCV/float-intrinsics.ll @@ -180,20 +180,21 @@ ret float %1 } -declare float @llvm.fma.f32(float, float, float) +declare float @llvm.fmuladd.f32(float, float, float) -; TODO: Select RISC-V FMA instruction. 
-define float @fma_f32(float %a, float %b, float %c) { -; RV32IF-LABEL: fma_f32: +define float @fmuladd_f32(float %a, float %b, float %c) { +; Use of fmadd depends on TargetLowering::isFMAFasterThanFMulAndFAdd +; RV32IF-LABEL: fmuladd_f32: ; RV32IF: # %bb.0: -; RV32IF-NEXT: addi sp, sp, -16 -; RV32IF-NEXT: sw ra, 12(sp) -; RV32IF-NEXT: call fmaf -; RV32IF-NEXT: lw ra, 12(sp) -; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: fmv.w.x ft0, a1 +; RV32IF-NEXT: fmv.w.x ft1, a0 +; RV32IF-NEXT: fmul.s ft0, ft1, ft0 +; RV32IF-NEXT: fmv.w.x ft1, a2 +; RV32IF-NEXT: fadd.s ft0, ft0, ft1 +; RV32IF-NEXT: fmv.x.w a0, ft0 ; RV32IF-NEXT: ret - %1 = call float @llvm.fma.f32(float %a, float %b, float %c) - ret float %1 + %1 = call float @llvm.fmuladd.f32(float %a, float %b, float %c) + ret float %1 } declare float @llvm.fabs.f32(float)