diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -294,6 +294,10 @@
 def : Pat<(any_fma (fneg FPR64:$rs1), FPR64:$rs2, (fneg FPR64:$rs3)),
           (FNMADD_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
 
+// fnmadd: -(rs1 * rs2 + rs3) (the nsz flag on the FMA)
+def : Pat<(fneg (any_fma_nsz FPR64:$rs1, FPR64:$rs2, FPR64:$rs3)),
+          (FNMADD_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
+
 // The ratified 20191213 ISA spec defines fmin and fmax in a way that matches
 // LLVM's fminnum and fmaxnum.
 // .
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -53,6 +53,10 @@
                                       [(riscv_strict_fcvt_wu_rv64 node:$src, node:$frm),
                                        (riscv_fcvt_wu_rv64 node:$src, node:$frm)]>;
 
+def any_fma_nsz : PatFrag<(ops node:$rs1, node:$rs2, node:$rs3),
+                          (any_fma node:$rs1, node:$rs2, node:$rs3), [{
+  return N->getFlags().hasNoSignedZeros();
+}]>;
 //===----------------------------------------------------------------------===//
 // Operand and SDNode transformation definitions.
 //===----------------------------------------------------------------------===//
@@ -520,6 +524,10 @@
 def : Pat<(any_fma (fneg FPR32:$rs1), FPR32:$rs2, (fneg FPR32:$rs3)),
           (FNMADD_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
 
+// fnmadd: -(rs1 * rs2 + rs3) (the nsz flag on the FMA)
+def : Pat<(fneg (any_fma_nsz FPR32:$rs1, FPR32:$rs2, FPR32:$rs3)),
+          (FNMADD_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
+
 // The ratified 20191213 ISA spec defines fmin and fmax in a way that matches
 // LLVM's fminnum and fmaxnum
 // .
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
@@ -296,6 +296,10 @@
 def : Pat<(any_fma (fneg FPR16:$rs1), FPR16:$rs2, (fneg FPR16:$rs3)),
           (FNMADD_H FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, 0b111)>;
 
+// fnmadd: -(rs1 * rs2 + rs3) (the nsz flag on the FMA)
+def : Pat<(fneg (any_fma_nsz FPR16:$rs1, FPR16:$rs2, FPR16:$rs3)),
+          (FNMADD_H FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, 0b111)>;
+
 // The ratified 20191213 ISA spec defines fmin and fmax in a way that matches
 // LLVM's fminnum and fmaxnum
 // .
diff --git a/llvm/test/CodeGen/RISCV/double-arith.ll b/llvm/test/CodeGen/RISCV/double-arith.ll
--- a/llvm/test/CodeGen/RISCV/double-arith.ll
+++ b/llvm/test/CodeGen/RISCV/double-arith.ll
@@ -723,6 +723,85 @@
   ret double %1
 }
 
+define double @fnmadd_d_3(double %a, double %b, double %c) nounwind {
+; RV32IFD-LABEL: fnmadd_d_3:
+; RV32IFD: # %bb.0:
+; RV32IFD-NEXT: fmadd.d ft0, fa0, fa1, fa2
+; RV32IFD-NEXT: fneg.d fa0, ft0
+; RV32IFD-NEXT: ret
+;
+; RV64IFD-LABEL: fnmadd_d_3:
+; RV64IFD: # %bb.0:
+; RV64IFD-NEXT: fmadd.d ft0, fa0, fa1, fa2
+; RV64IFD-NEXT: fneg.d fa0, ft0
+; RV64IFD-NEXT: ret
+;
+; RV32I-LABEL: fnmadd_d_3:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call fma@plt
+; RV32I-NEXT: lui a2, 524288
+; RV32I-NEXT: xor a1, a1, a2
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fnmadd_d_3:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call fma@plt
+; RV64I-NEXT: li a1, -1
+; RV64I-NEXT: slli a1, a1, 63
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+  %1 = call double @llvm.fma.f64(double %a, double %b, double %c)
+  %neg = fneg double %1
+  ret double %neg
+}
+
+
+define double @fnmadd_nsz(double %a, double %b, double %c) nounwind {
+; RV32IFD-LABEL: fnmadd_nsz:
+; RV32IFD: # %bb.0:
+; RV32IFD-NEXT: fnmadd.d fa0, fa0, fa1, fa2
+; RV32IFD-NEXT: ret
+;
+; RV64IFD-LABEL: fnmadd_nsz:
+; RV64IFD: # %bb.0:
+; RV64IFD-NEXT: fnmadd.d fa0, fa0, fa1, fa2
+; RV64IFD-NEXT: ret
+;
+; RV32I-LABEL: fnmadd_nsz:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call fma@plt
+; RV32I-NEXT: lui a2, 524288
+; RV32I-NEXT: xor a1, a1, a2
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fnmadd_nsz:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call fma@plt
+; RV64I-NEXT: li a1, -1
+; RV64I-NEXT: slli a1, a1, 63
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+  %1 = call nsz double @llvm.fma.f64(double %a, double %b, double %c)
+  %neg = fneg nsz double %1
+  ret double %neg
+}
+
 define double @fnmsub_d(double %a, double %b, double %c) nounwind {
 ; RV32IFD-LABEL: fnmsub_d:
 ; RV32IFD: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/float-arith.ll b/llvm/test/CodeGen/RISCV/float-arith.ll
--- a/llvm/test/CodeGen/RISCV/float-arith.ll
+++ b/llvm/test/CodeGen/RISCV/float-arith.ll
@@ -692,6 +692,82 @@
   ret float %1
 }
 
+define float @fnmadd_s_3(float %a, float %b, float %c) nounwind {
+; RV32IF-LABEL: fnmadd_s_3:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: fmadd.s ft0, fa0, fa1, fa2
+; RV32IF-NEXT: fneg.s fa0, ft0
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: fnmadd_s_3:
+; RV64IF: # %bb.0:
+; RV64IF-NEXT: fmadd.s ft0, fa0, fa1, fa2
+; RV64IF-NEXT: fneg.s fa0, ft0
+; RV64IF-NEXT: ret
+;
+; RV32I-LABEL: fnmadd_s_3:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call fmaf@plt
+; RV32I-NEXT: lui a1, 524288
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fnmadd_s_3:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call fmaf@plt
+; RV64I-NEXT: lui a1, 524288
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+  %1 = call float @llvm.fma.f32(float %a, float %b, float %c)
+  %neg = fneg float %1
+  ret float %neg
+}
+
+define float @fnmadd_nsz(float %a, float %b, float %c) nounwind {
+; RV32IF-LABEL: fnmadd_nsz:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: fnmadd.s fa0, fa0, fa1, fa2
+; RV32IF-NEXT: ret
+;
+; RV64IF-LABEL: fnmadd_nsz:
+; RV64IF: # %bb.0:
+; RV64IF-NEXT: fnmadd.s fa0, fa0, fa1, fa2
+; RV64IF-NEXT: ret
+;
+; RV32I-LABEL: fnmadd_nsz:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: call fmaf@plt
+; RV32I-NEXT: lui a1, 524288
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fnmadd_nsz:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: call fmaf@plt
+; RV64I-NEXT: lui a1, 524288
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+  %1 = call nsz float @llvm.fma.f32(float %a, float %b, float %c)
+  %neg = fneg nsz float %1
+  ret float %neg
+}
+
 define float @fnmsub_s(float %a, float %b, float %c) nounwind {
 ; RV32IF-LABEL: fnmsub_s:
 ; RV32IF: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/half-arith.ll b/llvm/test/CodeGen/RISCV/half-arith.ll
--- a/llvm/test/CodeGen/RISCV/half-arith.ll
+++ b/llvm/test/CodeGen/RISCV/half-arith.ll
@@ -1240,6 +1240,179 @@
   ret half %1
 }
 
+define half @fnmadd_s_3(half %a, half %b, half %c) nounwind {
+; RV32IZFH-LABEL: fnmadd_s_3:
+; RV32IZFH: # %bb.0:
+; RV32IZFH-NEXT: fmadd.h ft0, fa0, fa1, fa2
+; RV32IZFH-NEXT: fneg.h fa0, ft0
+; RV32IZFH-NEXT: ret
+;
+; RV64IZFH-LABEL: fnmadd_s_3:
+; RV64IZFH: # %bb.0:
+; RV64IZFH-NEXT: fmadd.h ft0, fa0, fa1, fa2
+; RV64IZFH-NEXT: fneg.h fa0, ft0
+; RV64IZFH-NEXT: ret
+;
+; RV32I-LABEL: fnmadd_s_3:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: lui a1, 16
+; RV32I-NEXT: addi s3, a1, -1
+; RV32I-NEXT: and a0, a0, s3
+; RV32I-NEXT: call __extendhfsf2@plt
+; RV32I-NEXT: mv s2, a0
+; RV32I-NEXT: and a0, s1, s3
+; RV32I-NEXT: call __extendhfsf2@plt
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: and a0, s0, s3
+; RV32I-NEXT: call __extendhfsf2@plt
+; RV32I-NEXT: mv a2, a0
+; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: call fmaf@plt
+; RV32I-NEXT: call __truncsfhf2@plt
+; RV32I-NEXT: lui a1, 1048568
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fnmadd_s_3:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: lui a1, 16
+; RV64I-NEXT: addiw s3, a1, -1
+; RV64I-NEXT: and a0, a0, s3
+; RV64I-NEXT: call __extendhfsf2@plt
+; RV64I-NEXT: mv s2, a0
+; RV64I-NEXT: and a0, s1, s3
+; RV64I-NEXT: call __extendhfsf2@plt
+; RV64I-NEXT: mv s1, a0
+; RV64I-NEXT: and a0, s0, s3
+; RV64I-NEXT: call __extendhfsf2@plt
+; RV64I-NEXT: mv a2, a0
+; RV64I-NEXT: mv a0, s2
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call fmaf@plt
+; RV64I-NEXT: call __truncsfhf2@plt
+; RV64I-NEXT: lui a1, 1048568
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ret
+  %1 = call half @llvm.fma.f16(half %a, half %b, half %c)
+  %neg = fneg half %1
+  ret half %neg
+}
+
+
+define half @fnmadd_nsz(half %a, half %b, half %c) nounwind {
+; RV32IZFH-LABEL: fnmadd_nsz:
+; RV32IZFH: # %bb.0:
+; RV32IZFH-NEXT: fnmadd.h fa0, fa0, fa1, fa2
+; RV32IZFH-NEXT: ret
+;
+; RV64IZFH-LABEL: fnmadd_nsz:
+; RV64IZFH: # %bb.0:
+; RV64IZFH-NEXT: fnmadd.h fa0, fa0, fa1, fa2
+; RV64IZFH-NEXT: ret
+;
+; RV32I-LABEL: fnmadd_nsz:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -32
+; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: mv s0, a2
+; RV32I-NEXT: mv s1, a1
+; RV32I-NEXT: lui a1, 16
+; RV32I-NEXT: addi s3, a1, -1
+; RV32I-NEXT: and a0, a0, s3
+; RV32I-NEXT: call __extendhfsf2@plt
+; RV32I-NEXT: mv s2, a0
+; RV32I-NEXT: and a0, s1, s3
+; RV32I-NEXT: call __extendhfsf2@plt
+; RV32I-NEXT: mv s1, a0
+; RV32I-NEXT: and a0, s0, s3
+; RV32I-NEXT: call __extendhfsf2@plt
+; RV32I-NEXT: mv a2, a0
+; RV32I-NEXT: mv a0, s2
+; RV32I-NEXT: mv a1, s1
+; RV32I-NEXT: call fmaf@plt
+; RV32I-NEXT: call __truncsfhf2@plt
+; RV32I-NEXT: lui a1, 1048568
+; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 32
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: fnmadd_nsz:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: mv s0, a2
+; RV64I-NEXT: mv s1, a1
+; RV64I-NEXT: lui a1, 16
+; RV64I-NEXT: addiw s3, a1, -1
+; RV64I-NEXT: and a0, a0, s3
+; RV64I-NEXT: call __extendhfsf2@plt
+; RV64I-NEXT: mv s2, a0
+; RV64I-NEXT: and a0, s1, s3
+; RV64I-NEXT: call __extendhfsf2@plt
+; RV64I-NEXT: mv s1, a0
+; RV64I-NEXT: and a0, s0, s3
+; RV64I-NEXT: call __extendhfsf2@plt
+; RV64I-NEXT: mv a2, a0
+; RV64I-NEXT: mv a0, s2
+; RV64I-NEXT: mv a1, s1
+; RV64I-NEXT: call fmaf@plt
+; RV64I-NEXT: call __truncsfhf2@plt
+; RV64I-NEXT: lui a1, 1048568
+; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
+; RV64I-NEXT: ret
+  %1 = call nsz half @llvm.fma.f16(half %a, half %b, half %c)
+  %neg = fneg nsz half %1
+  ret half %neg
+}
+
 define half @fnmsub_s(half %a, half %b, half %c) nounwind {
 ; RV32IZFH-LABEL: fnmsub_s:
 ; RV32IZFH: # %bb.0: