diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -119,6 +119,10 @@
   FROUND,
 
   FPCLASS,
+
+  // Floating point fmax and fmin matching the RISC-V instruction semantics.
+  FMAX, FMIN,
+
   // READ_CYCLE_WIDE - A read of the 64-bit cycle CSR on a 32-bit target
   // (returns (Lo, Hi)). It takes a chain operand.
   READ_CYCLE_WIDE,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -412,6 +412,9 @@
     // We need to custom promote this.
     if (Subtarget.is64Bit())
       setOperationAction(ISD::FPOWI, MVT::i32, Custom);
+
+    if (!Subtarget.hasStdExtZfa())
+      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Custom);
   }
 
   if (Subtarget.hasStdExtFOrZfinx()) {
@@ -434,6 +437,8 @@
 
     if (Subtarget.hasStdExtZfa())
       setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+    else
+      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);
   }
 
   if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
@@ -450,6 +455,8 @@
     } else {
       if (Subtarget.is64Bit())
         setOperationAction(FPRndMode, MVT::f64, Custom);
+
+      setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom);
     }
 
     setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
@@ -4613,6 +4620,50 @@
                       ISD::CondCode::SETNE);
 }
 
+// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
+// operations propagate nans.
+//
+// Op is the ISD::FMAXIMUM or ISD::FMINIMUM node being lowered. The result is
+// a RISCVISD::FMAX or RISCVISD::FMIN node of the same value type whose
+// operands have been adjusted so that a nan in either input reaches both.
+static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
+                                      const RISCVSubtarget &Subtarget) {
+  SDLoc DL(Op);
+  EVT VT = Op.getValueType();
+
+  SDValue X = Op.getOperand(0);
+  SDValue Y = Op.getOperand(1);
+
+  MVT XLenVT = Subtarget.getXLenVT();
+
+  // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
+  // ensures that when one input is a nan, the other will also be a nan allowing
+  // the nan to propagate. If both inputs are nan, this will swap the inputs
+  // which is harmless.
+  //
+  // The compare and select for an operand are only needed when that operand
+  // might be a nan, so skip them when the node carries the nnan fast-math
+  // flag or the operand is known to never be a nan.
+  const bool MayHaveNaNs = !Op->getFlags().hasNoNaNs();
+
+  SDValue NewY = Y;
+  if (MayHaveNaNs && !DAG.isKnownNeverNaN(X)) {
+    // An ordered self-compare is false only for a nan.
+    SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
+    NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
+  }
+
+  SDValue NewX = X;
+  if (MayHaveNaNs && !DAG.isKnownNeverNaN(Y)) {
+    SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
+    NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
+  }
+
+  unsigned Opc =
+      Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
+  return DAG.getNode(Opc, DL, VT, NewX, NewY);
+}
+
 /// Get a RISCV target specified VL op for a given SDNode.
 static unsigned getRISCVVLOp(SDValue Op) {
 #define OP_CASE(NODE)                                                          \
@@ -4931,6 +4982,9 @@
     }
     return SDValue();
   }
+  case ISD::FMAXIMUM:
+  case ISD::FMINIMUM:
+    return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
   case ISD::FP_EXTEND:
   case ISD::FP_ROUND:
     if (!Op.getValueType().isVector())
@@ -16198,6 +16252,8 @@
   NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
   NODE_NAME_CASE(FROUND)
   NODE_NAME_CASE(FPCLASS)
+  NODE_NAME_CASE(FMAX)
+  NODE_NAME_CASE(FMIN)
   NODE_NAME_CASE(READ_CYCLE_WIDE)
   NODE_NAME_CASE(BREV8)
   NODE_NAME_CASE(ORC_B)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -386,6 +386,8 @@
 foreach Ext = DExts in {
   defm : PatFprFpr_m<fminnum, FMIN_D, Ext>;
   defm : PatFprFpr_m<fmaxnum, FMAX_D, Ext>;
+  defm : PatFprFpr_m<riscv_fmin, FMIN_D, Ext>;
+  defm : PatFprFpr_m<riscv_fmax, FMAX_D, Ext>;
 }
 
 /// Setcc
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -51,6 +51,9 @@
 def riscv_fcvt_xu
     : SDNode<"RISCVISD::FCVT_XU", SDT_RISCVFCVT_X>;
 
+def riscv_fmin : SDNode<"RISCVISD::FMIN", SDTFPBinOp>;
+def riscv_fmax : SDNode<"RISCVISD::FMAX", SDTFPBinOp>;
+
 def riscv_strict_fcvt_w_rv64
     : SDNode<"RISCVISD::STRICT_FCVT_W_RV64",
SDT_RISCVFCVT_W_RV64,
              [SDNPHasChain]>;
@@ -555,6 +558,8 @@
 foreach Ext = FExts in {
   defm : PatFprFpr_m<fminnum, FMIN_S, Ext>;
   defm : PatFprFpr_m<fmaxnum, FMAX_S, Ext>;
+  defm : PatFprFpr_m<riscv_fmin, FMIN_S, Ext>;
+  defm : PatFprFpr_m<riscv_fmax, FMAX_S, Ext>;
 }
 
 /// Setcc
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
@@ -348,6 +348,8 @@
 foreach Ext = ZfhExts in {
   defm : PatFprFpr_m<fminnum, FMIN_H, Ext>;
   defm : PatFprFpr_m<fmaxnum, FMAX_H, Ext>;
+  defm : PatFprFpr_m<riscv_fmin, FMIN_H, Ext>;
+  defm : PatFprFpr_m<riscv_fmax, FMAX_H, Ext>;
 }
 
 /// Setcc
diff --git a/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll b/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll
--- a/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll
@@ -157,21 +157,21 @@
 
 define void @minimum() {
 ; CHECK-LABEL: 'minimum'
-; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %1 = call float @llvm.minimum.f32(float undef, float undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %2 = call <2 x float> @llvm.minimum.v2f32(<2 x float> undef, <2 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %3 = call <4 x float> @llvm.minimum.v4f32(<4 x float> undef, <4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %4 = call <8 x float> @llvm.minimum.v8f32(<8 x float> undef, <8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %5 = call <16 x float> @llvm.minimum.v16f32(<16 x float> undef, <16 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call float @llvm.minimum.f32(float undef, float undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <2 x float> @llvm.minimum.v2f32(<2 x float> undef, <2 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %3 = call <4 x float> @llvm.minimum.v4f32(<4 x
float> undef, <4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %4 = call <8 x float> @llvm.minimum.v8f32(<8 x float> undef, <8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %5 = call <16 x float> @llvm.minimum.v16f32(<16 x float> undef, <16 x float> undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %6 = call <vscale x 1 x float> @llvm.minimum.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %7 = call <vscale x 2 x float> @llvm.minimum.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %8 = call <vscale x 4 x float> @llvm.minimum.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %9 = call <vscale x 8 x float> @llvm.minimum.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %10 = call <vscale x 16 x float> @llvm.minimum.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %11 = call double @llvm.minimum.f64(double undef, double undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %12 = call <2 x double> @llvm.minimum.v2f64(<2 x double> undef, <2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %13 = call <4 x double> @llvm.minimum.v4f64(<4 x double> undef, <4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %14 = call <8 x double> @llvm.minimum.v8f64(<8 x double> undef, <8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %15 = call <16 x double> @llvm.minimum.v16f64(<16 x double> undef, <16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = call double @llvm.minimum.f64(double undef, double undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <2 x double> @llvm.minimum.v2f64(<2 x double> undef, <2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for
instruction: %13 = call <4 x double> @llvm.minimum.v4f64(<4 x double> undef, <4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %14 = call <8 x double> @llvm.minimum.v8f64(<8 x double> undef, <8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %15 = call <16 x double> @llvm.minimum.v16f64(<16 x double> undef, <16 x double> undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x double> @llvm.minimum.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x double> @llvm.minimum.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x double> @llvm.minimum.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef) @@ -202,21 +202,21 @@ define void @maximum() { ; CHECK-LABEL: 'maximum' -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %1 = call float @llvm.maximum.f32(float undef, float undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %2 = call <2 x float> @llvm.maximum.v2f32(<2 x float> undef, <2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %3 = call <4 x float> @llvm.maximum.v4f32(<4 x float> undef, <4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %4 = call <8 x float> @llvm.maximum.v8f32(<8 x float> undef, <8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %5 = call <16 x float> @llvm.maximum.v16f32(<16 x float> undef, <16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call float @llvm.maximum.f32(float undef, float undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <2 x float> @llvm.maximum.v2f32(<2 x float> undef, <2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %3 = call <4 x float> @llvm.maximum.v4f32(<4 x float> undef, <4 x float> undef) +;
CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %4 = call <8 x float> @llvm.maximum.v8f32(<8 x float> undef, <8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %5 = call <16 x float> @llvm.maximum.v16f32(<16 x float> undef, <16 x float> undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %6 = call <vscale x 1 x float> @llvm.maximum.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %7 = call <vscale x 2 x float> @llvm.maximum.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %8 = call <vscale x 4 x float> @llvm.maximum.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %9 = call <vscale x 8 x float> @llvm.maximum.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %10 = call <vscale x 16 x float> @llvm.maximum.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %11 = call double @llvm.maximum.f64(double undef, double undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %12 = call <2 x double> @llvm.maximum.v2f64(<2 x double> undef, <2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %13 = call <4 x double> @llvm.maximum.v4f64(<4 x double> undef, <4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %14 = call <8 x double> @llvm.maximum.v8f64(<8 x double> undef, <8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %15 = call <16 x double> @llvm.maximum.v16f64(<16 x double> undef, <16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = call double @llvm.maximum.f64(double undef, double undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <2 x double> @llvm.maximum.v2f64(<2 x double> undef, <2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %13 = call <4 x double>
@llvm.maximum.v4f64(<4 x double> undef, <4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %14 = call <8 x double> @llvm.maximum.v8f64(<8 x double> undef, <8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %15 = call <16 x double> @llvm.maximum.v16f64(<16 x double> undef, <16 x double> undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %16 = call <vscale x 1 x double> @llvm.maximum.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %17 = call <vscale x 2 x double> @llvm.maximum.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x double> undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %18 = call <vscale x 4 x double> @llvm.maximum.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x double> undef) diff --git a/llvm/test/CodeGen/RISCV/double-maximum-minimum.ll b/llvm/test/CodeGen/RISCV/double-maximum-minimum.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/double-maximum-minimum.ll @@ -0,0 +1,164 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+d \ +; RUN: -verify-machineinstrs -target-abi=ilp32d \ +; RUN: | FileCheck -check-prefixes=CHECKIFD,RV32IFD %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+d \ +; RUN: -verify-machineinstrs -target-abi=lp64d \ +; RUN: | FileCheck -check-prefixes=CHECKIFD,RV64IFD %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+zdinx \ +; RUN: -verify-machineinstrs -target-abi=ilp32 \ +; RUN: | FileCheck -check-prefix=RV32IZFINXZDINX %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zdinx \ +; RUN: -verify-machineinstrs -target-abi=lp64 \ +; RUN: | FileCheck -check-prefix=RV64IZFINXZDINX %s + +declare double @llvm.minimum.f64(double, double) + +define double @fminimum_f64(double %a, double %b) nounwind { +; CHECKIFD-LABEL: fminimum_f64: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: feq.d a0, fa0, fa0 +; CHECKIFD-NEXT: fmv.d fa5, fa1 +; CHECKIFD-NEXT: beqz a0, .LBB0_3 +; CHECKIFD-NEXT: # %bb.1: +;
CHECKIFD-NEXT: feq.d a0, fa1, fa1 +; CHECKIFD-NEXT: beqz a0, .LBB0_4 +; CHECKIFD-NEXT: .LBB0_2: +; CHECKIFD-NEXT: fmin.d fa0, fa0, fa5 +; CHECKIFD-NEXT: ret +; CHECKIFD-NEXT: .LBB0_3: +; CHECKIFD-NEXT: fmv.d fa5, fa0 +; CHECKIFD-NEXT: feq.d a0, fa1, fa1 +; CHECKIFD-NEXT: bnez a0, .LBB0_2 +; CHECKIFD-NEXT: .LBB0_4: +; CHECKIFD-NEXT: fmin.d fa0, fa1, fa5 +; CHECKIFD-NEXT: ret +; +; RV32IZFINXZDINX-LABEL: fminimum_f64: +; RV32IZFINXZDINX: # %bb.0: +; RV32IZFINXZDINX-NEXT: addi sp, sp, -16 +; RV32IZFINXZDINX-NEXT: sw a2, 8(sp) +; RV32IZFINXZDINX-NEXT: sw a3, 12(sp) +; RV32IZFINXZDINX-NEXT: lw a2, 8(sp) +; RV32IZFINXZDINX-NEXT: lw a3, 12(sp) +; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) +; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) +; RV32IZFINXZDINX-NEXT: lw a0, 8(sp) +; RV32IZFINXZDINX-NEXT: lw a1, 12(sp) +; RV32IZFINXZDINX-NEXT: feq.d a6, a0, a0 +; RV32IZFINXZDINX-NEXT: mv a4, a2 +; RV32IZFINXZDINX-NEXT: bnez a6, .LBB0_2 +; RV32IZFINXZDINX-NEXT: # %bb.1: +; RV32IZFINXZDINX-NEXT: mv a4, a0 +; RV32IZFINXZDINX-NEXT: .LBB0_2: +; RV32IZFINXZDINX-NEXT: feq.d a6, a2, a2 +; RV32IZFINXZDINX-NEXT: bnez a6, .LBB0_4 +; RV32IZFINXZDINX-NEXT: # %bb.3: +; RV32IZFINXZDINX-NEXT: mv a0, a2 +; RV32IZFINXZDINX-NEXT: .LBB0_4: +; RV32IZFINXZDINX-NEXT: fmin.d a0, a0, a4 +; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) +; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) +; RV32IZFINXZDINX-NEXT: lw a0, 8(sp) +; RV32IZFINXZDINX-NEXT: lw a1, 12(sp) +; RV32IZFINXZDINX-NEXT: addi sp, sp, 16 +; RV32IZFINXZDINX-NEXT: ret +; +; RV64IZFINXZDINX-LABEL: fminimum_f64: +; RV64IZFINXZDINX: # %bb.0: +; RV64IZFINXZDINX-NEXT: feq.d a3, a0, a0 +; RV64IZFINXZDINX-NEXT: mv a2, a1 +; RV64IZFINXZDINX-NEXT: beqz a3, .LBB0_3 +; RV64IZFINXZDINX-NEXT: # %bb.1: +; RV64IZFINXZDINX-NEXT: feq.d a3, a1, a1 +; RV64IZFINXZDINX-NEXT: beqz a3, .LBB0_4 +; RV64IZFINXZDINX-NEXT: .LBB0_2: +; RV64IZFINXZDINX-NEXT: fmin.d a0, a0, a2 +; RV64IZFINXZDINX-NEXT: ret +; RV64IZFINXZDINX-NEXT: .LBB0_3: +; RV64IZFINXZDINX-NEXT: mv a2, a0 +; RV64IZFINXZDINX-NEXT: feq.d a3, a1, 
a1 +; RV64IZFINXZDINX-NEXT: bnez a3, .LBB0_2 +; RV64IZFINXZDINX-NEXT: .LBB0_4: +; RV64IZFINXZDINX-NEXT: fmin.d a0, a1, a2 +; RV64IZFINXZDINX-NEXT: ret + %1 = call double @llvm.minimum.f64(double %a, double %b) + ret double %1 +} + +declare double @llvm.maximum.f64(double, double) + +define double @fmaximum_f64(double %a, double %b) nounwind { +; CHECKIFD-LABEL: fmaximum_f64: +; CHECKIFD: # %bb.0: +; CHECKIFD-NEXT: feq.d a0, fa0, fa0 +; CHECKIFD-NEXT: fmv.d fa5, fa1 +; CHECKIFD-NEXT: beqz a0, .LBB1_3 +; CHECKIFD-NEXT: # %bb.1: +; CHECKIFD-NEXT: feq.d a0, fa1, fa1 +; CHECKIFD-NEXT: beqz a0, .LBB1_4 +; CHECKIFD-NEXT: .LBB1_2: +; CHECKIFD-NEXT: fmax.d fa0, fa0, fa5 +; CHECKIFD-NEXT: ret +; CHECKIFD-NEXT: .LBB1_3: +; CHECKIFD-NEXT: fmv.d fa5, fa0 +; CHECKIFD-NEXT: feq.d a0, fa1, fa1 +; CHECKIFD-NEXT: bnez a0, .LBB1_2 +; CHECKIFD-NEXT: .LBB1_4: +; CHECKIFD-NEXT: fmax.d fa0, fa1, fa5 +; CHECKIFD-NEXT: ret +; +; RV32IZFINXZDINX-LABEL: fmaximum_f64: +; RV32IZFINXZDINX: # %bb.0: +; RV32IZFINXZDINX-NEXT: addi sp, sp, -16 +; RV32IZFINXZDINX-NEXT: sw a2, 8(sp) +; RV32IZFINXZDINX-NEXT: sw a3, 12(sp) +; RV32IZFINXZDINX-NEXT: lw a2, 8(sp) +; RV32IZFINXZDINX-NEXT: lw a3, 12(sp) +; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) +; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) +; RV32IZFINXZDINX-NEXT: lw a0, 8(sp) +; RV32IZFINXZDINX-NEXT: lw a1, 12(sp) +; RV32IZFINXZDINX-NEXT: feq.d a6, a0, a0 +; RV32IZFINXZDINX-NEXT: mv a4, a2 +; RV32IZFINXZDINX-NEXT: bnez a6, .LBB1_2 +; RV32IZFINXZDINX-NEXT: # %bb.1: +; RV32IZFINXZDINX-NEXT: mv a4, a0 +; RV32IZFINXZDINX-NEXT: .LBB1_2: +; RV32IZFINXZDINX-NEXT: feq.d a6, a2, a2 +; RV32IZFINXZDINX-NEXT: bnez a6, .LBB1_4 +; RV32IZFINXZDINX-NEXT: # %bb.3: +; RV32IZFINXZDINX-NEXT: mv a0, a2 +; RV32IZFINXZDINX-NEXT: .LBB1_4: +; RV32IZFINXZDINX-NEXT: fmax.d a0, a0, a4 +; RV32IZFINXZDINX-NEXT: sw a0, 8(sp) +; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) +; RV32IZFINXZDINX-NEXT: lw a0, 8(sp) +; RV32IZFINXZDINX-NEXT: lw a1, 12(sp) +; RV32IZFINXZDINX-NEXT: addi sp, sp, 16 +; 
RV32IZFINXZDINX-NEXT: ret +; +; RV64IZFINXZDINX-LABEL: fmaximum_f64: +; RV64IZFINXZDINX: # %bb.0: +; RV64IZFINXZDINX-NEXT: feq.d a3, a0, a0 +; RV64IZFINXZDINX-NEXT: mv a2, a1 +; RV64IZFINXZDINX-NEXT: beqz a3, .LBB1_3 +; RV64IZFINXZDINX-NEXT: # %bb.1: +; RV64IZFINXZDINX-NEXT: feq.d a3, a1, a1 +; RV64IZFINXZDINX-NEXT: beqz a3, .LBB1_4 +; RV64IZFINXZDINX-NEXT: .LBB1_2: +; RV64IZFINXZDINX-NEXT: fmax.d a0, a0, a2 +; RV64IZFINXZDINX-NEXT: ret +; RV64IZFINXZDINX-NEXT: .LBB1_3: +; RV64IZFINXZDINX-NEXT: mv a2, a0 +; RV64IZFINXZDINX-NEXT: feq.d a3, a1, a1 +; RV64IZFINXZDINX-NEXT: bnez a3, .LBB1_2 +; RV64IZFINXZDINX-NEXT: .LBB1_4: +; RV64IZFINXZDINX-NEXT: fmax.d a0, a1, a2 +; RV64IZFINXZDINX-NEXT: ret + %1 = call double @llvm.maximum.f64(double %a, double %b) + ret double %1 +} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; RV32IFD: {{.*}} +; RV64IFD: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/float-maximum-minimum.ll b/llvm/test/CodeGen/RISCV/float-maximum-minimum.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/float-maximum-minimum.ll @@ -0,0 +1,183 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+f \ +; RUN: -verify-machineinstrs -target-abi=ilp32f \ +; RUN: | FileCheck -check-prefix=RV32IF %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+zfinx \ +; RUN: -verify-machineinstrs -target-abi=ilp32 \ +; RUN: | FileCheck -check-prefix=RV32IZFINX %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+d \ +; RUN: -verify-machineinstrs -target-abi=ilp32f \ +; RUN: | FileCheck -check-prefix=RV32IF %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+f \ +; RUN: -verify-machineinstrs -target-abi=lp64f \ +; RUN: | FileCheck -check-prefix=RV64IF %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zfinx \ +; RUN: -verify-machineinstrs -target-abi=lp64 \ +; RUN: | 
FileCheck -check-prefix=RV64IZFINX %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+d \ +; RUN: -verify-machineinstrs -target-abi=lp64d \ +; RUN: | FileCheck -check-prefix=RV64IF %s + +declare float @llvm.minimum.f32(float, float) + +define float @fminimum_f32(float %a, float %b) nounwind { +; RV32IF-LABEL: fminimum_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: feq.s a0, fa0, fa0 +; RV32IF-NEXT: fmv.s fa5, fa1 +; RV32IF-NEXT: beqz a0, .LBB0_3 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: feq.s a0, fa1, fa1 +; RV32IF-NEXT: beqz a0, .LBB0_4 +; RV32IF-NEXT: .LBB0_2: +; RV32IF-NEXT: fmin.s fa0, fa0, fa5 +; RV32IF-NEXT: ret +; RV32IF-NEXT: .LBB0_3: +; RV32IF-NEXT: fmv.s fa5, fa0 +; RV32IF-NEXT: feq.s a0, fa1, fa1 +; RV32IF-NEXT: bnez a0, .LBB0_2 +; RV32IF-NEXT: .LBB0_4: +; RV32IF-NEXT: fmin.s fa0, fa1, fa5 +; RV32IF-NEXT: ret +; +; RV32IZFINX-LABEL: fminimum_f32: +; RV32IZFINX: # %bb.0: +; RV32IZFINX-NEXT: feq.s a3, a0, a0 +; RV32IZFINX-NEXT: mv a2, a1 +; RV32IZFINX-NEXT: beqz a3, .LBB0_3 +; RV32IZFINX-NEXT: # %bb.1: +; RV32IZFINX-NEXT: feq.s a3, a1, a1 +; RV32IZFINX-NEXT: beqz a3, .LBB0_4 +; RV32IZFINX-NEXT: .LBB0_2: +; RV32IZFINX-NEXT: fmin.s a0, a0, a2 +; RV32IZFINX-NEXT: ret +; RV32IZFINX-NEXT: .LBB0_3: +; RV32IZFINX-NEXT: mv a2, a0 +; RV32IZFINX-NEXT: feq.s a3, a1, a1 +; RV32IZFINX-NEXT: bnez a3, .LBB0_2 +; RV32IZFINX-NEXT: .LBB0_4: +; RV32IZFINX-NEXT: fmin.s a0, a1, a2 +; RV32IZFINX-NEXT: ret +; +; RV64IF-LABEL: fminimum_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: feq.s a0, fa0, fa0 +; RV64IF-NEXT: fmv.s fa5, fa1 +; RV64IF-NEXT: beqz a0, .LBB0_3 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: feq.s a0, fa1, fa1 +; RV64IF-NEXT: beqz a0, .LBB0_4 +; RV64IF-NEXT: .LBB0_2: +; RV64IF-NEXT: fmin.s fa0, fa0, fa5 +; RV64IF-NEXT: ret +; RV64IF-NEXT: .LBB0_3: +; RV64IF-NEXT: fmv.s fa5, fa0 +; RV64IF-NEXT: feq.s a0, fa1, fa1 +; RV64IF-NEXT: bnez a0, .LBB0_2 +; RV64IF-NEXT: .LBB0_4: +; RV64IF-NEXT: fmin.s fa0, fa1, fa5 +; RV64IF-NEXT: ret +; +; RV64IZFINX-LABEL: fminimum_f32: +; 
RV64IZFINX: # %bb.0: +; RV64IZFINX-NEXT: feq.s a3, a0, a0 +; RV64IZFINX-NEXT: mv a2, a1 +; RV64IZFINX-NEXT: beqz a3, .LBB0_3 +; RV64IZFINX-NEXT: # %bb.1: +; RV64IZFINX-NEXT: feq.s a3, a1, a1 +; RV64IZFINX-NEXT: beqz a3, .LBB0_4 +; RV64IZFINX-NEXT: .LBB0_2: +; RV64IZFINX-NEXT: fmin.s a0, a0, a2 +; RV64IZFINX-NEXT: ret +; RV64IZFINX-NEXT: .LBB0_3: +; RV64IZFINX-NEXT: mv a2, a0 +; RV64IZFINX-NEXT: feq.s a3, a1, a1 +; RV64IZFINX-NEXT: bnez a3, .LBB0_2 +; RV64IZFINX-NEXT: .LBB0_4: +; RV64IZFINX-NEXT: fmin.s a0, a1, a2 +; RV64IZFINX-NEXT: ret + %1 = call float @llvm.minimum.f32(float %a, float %b) + ret float %1 +} + +declare float @llvm.maximum.f32(float, float) + +define float @fmaximum_f32(float %a, float %b) nounwind { +; RV32IF-LABEL: fmaximum_f32: +; RV32IF: # %bb.0: +; RV32IF-NEXT: feq.s a0, fa0, fa0 +; RV32IF-NEXT: fmv.s fa5, fa1 +; RV32IF-NEXT: beqz a0, .LBB1_3 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: feq.s a0, fa1, fa1 +; RV32IF-NEXT: beqz a0, .LBB1_4 +; RV32IF-NEXT: .LBB1_2: +; RV32IF-NEXT: fmax.s fa0, fa0, fa5 +; RV32IF-NEXT: ret +; RV32IF-NEXT: .LBB1_3: +; RV32IF-NEXT: fmv.s fa5, fa0 +; RV32IF-NEXT: feq.s a0, fa1, fa1 +; RV32IF-NEXT: bnez a0, .LBB1_2 +; RV32IF-NEXT: .LBB1_4: +; RV32IF-NEXT: fmax.s fa0, fa1, fa5 +; RV32IF-NEXT: ret +; +; RV32IZFINX-LABEL: fmaximum_f32: +; RV32IZFINX: # %bb.0: +; RV32IZFINX-NEXT: feq.s a3, a0, a0 +; RV32IZFINX-NEXT: mv a2, a1 +; RV32IZFINX-NEXT: beqz a3, .LBB1_3 +; RV32IZFINX-NEXT: # %bb.1: +; RV32IZFINX-NEXT: feq.s a3, a1, a1 +; RV32IZFINX-NEXT: beqz a3, .LBB1_4 +; RV32IZFINX-NEXT: .LBB1_2: +; RV32IZFINX-NEXT: fmax.s a0, a0, a2 +; RV32IZFINX-NEXT: ret +; RV32IZFINX-NEXT: .LBB1_3: +; RV32IZFINX-NEXT: mv a2, a0 +; RV32IZFINX-NEXT: feq.s a3, a1, a1 +; RV32IZFINX-NEXT: bnez a3, .LBB1_2 +; RV32IZFINX-NEXT: .LBB1_4: +; RV32IZFINX-NEXT: fmax.s a0, a1, a2 +; RV32IZFINX-NEXT: ret +; +; RV64IF-LABEL: fmaximum_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: feq.s a0, fa0, fa0 +; RV64IF-NEXT: fmv.s fa5, fa1 +; RV64IF-NEXT: beqz a0, .LBB1_3 +; 
RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: feq.s a0, fa1, fa1 +; RV64IF-NEXT: beqz a0, .LBB1_4 +; RV64IF-NEXT: .LBB1_2: +; RV64IF-NEXT: fmax.s fa0, fa0, fa5 +; RV64IF-NEXT: ret +; RV64IF-NEXT: .LBB1_3: +; RV64IF-NEXT: fmv.s fa5, fa0 +; RV64IF-NEXT: feq.s a0, fa1, fa1 +; RV64IF-NEXT: bnez a0, .LBB1_2 +; RV64IF-NEXT: .LBB1_4: +; RV64IF-NEXT: fmax.s fa0, fa1, fa5 +; RV64IF-NEXT: ret +; +; RV64IZFINX-LABEL: fmaximum_f32: +; RV64IZFINX: # %bb.0: +; RV64IZFINX-NEXT: feq.s a3, a0, a0 +; RV64IZFINX-NEXT: mv a2, a1 +; RV64IZFINX-NEXT: beqz a3, .LBB1_3 +; RV64IZFINX-NEXT: # %bb.1: +; RV64IZFINX-NEXT: feq.s a3, a1, a1 +; RV64IZFINX-NEXT: beqz a3, .LBB1_4 +; RV64IZFINX-NEXT: .LBB1_2: +; RV64IZFINX-NEXT: fmax.s a0, a0, a2 +; RV64IZFINX-NEXT: ret +; RV64IZFINX-NEXT: .LBB1_3: +; RV64IZFINX-NEXT: mv a2, a0 +; RV64IZFINX-NEXT: feq.s a3, a1, a1 +; RV64IZFINX-NEXT: bnez a3, .LBB1_2 +; RV64IZFINX-NEXT: .LBB1_4: +; RV64IZFINX-NEXT: fmax.s a0, a1, a2 +; RV64IZFINX-NEXT: ret + %1 = call float @llvm.maximum.f32(float %a, float %b) + ret float %1 +} diff --git a/llvm/test/CodeGen/RISCV/half-maximum-minimum.ll b/llvm/test/CodeGen/RISCV/half-maximum-minimum.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/half-maximum-minimum.ll @@ -0,0 +1,101 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+zfh \ +; RUN: -verify-machineinstrs -target-abi ilp32f | \ +; RUN: FileCheck -check-prefixes=CHECKIZFH %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zfh \ +; RUN: -verify-machineinstrs -target-abi lp64f | \ +; RUN: FileCheck -check-prefixes=CHECKIZFH %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+zhinx \ +; RUN: -verify-machineinstrs -target-abi ilp32 | \ +; RUN: FileCheck -check-prefixes=CHECKIZHINX %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zhinx \ +; RUN: -verify-machineinstrs -target-abi lp64 | \ +; RUN: FileCheck 
-check-prefixes=CHECKIZHINX %s + +declare half @llvm.minimum.f16(half, half) + +define half @fminimum_f16(half %a, half %b) nounwind { +; CHECKIZFH-LABEL: fminimum_f16: +; CHECKIZFH: # %bb.0: +; CHECKIZFH-NEXT: feq.h a0, fa0, fa0 +; CHECKIZFH-NEXT: fmv.h fa5, fa1 +; CHECKIZFH-NEXT: beqz a0, .LBB0_3 +; CHECKIZFH-NEXT: # %bb.1: +; CHECKIZFH-NEXT: feq.h a0, fa1, fa1 +; CHECKIZFH-NEXT: beqz a0, .LBB0_4 +; CHECKIZFH-NEXT: .LBB0_2: +; CHECKIZFH-NEXT: fmin.h fa0, fa0, fa5 +; CHECKIZFH-NEXT: ret +; CHECKIZFH-NEXT: .LBB0_3: +; CHECKIZFH-NEXT: fmv.h fa5, fa0 +; CHECKIZFH-NEXT: feq.h a0, fa1, fa1 +; CHECKIZFH-NEXT: bnez a0, .LBB0_2 +; CHECKIZFH-NEXT: .LBB0_4: +; CHECKIZFH-NEXT: fmin.h fa0, fa1, fa5 +; CHECKIZFH-NEXT: ret +; +; CHECKIZHINX-LABEL: fminimum_f16: +; CHECKIZHINX: # %bb.0: +; CHECKIZHINX-NEXT: feq.h a3, a0, a0 +; CHECKIZHINX-NEXT: mv a2, a1 +; CHECKIZHINX-NEXT: beqz a3, .LBB0_3 +; CHECKIZHINX-NEXT: # %bb.1: +; CHECKIZHINX-NEXT: feq.h a3, a1, a1 +; CHECKIZHINX-NEXT: beqz a3, .LBB0_4 +; CHECKIZHINX-NEXT: .LBB0_2: +; CHECKIZHINX-NEXT: fmin.h a0, a0, a2 +; CHECKIZHINX-NEXT: ret +; CHECKIZHINX-NEXT: .LBB0_3: +; CHECKIZHINX-NEXT: mv a2, a0 +; CHECKIZHINX-NEXT: feq.h a3, a1, a1 +; CHECKIZHINX-NEXT: bnez a3, .LBB0_2 +; CHECKIZHINX-NEXT: .LBB0_4: +; CHECKIZHINX-NEXT: fmin.h a0, a1, a2 +; CHECKIZHINX-NEXT: ret + %1 = call half @llvm.minimum.f16(half %a, half %b) + ret half %1 +} + +declare half @llvm.maximum.f16(half, half) + +define half @fmaximum_f16(half %a, half %b) nounwind { +; CHECKIZFH-LABEL: fmaximum_f16: +; CHECKIZFH: # %bb.0: +; CHECKIZFH-NEXT: feq.h a0, fa0, fa0 +; CHECKIZFH-NEXT: fmv.h fa5, fa1 +; CHECKIZFH-NEXT: beqz a0, .LBB1_3 +; CHECKIZFH-NEXT: # %bb.1: +; CHECKIZFH-NEXT: feq.h a0, fa1, fa1 +; CHECKIZFH-NEXT: beqz a0, .LBB1_4 +; CHECKIZFH-NEXT: .LBB1_2: +; CHECKIZFH-NEXT: fmax.h fa0, fa0, fa5 +; CHECKIZFH-NEXT: ret +; CHECKIZFH-NEXT: .LBB1_3: +; CHECKIZFH-NEXT: fmv.h fa5, fa0 +; CHECKIZFH-NEXT: feq.h a0, fa1, fa1 +; CHECKIZFH-NEXT: bnez a0, .LBB1_2 +; 
CHECKIZFH-NEXT: .LBB1_4: +; CHECKIZFH-NEXT: fmax.h fa0, fa1, fa5 +; CHECKIZFH-NEXT: ret +; +; CHECKIZHINX-LABEL: fmaximum_f16: +; CHECKIZHINX: # %bb.0: +; CHECKIZHINX-NEXT: feq.h a3, a0, a0 +; CHECKIZHINX-NEXT: mv a2, a1 +; CHECKIZHINX-NEXT: beqz a3, .LBB1_3 +; CHECKIZHINX-NEXT: # %bb.1: +; CHECKIZHINX-NEXT: feq.h a3, a1, a1 +; CHECKIZHINX-NEXT: beqz a3, .LBB1_4 +; CHECKIZHINX-NEXT: .LBB1_2: +; CHECKIZHINX-NEXT: fmax.h a0, a0, a2 +; CHECKIZHINX-NEXT: ret +; CHECKIZHINX-NEXT: .LBB1_3: +; CHECKIZHINX-NEXT: mv a2, a0 +; CHECKIZHINX-NEXT: feq.h a3, a1, a1 +; CHECKIZHINX-NEXT: bnez a3, .LBB1_2 +; CHECKIZHINX-NEXT: .LBB1_4: +; CHECKIZHINX-NEXT: fmax.h a0, a1, a2 +; CHECKIZHINX-NEXT: ret + %1 = call half @llvm.maximum.f16(half %a, half %b) + ret half %1 +}