diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -831,6 +831,7 @@ setCondCodeAction(VFPCCToExpand, VT, Expand); setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal); + setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom); setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT, @@ -1102,7 +1103,7 @@ setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV, ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT, ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM, - ISD::IS_FPCLASS}, + ISD::IS_FPCLASS, ISD::FMAXIMUM, ISD::FMINIMUM}, VT, Custom); setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom); @@ -4662,33 +4663,74 @@ static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { SDLoc DL(Op); - EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); SDValue X = Op.getOperand(0); SDValue Y = Op.getOperand(1); - MVT XLenVT = Subtarget.getXLenVT(); + if (!VT.isVector()) { + MVT XLenVT = Subtarget.getXLenVT(); + + // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This + // ensures that when one input is a nan, the other will also be a nan + // allowing the nan to propagate. If both inputs are nan, this will swap the + // inputs which is harmless. - // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This - // ensures that when one input is a nan, the other will also be a nan allowing - // the nan to propagate. If both inputs are nan, this will swap the inputs - // which is harmless. 
+ SDValue NewY = Y; + if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) { + SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ); + NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X); + } - SDValue NewY = Y;; - if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) { - SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ); - NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X); + SDValue NewX = X; + if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) { + SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ); + NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y); + } + + unsigned Opc = + Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN; + return DAG.getNode(Opc, DL, VT, NewX, NewY); + } + + // Check no NaNs before converting to fixed vector scalable. + bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X); + bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y); + + MVT ContainerVT = VT; + if (VT.isFixedLengthVector()) { + ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); + X = convertToScalableVector(ContainerVT, X, DAG, Subtarget); + Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget); + } + + auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); + + SDValue NewY = Y; + if (!XIsNeverNan) { + SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(), + {X, X, DAG.getCondCode(ISD::SETOEQ), + DAG.getUNDEF(ContainerVT), Mask, VL}); + NewY = + DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, XIsNonNan, Y, X, VL); } SDValue NewX = X; - if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) { - SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ); - NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y); + if (!YIsNeverNan) { + SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(), + {Y, Y, DAG.getCondCode(ISD::SETOEQ), + DAG.getUNDEF(ContainerVT), Mask, VL}); + NewX = + 
DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, YIsNonNan, X, Y, VL); } unsigned Opc = - Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN; - return DAG.getNode(Opc, DL, VT, NewX, NewY); + Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::VFMAX_VL : RISCVISD::VFMIN_VL; + SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY, + DAG.getUNDEF(ContainerVT), Mask, VL); + if (VT.isFixedLengthVector()) + Res = convertFromScalableVector(VT, Res, DAG, Subtarget); + return Res; } /// Get a RISCV target specified VL op for a given SDNode. diff --git a/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll b/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll --- a/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll +++ b/llvm/test/Analysis/CostModel/RISCV/fp-min-max-abs.ll @@ -158,24 +158,24 @@ define void @minimum() { ; CHECK-LABEL: 'minimum' ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call float @llvm.minimum.f32(float undef, float undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <2 x float> @llvm.minimum.v2f32(<2 x float> undef, <2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %3 = call <4 x float> @llvm.minimum.v4f32(<4 x float> undef, <4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %4 = call <8 x float> @llvm.minimum.v8f32(<8 x float> undef, <8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %5 = call <16 x float> @llvm.minimum.v16f32(<16 x float> undef, <16 x float> undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %6 = call @llvm.minimum.nxv1f32( undef, undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %7 = call @llvm.minimum.nxv2f32( undef, undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %8 = call @llvm.minimum.nxv4f32( undef, undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %9 = call 
@llvm.minimum.nxv8f32( undef, undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %10 = call @llvm.minimum.nxv16f32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call <2 x float> @llvm.minimum.v2f32(<2 x float> undef, <2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = call <4 x float> @llvm.minimum.v4f32(<4 x float> undef, <4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call <8 x float> @llvm.minimum.v8f32(<8 x float> undef, <8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x float> @llvm.minimum.v16f32(<16 x float> undef, <16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = call @llvm.minimum.nxv1f32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = call @llvm.minimum.nxv2f32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call @llvm.minimum.nxv4f32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call @llvm.minimum.nxv8f32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call @llvm.minimum.nxv16f32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = call double @llvm.minimum.f64(double undef, double undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <2 x double> @llvm.minimum.v2f64(<2 x double> undef, <2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %13 = call <4 x double> @llvm.minimum.v4f64(<4 x double> undef, <4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %14 = call <8 x double> @llvm.minimum.v8f64(<8 x double> undef, <8 x double> undef) -; CHECK-NEXT: Cost Model: 
Found an estimated cost of 63 for instruction: %15 = call <16 x double> @llvm.minimum.v16f64(<16 x double> undef, <16 x double> undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %16 = call @llvm.minimum.nxv1f64( undef, undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %17 = call @llvm.minimum.nxv2f64( undef, undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %18 = call @llvm.minimum.nxv4f64( undef, undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %19 = call @llvm.minimum.nxv8f64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = call <2 x double> @llvm.minimum.v2f64(<2 x double> undef, <2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x double> @llvm.minimum.v4f64(<4 x double> undef, <4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x double> @llvm.minimum.v8f64(<8 x double> undef, <8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = call <16 x double> @llvm.minimum.v16f64(<16 x double> undef, <16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %16 = call @llvm.minimum.nxv1f64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = call @llvm.minimum.nxv2f64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = call @llvm.minimum.nxv4f64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = call @llvm.minimum.nxv8f64( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; call float @llvm.minimum.f32(float undef, float undef) @@ -203,24 +203,24 @@ define void @maximum() { ; CHECK-LABEL: 'maximum' ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call float @llvm.maximum.f32(float undef, 
float undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <2 x float> @llvm.maximum.v2f32(<2 x float> undef, <2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %3 = call <4 x float> @llvm.maximum.v4f32(<4 x float> undef, <4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %4 = call <8 x float> @llvm.maximum.v8f32(<8 x float> undef, <8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %5 = call <16 x float> @llvm.maximum.v16f32(<16 x float> undef, <16 x float> undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %6 = call @llvm.maximum.nxv1f32( undef, undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %7 = call @llvm.maximum.nxv2f32( undef, undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %8 = call @llvm.maximum.nxv4f32( undef, undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %9 = call @llvm.maximum.nxv8f32( undef, undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %10 = call @llvm.maximum.nxv16f32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = call <2 x float> @llvm.maximum.v2f32(<2 x float> undef, <2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %3 = call <4 x float> @llvm.maximum.v4f32(<4 x float> undef, <4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = call <8 x float> @llvm.maximum.v8f32(<8 x float> undef, <8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call <16 x float> @llvm.maximum.v16f32(<16 x float> undef, <16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %6 = call @llvm.maximum.nxv1f32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %7 = call @llvm.maximum.nxv2f32( 
undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %8 = call @llvm.maximum.nxv4f32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %9 = call @llvm.maximum.nxv8f32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call @llvm.maximum.nxv16f32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = call double @llvm.maximum.f64(double undef, double undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <2 x double> @llvm.maximum.v2f64(<2 x double> undef, <2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %13 = call <4 x double> @llvm.maximum.v4f64(<4 x double> undef, <4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %14 = call <8 x double> @llvm.maximum.v8f64(<8 x double> undef, <8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %15 = call <16 x double> @llvm.maximum.v16f64(<16 x double> undef, <16 x double> undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %16 = call @llvm.maximum.nxv1f64( undef, undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %17 = call @llvm.maximum.nxv2f64( undef, undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %18 = call @llvm.maximum.nxv4f64( undef, undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %19 = call @llvm.maximum.nxv8f64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = call <2 x double> @llvm.maximum.v2f64(<2 x double> undef, <2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %13 = call <4 x double> @llvm.maximum.v4f64(<4 x double> undef, <4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = call <8 x double> 
@llvm.maximum.v8f64(<8 x double> undef, <8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = call <16 x double> @llvm.maximum.v16f64(<16 x double> undef, <16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %16 = call @llvm.maximum.nxv1f64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %17 = call @llvm.maximum.nxv2f64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = call @llvm.maximum.nxv4f64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = call @llvm.maximum.nxv8f64( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; call float @llvm.maximum.f32(float undef, float undef) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll @@ -0,0 +1,266 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ +; RUN: -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ +; RUN: -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s + +declare <2 x half> @llvm.maximum.v2f16(<2 x half>, <2 x half>) + +define <2 x half> @vfmax_v2f16_vv(<2 x half> %a, <2 x half> %b) { +; CHECK-LABEL: vfmax_v2f16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v10, v9, v9 +; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 +; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: ret + %v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 
x half> %b) + ret <2 x half> %v +} + +declare <4 x half> @llvm.maximum.v4f16(<4 x half>, <4 x half>) + +define <4 x half> @vfmax_v4f16_vv(<4 x half> %a, <4 x half> %b) { +; CHECK-LABEL: vfmax_v4f16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v10, v9, v9 +; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 +; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: ret + %v = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> %b) + ret <4 x half> %v +} + +declare <8 x half> @llvm.maximum.v8f16(<8 x half>, <8 x half>) + +define <8 x half> @vfmax_v8f16_vv(<8 x half> %a, <8 x half> %b) { +; CHECK-LABEL: vfmax_v8f16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v10, v9, v9 +; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 +; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: ret + %v = call <8 x half> @llvm.maximum.v8f16(<8 x half> %a, <8 x half> %b) + ret <8 x half> %v +} + +declare <16 x half> @llvm.maximum.v16f16(<16 x half>, <16 x half>) + +define <16 x half> @vfmax_v16f16_vv(<16 x half> %a, <16 x half> %b) { +; CHECK-LABEL: vfmax_v16f16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v12, v10, v10 +; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: vfmax.vv v8, v8, v14 +; CHECK-NEXT: ret + %v = call <16 x half> @llvm.maximum.v16f16(<16 x half> %a, <16 x half> %b) + ret <16 x half> %v +} + +declare <2 x float> @llvm.maximum.v2f32(<2 x float>, <2 x float>) + +define <2 x float> @vfmax_v2f32_vv(<2 x float> %a, <2 x float> %b) { +; CHECK-LABEL: vfmax_v2f32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, 
mf2, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v10, v9, v9 +; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 +; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: ret + %v = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b) + ret <2 x float> %v +} + +declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>) + +define <4 x float> @vfmax_v4f32_vv(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: vfmax_v4f32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v10, v9, v9 +; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 +; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: ret + %v = call <4 x float> @llvm.maximum.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %v +} + +declare <8 x float> @llvm.maximum.v8f32(<8 x float>, <8 x float>) + +define <8 x float> @vfmax_v8f32_vv(<8 x float> %a, <8 x float> %b) { +; CHECK-LABEL: vfmax_v8f32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v12, v10, v10 +; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: vfmax.vv v8, v8, v14 +; CHECK-NEXT: ret + %v = call <8 x float> @llvm.maximum.v8f32(<8 x float> %a, <8 x float> %b) + ret <8 x float> %v +} + +declare <16 x float> @llvm.maximum.v16f32(<16 x float>, <16 x float>) + +define <16 x float> @vfmax_v16f32_vv(<16 x float> %a, <16 x float> %b) { +; CHECK-LABEL: vfmax_v16f32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v16, v12, v12 +; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 +; CHECK-NEXT: vfmax.vv v8, v8, v20 +; 
CHECK-NEXT: ret + %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %a, <16 x float> %b) + ret <16 x float> %v +} + +declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>) + +define <2 x double> @vfmax_v2f64_vv(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: vfmax_v2f64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v10, v9, v9 +; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 +; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: ret + %v = call <2 x double> @llvm.maximum.v2f64(<2 x double> %a, <2 x double> %b) + ret <2 x double> %v +} + +declare <4 x double> @llvm.maximum.v4f64(<4 x double>, <4 x double>) + +define <4 x double> @vfmax_v4f64_vv(<4 x double> %a, <4 x double> %b) { +; CHECK-LABEL: vfmax_v4f64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v12, v10, v10 +; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: vfmax.vv v8, v8, v14 +; CHECK-NEXT: ret + %v = call <4 x double> @llvm.maximum.v4f64(<4 x double> %a, <4 x double> %b) + ret <4 x double> %v +} + +declare <8 x double> @llvm.maximum.v8f64(<8 x double>, <8 x double>) + +define <8 x double> @vfmax_v8f64_vv(<8 x double> %a, <8 x double> %b) { +; CHECK-LABEL: vfmax_v8f64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v16, v12, v12 +; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 +; CHECK-NEXT: vfmax.vv v8, v8, v20 +; CHECK-NEXT: ret + %v = call <8 x double> @llvm.maximum.v8f64(<8 x double> %a, <8 x double> %b) + ret <8 x double> %v +} + +declare <16 x double> @llvm.maximum.v16f64(<16 x double>, <16 x double>) + +define <16 x 
double> @vfmax_v16f64_vv(<16 x double> %a, <16 x double> %b) nounwind { +; CHECK-LABEL: vfmax_v16f64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v1, v16, v16 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmax.vv v8, v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <16 x double> @llvm.maximum.v16f64(<16 x double> %a, <16 x double> %b) + ret <16 x double> %v +} + +define <2 x half> @vfmax_v2f16_vv_nnan(<2 x half> %a, <2 x half> %b) { +; CHECK-LABEL: vfmax_v2f16_vv_nnan: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vfmax.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call nnan <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b) + ret <2 x half> %v +} + +; FIXME: The nnan from fadd isn't propagating. +define <2 x half> @vfmax_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) { +; CHECK-LABEL: vfmax_v2f16_vv_nnana: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vfadd.vv v10, v8, v8 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v8, v9, v9 +; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0 +; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: ret + %c = fadd nnan <2 x half> %a, %a + %v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %c, <2 x half> %b) + ret <2 x half> %v +} + +; FIXME: The nnan from fadd isn't propagating. 
+define <2 x half> @vfmax_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) { +; CHECK-LABEL: vfmax_v2f16_vv_nnanb: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vfadd.vv v10, v9, v9 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v9, v8, v8 +; CHECK-NEXT: vmerge.vvm v11, v10, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 +; CHECK-NEXT: vfmax.vv v8, v11, v8 +; CHECK-NEXT: ret + %c = fadd nnan <2 x half> %b, %b + %v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %c) + ret <2 x half> %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll @@ -0,0 +1,266 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ +; RUN: -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ +; RUN: -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s + +declare <2 x half> @llvm.minimum.v2f16(<2 x half>, <2 x half>) + +define <2 x half> @vfmin_v2f16_vv(<2 x half> %a, <2 x half> %b) { +; CHECK-LABEL: vfmin_v2f16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v10, v9, v9 +; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 +; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: ret + %v = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b) + ret <2 x half> %v +} + +declare <4 x half> @llvm.minimum.v4f16(<4 x half>, <4 x half>) + +define <4 x half> @vfmin_v4f16_vv(<4 x half> %a, <4 x half> %b) { +; CHECK-LABEL: vfmin_v4f16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, 
e16, mf2, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v10, v9, v9 +; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 +; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: ret + %v = call <4 x half> @llvm.minimum.v4f16(<4 x half> %a, <4 x half> %b) + ret <4 x half> %v +} + +declare <8 x half> @llvm.minimum.v8f16(<8 x half>, <8 x half>) + +define <8 x half> @vfmin_v8f16_vv(<8 x half> %a, <8 x half> %b) { +; CHECK-LABEL: vfmin_v8f16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v10, v9, v9 +; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 +; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: ret + %v = call <8 x half> @llvm.minimum.v8f16(<8 x half> %a, <8 x half> %b) + ret <8 x half> %v +} + +declare <16 x half> @llvm.minimum.v16f16(<16 x half>, <16 x half>) + +define <16 x half> @vfmin_v16f16_vv(<16 x half> %a, <16 x half> %b) { +; CHECK-LABEL: vfmin_v16f16_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v12, v10, v10 +; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: vfmin.vv v8, v8, v14 +; CHECK-NEXT: ret + %v = call <16 x half> @llvm.minimum.v16f16(<16 x half> %a, <16 x half> %b) + ret <16 x half> %v +} + +declare <2 x float> @llvm.minimum.v2f32(<2 x float>, <2 x float>) + +define <2 x float> @vfmin_v2f32_vv(<2 x float> %a, <2 x float> %b) { +; CHECK-LABEL: vfmin_v2f32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v10, v9, v9 +; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 +; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: ret + 
%v = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b) + ret <2 x float> %v +} + +declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>) + +define <4 x float> @vfmin_v4f32_vv(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: vfmin_v4f32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v10, v9, v9 +; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 +; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: ret + %v = call <4 x float> @llvm.minimum.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %v +} + +declare <8 x float> @llvm.minimum.v8f32(<8 x float>, <8 x float>) + +define <8 x float> @vfmin_v8f32_vv(<8 x float> %a, <8 x float> %b) { +; CHECK-LABEL: vfmin_v8f32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v12, v10, v10 +; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: vfmin.vv v8, v8, v14 +; CHECK-NEXT: ret + %v = call <8 x float> @llvm.minimum.v8f32(<8 x float> %a, <8 x float> %b) + ret <8 x float> %v +} + +declare <16 x float> @llvm.minimum.v16f32(<16 x float>, <16 x float>) + +define <16 x float> @vfmin_v16f32_vv(<16 x float> %a, <16 x float> %b) { +; CHECK-LABEL: vfmin_v16f32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v16, v12, v12 +; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 +; CHECK-NEXT: vfmin.vv v8, v8, v20 +; CHECK-NEXT: ret + %v = call <16 x float> @llvm.minimum.v16f32(<16 x float> %a, <16 x float> %b) + ret <16 x float> %v +} + +declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>) + +define <2 x double> @vfmin_v2f64_vv(<2 x double> %a, <2 x 
double> %b) { +; CHECK-LABEL: vfmin_v2f64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v10, v9, v9 +; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 +; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: ret + %v = call <2 x double> @llvm.minimum.v2f64(<2 x double> %a, <2 x double> %b) + ret <2 x double> %v +} + +declare <4 x double> @llvm.minimum.v4f64(<4 x double>, <4 x double>) + +define <4 x double> @vfmin_v4f64_vv(<4 x double> %a, <4 x double> %b) { +; CHECK-LABEL: vfmin_v4f64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v12, v10, v10 +; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: vfmin.vv v8, v8, v14 +; CHECK-NEXT: ret + %v = call <4 x double> @llvm.minimum.v4f64(<4 x double> %a, <4 x double> %b) + ret <4 x double> %v +} + +declare <8 x double> @llvm.minimum.v8f64(<8 x double>, <8 x double>) + +define <8 x double> @vfmin_v8f64_vv(<8 x double> %a, <8 x double> %b) { +; CHECK-LABEL: vfmin_v8f64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v16, v12, v12 +; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 +; CHECK-NEXT: vfmin.vv v8, v8, v20 +; CHECK-NEXT: ret + %v = call <8 x double> @llvm.minimum.v8f64(<8 x double> %a, <8 x double> %b) + ret <8 x double> %v +} + +declare <16 x double> @llvm.minimum.v16f64(<16 x double>, <16 x double>) + +define <16 x double> @vfmin_v16f64_vv(<16 x double> %a, <16 x double> %b) nounwind { +; CHECK-LABEL: vfmin_v16f64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; 
CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v1, v16, v16 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmin.vv v8, v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %v = call <16 x double> @llvm.minimum.v16f64(<16 x double> %a, <16 x double> %b) + ret <16 x double> %v +} + +define <2 x half> @vfmin_v2f16_vv_nnan(<2 x half> %a, <2 x half> %b) { +; CHECK-LABEL: vfmin_v2f16_vv_nnan: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vfmin.vv v8, v8, v9 +; CHECK-NEXT: ret + %v = call nnan <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b) + ret <2 x half> %v +} + +; FIXME: The nnan from fadd isn't propagating. +define <2 x half> @vfmin_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) { +; CHECK-LABEL: vfmin_v2f16_vv_nnana: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vfadd.vv v10, v8, v8 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v8, v9, v9 +; CHECK-NEXT: vmerge.vvm v11, v10, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0 +; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: ret + %c = fadd nnan <2 x half> %a, %a + %v = call <2 x half> @llvm.minimum.v2f16(<2 x half> %c, <2 x half> %b) + ret <2 x half> %v +} + +; FIXME: The nnan from fadd isn't propagating. 
+define <2 x half> @vfmin_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) {
+; CHECK-LABEL: vfmin_v2f16_vv_nnanb:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vfadd.vv v10, v9, v9
+; CHECK-NEXT:    vmfeq.vv v0, v10, v10
+; CHECK-NEXT:    vmfeq.vv v9, v8, v8
+; CHECK-NEXT:    vmerge.vvm v11, v10, v8, v0
+; CHECK-NEXT:    vmv1r.v v0, v9
+; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    vfmin.vv v8, v11, v8
+; CHECK-NEXT:    ret
+  %c = fadd nnan <2 x half> %b, %b
+  %v = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %c)
+  ret <2 x half> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
@@ -0,0 +1,340 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s
+
+; Lowering of llvm.maximum on scalable half/float/double vectors. Without nnan,
+; each operand is compared with itself (vmfeq) and merged with the other
+; (vmerge), so a NaN in either input reaches both operands of vfmax.
+
+declare <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>)
+
+define <vscale x 1 x half> @vfmax_nxv1f16_vv(<vscale x 1 x half> %a, <vscale x 1 x half> %b) {
+; CHECK-LABEL: vfmax_nxv1f16_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v10, v9, v9
+; CHECK-NEXT:    vmerge.vvm v11, v8, v9, v0
+; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    vfmax.vv v8, v8, v11
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b)
+  ret <vscale x 1 x half> %v
+}
+
+declare <vscale x 2 x half> @llvm.maximum.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
+
+define <vscale x 2 x half> @vfmax_nxv2f16_vv(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
+; CHECK-LABEL: vfmax_nxv2f16_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v10, v9, v9
+; CHECK-NEXT:    vmerge.vvm v11, v8, v9, v0
+; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    vfmax.vv v8, v8, v11
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x half> @llvm.maximum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
+  ret <vscale x 2 x half> %v
+}
+
+declare <vscale x 4 x half> @llvm.maximum.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
+
+define <vscale x 4 x half> @vfmax_nxv4f16_vv(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
+; CHECK-LABEL: vfmax_nxv4f16_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v10, v9, v9
+; CHECK-NEXT:    vmerge.vvm v11, v8, v9, v0
+; CHECK-NEXT:    vmv.v.v v0, v10
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    vfmax.vv v8, v8, v11
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x half> @llvm.maximum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
+  ret <vscale x 4 x half> %v
+}
+
+declare <vscale x 8 x half> @llvm.maximum.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
+
+define <vscale x 8 x half> @vfmax_nxv8f16_vv(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: vfmax_nxv8f16_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v12, v10, v10
+; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
+; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT:    vfmax.vv v8, v8, v14
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x half> @llvm.maximum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %v
+}
+
+declare <vscale x 16 x half> @llvm.maximum.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>)
+
+define <vscale x 16 x half> @vfmax_nxv16f16_vv(<vscale x 16 x half> %a, <vscale x 16 x half> %b) {
+; CHECK-LABEL: vfmax_nxv16f16_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v16, v12, v12
+; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
+; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
+; CHECK-NEXT:    vfmax.vv v8, v8, v20
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x half> @llvm.maximum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b)
+  ret <vscale x 16 x half> %v
+}
+
+declare <vscale x 32 x half> @llvm.maximum.nxv32f16(<vscale x 32 x half>, <vscale x 32 x half>)
+
+define <vscale x 32 x half> @vfmax_nxv32f16_vv(<vscale x 32 x half> %a, <vscale x 32 x half> %b) nounwind {
+; CHECK-LABEL: vfmax_nxv32f16_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v1, v16, v16
+; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vmv1r.v v0, v1
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vfmax.vv v8, v8, v16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 32 x half> @llvm.maximum.nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b)
+  ret <vscale x 32 x half> %v
+}
+
+declare <vscale x 1 x float> @llvm.maximum.nxv1f32(<vscale x 1 x float>, <vscale x 1 x float>)
+
+define <vscale x 1 x float> @vfmax_nxv1f32_vv(<vscale x 1 x float> %a, <vscale x 1 x float> %b) {
+; CHECK-LABEL: vfmax_nxv1f32_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v10, v9, v9
+; CHECK-NEXT:    vmerge.vvm v11, v8, v9, v0
+; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    vfmax.vv v8, v8, v11
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x float> @llvm.maximum.nxv1f32(<vscale x 1 x float> %a, <vscale x 1 x float> %b)
+  ret <vscale x 1 x float> %v
+}
+
+declare <vscale x 2 x float> @llvm.maximum.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
+
+define <vscale x 2 x float> @vfmax_nxv2f32_vv(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
+; CHECK-LABEL: vfmax_nxv2f32_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v10, v9, v9
+; CHECK-NEXT:    vmerge.vvm v11, v8, v9, v0
+; CHECK-NEXT:    vmv.v.v v0, v10
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    vfmax.vv v8, v8, v11
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x float> @llvm.maximum.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b)
+  ret <vscale x 2 x float> %v
+}
+
+declare <vscale x 4 x float> @llvm.maximum.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+
+define <vscale x 4 x float> @vfmax_nxv4f32_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: vfmax_nxv4f32_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v12, v10, v10
+; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
+; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT:    vfmax.vv v8, v8, v14
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x float> @llvm.maximum.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %v
+}
+
+declare <vscale x 8 x float> @llvm.maximum.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>)
+
+define <vscale x 8 x float> @vfmax_nxv8f32_vv(<vscale x 8 x float> %a, <vscale x 8 x float> %b) {
+; CHECK-LABEL: vfmax_nxv8f32_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v16, v12, v12
+; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
+; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
+; CHECK-NEXT:    vfmax.vv v8, v8, v20
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x float> @llvm.maximum.nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x float> %b)
+  ret <vscale x 8 x float> %v
+}
+
+declare <vscale x 16 x float> @llvm.maximum.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>)
+
+define <vscale x 16 x float> @vfmax_nxv16f32_vv(<vscale x 16 x float> %a, <vscale x 16 x float> %b) nounwind {
+; CHECK-LABEL: vfmax_nxv16f32_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v1, v16, v16
+; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vmv1r.v v0, v1
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vfmax.vv v8, v8, v16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x float> @llvm.maximum.nxv16f32(<vscale x 16 x float> %a, <vscale x 16 x float> %b)
+  ret <vscale x 16 x float> %v
+}
+
+declare <vscale x 1 x double> @llvm.maximum.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>)
+
+define <vscale x 1 x double> @vfmax_nxv1f64_vv(<vscale x 1 x double> %a, <vscale x 1 x double> %b) {
+; CHECK-LABEL: vfmax_nxv1f64_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v10, v9, v9
+; CHECK-NEXT:    vmerge.vvm v11, v8, v9, v0
+; CHECK-NEXT:    vmv.v.v v0, v10
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    vfmax.vv v8, v8, v11
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x double> @llvm.maximum.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b)
+  ret <vscale x 1 x double> %v
+}
+
+declare <vscale x 2 x double> @llvm.maximum.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+
+define <vscale x 2 x double> @vfmax_nxv2f64_vv(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: vfmax_nxv2f64_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v12, v10, v10
+; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
+; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT:    vfmax.vv v8, v8, v14
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x double> @llvm.maximum.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %v
+}
+
+declare <vscale x 4 x double> @llvm.maximum.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>)
+
+define <vscale x 4 x double> @vfmax_nxv4f64_vv(<vscale x 4 x double> %a, <vscale x 4 x double> %b) {
+; CHECK-LABEL: vfmax_nxv4f64_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v16, v12, v12
+; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
+; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
+; CHECK-NEXT:    vfmax.vv v8, v8, v20
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x double> @llvm.maximum.nxv4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b)
+  ret <vscale x 4 x double> %v
+}
+
+declare <vscale x 8 x double> @llvm.maximum.nxv8f64(<vscale x 8 x double>, <vscale x 8 x double>)
+
+define <vscale x 8 x double> @vfmax_nxv8f64_vv(<vscale x 8 x double> %a, <vscale x 8 x double> %b) nounwind {
+; CHECK-LABEL: vfmax_nxv8f64_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v1, v16, v16
+; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vmv1r.v v0, v1
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vfmax.vv v8, v8, v16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x double> @llvm.maximum.nxv8f64(<vscale x 8 x double> %a, <vscale x 8 x double> %b)
+  ret <vscale x 8 x double> %v
+}
+
+; nnan on the call itself: no NaN fix-up is emitted, only vfmax.
+define <vscale x 1 x half> @vfmax_nxv1f16_vv_nnan(<vscale x 1 x half> %a, <vscale x 1 x half> %b) {
+; CHECK-LABEL: vfmax_nxv1f16_vv_nnan:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vfmax.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %v = call nnan <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b)
+  ret <vscale x 1 x half> %v
+}
+
+; %c comes from an nnan fadd, so only %b is compared with itself; the merge
+; that substitutes a NaN %b lane for %c is folded into the masked vfadd.
+define <vscale x 1 x half> @vfmax_nxv1f16_vv_nnana(<vscale x 1 x half> %a, <vscale x 1 x half> %b) {
+; CHECK-LABEL: vfmax_nxv1f16_vv_nnana:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
+; CHECK-NEXT:    vmfeq.vv v0, v9, v9
+; CHECK-NEXT:    vmv1r.v v10, v9
+; CHECK-NEXT:    vfadd.vv v10, v8, v8, v0.t
+; CHECK-NEXT:    vfmax.vv v8, v10, v9
+; CHECK-NEXT:    ret
+  %c = fadd nnan <vscale x 1 x half> %a, %a
+  %v = call <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> %c, <vscale x 1 x half> %b)
+  ret <vscale x 1 x half> %v
+}
+
+; %c comes from an nnan fadd, so only %a is compared with itself; the merge
+; that substitutes a NaN %a lane for %c is folded into the masked vfadd.
+define <vscale x 1 x half> @vfmax_nxv1f16_vv_nnanb(<vscale x 1 x half> %a, <vscale x 1 x half> %b) {
+; CHECK-LABEL: vfmax_nxv1f16_vv_nnanb:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vfadd.vv v10, v9, v9, v0.t
+; CHECK-NEXT:    vfmax.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %c = fadd nnan <vscale x 1 x half> %b, %b
+  %v = call <vscale x 1 x half> @llvm.maximum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %c)
+  ret <vscale x 1 x half> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
@@ -0,0 +1,340 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s
+
+; Lowering of llvm.minimum on scalable half/float/double vectors. Without nnan,
+; each operand is compared with itself (vmfeq) and merged with the other
+; (vmerge), so a NaN in either input reaches both operands of vfmin.
+
+declare <vscale x 1 x half> @llvm.minimum.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>)
+
+define <vscale x 1 x half> @vfmin_nxv1f16_vv(<vscale x 1 x half> %a, <vscale x 1 x half> %b) {
+; CHECK-LABEL: vfmin_nxv1f16_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v10, v9, v9
+; CHECK-NEXT:    vmerge.vvm v11, v8, v9, v0
+; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    vfmin.vv v8, v8, v11
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x half> @llvm.minimum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b)
+  ret <vscale x 1 x half> %v
+}
+
+declare <vscale x 2 x half> @llvm.minimum.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
+
+define <vscale x 2 x half> @vfmin_nxv2f16_vv(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
+; CHECK-LABEL: vfmin_nxv2f16_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v10, v9, v9
+; CHECK-NEXT:    vmerge.vvm v11, v8, v9, v0
+; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    vfmin.vv v8, v8, v11
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x half> @llvm.minimum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
+  ret <vscale x 2 x half> %v
+}
+
+declare <vscale x 4 x half> @llvm.minimum.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
+
+define <vscale x 4 x half> @vfmin_nxv4f16_vv(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
+; CHECK-LABEL: vfmin_nxv4f16_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v10, v9, v9
+; CHECK-NEXT:    vmerge.vvm v11, v8, v9, v0
+; CHECK-NEXT:    vmv.v.v v0, v10
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    vfmin.vv v8, v8, v11
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x half> @llvm.minimum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
+  ret <vscale x 4 x half> %v
+}
+
+declare <vscale x 8 x half> @llvm.minimum.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
+
+define <vscale x 8 x half> @vfmin_nxv8f16_vv(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: vfmin_nxv8f16_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v12, v10, v10
+; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
+; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT:    vfmin.vv v8, v8, v14
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x half> @llvm.minimum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %v
+}
+
+declare <vscale x 16 x half> @llvm.minimum.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>)
+
+define <vscale x 16 x half> @vfmin_nxv16f16_vv(<vscale x 16 x half> %a, <vscale x 16 x half> %b) {
+; CHECK-LABEL: vfmin_nxv16f16_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v16, v12, v12
+; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
+; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
+; CHECK-NEXT:    vfmin.vv v8, v8, v20
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x half> @llvm.minimum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b)
+  ret <vscale x 16 x half> %v
+}
+
+declare <vscale x 32 x half> @llvm.minimum.nxv32f16(<vscale x 32 x half>, <vscale x 32 x half>)
+
+define <vscale x 32 x half> @vfmin_nxv32f16_vv(<vscale x 32 x half> %a, <vscale x 32 x half> %b) nounwind {
+; CHECK-LABEL: vfmin_nxv32f16_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v1, v16, v16
+; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vmv1r.v v0, v1
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vfmin.vv v8, v8, v16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 32 x half> @llvm.minimum.nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b)
+  ret <vscale x 32 x half> %v
+}
+
+declare <vscale x 1 x float> @llvm.minimum.nxv1f32(<vscale x 1 x float>, <vscale x 1 x float>)
+
+define <vscale x 1 x float> @vfmin_nxv1f32_vv(<vscale x 1 x float> %a, <vscale x 1 x float> %b) {
+; CHECK-LABEL: vfmin_nxv1f32_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v10, v9, v9
+; CHECK-NEXT:    vmerge.vvm v11, v8, v9, v0
+; CHECK-NEXT:    vmv1r.v v0, v10
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    vfmin.vv v8, v8, v11
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x float> @llvm.minimum.nxv1f32(<vscale x 1 x float> %a, <vscale x 1 x float> %b)
+  ret <vscale x 1 x float> %v
+}
+
+declare <vscale x 2 x float> @llvm.minimum.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
+
+define <vscale x 2 x float> @vfmin_nxv2f32_vv(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
+; CHECK-LABEL: vfmin_nxv2f32_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v10, v9, v9
+; CHECK-NEXT:    vmerge.vvm v11, v8, v9, v0
+; CHECK-NEXT:    vmv.v.v v0, v10
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    vfmin.vv v8, v8, v11
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x float> @llvm.minimum.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b)
+  ret <vscale x 2 x float> %v
+}
+
+declare <vscale x 4 x float> @llvm.minimum.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+
+define <vscale x 4 x float> @vfmin_nxv4f32_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: vfmin_nxv4f32_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v12, v10, v10
+; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
+; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT:    vfmin.vv v8, v8, v14
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x float> @llvm.minimum.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %v
+}
+
+declare <vscale x 8 x float> @llvm.minimum.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>)
+
+define <vscale x 8 x float> @vfmin_nxv8f32_vv(<vscale x 8 x float> %a, <vscale x 8 x float> %b) {
+; CHECK-LABEL: vfmin_nxv8f32_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v16, v12, v12
+; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
+; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
+; CHECK-NEXT:    vfmin.vv v8, v8, v20
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x float> @llvm.minimum.nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x float> %b)
+  ret <vscale x 8 x float> %v
+}
+
+declare <vscale x 16 x float> @llvm.minimum.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>)
+
+define <vscale x 16 x float> @vfmin_nxv16f32_vv(<vscale x 16 x float> %a, <vscale x 16 x float> %b) nounwind {
+; CHECK-LABEL: vfmin_nxv16f32_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v1, v16, v16
+; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vmv1r.v v0, v1
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vfmin.vv v8, v8, v16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x float> @llvm.minimum.nxv16f32(<vscale x 16 x float> %a, <vscale x 16 x float> %b)
+  ret <vscale x 16 x float> %v
+}
+
+declare <vscale x 1 x double> @llvm.minimum.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>)
+
+define <vscale x 1 x double> @vfmin_nxv1f64_vv(<vscale x 1 x double> %a, <vscale x 1 x double> %b) {
+; CHECK-LABEL: vfmin_nxv1f64_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v10, v9, v9
+; CHECK-NEXT:    vmerge.vvm v11, v8, v9, v0
+; CHECK-NEXT:    vmv.v.v v0, v10
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    vfmin.vv v8, v8, v11
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x double> @llvm.minimum.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x double> %b)
+  ret <vscale x 1 x double> %v
+}
+
+declare <vscale x 2 x double> @llvm.minimum.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+
+define <vscale x 2 x double> @vfmin_nxv2f64_vv(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: vfmin_nxv2f64_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v12, v10, v10
+; CHECK-NEXT:    vmerge.vvm v14, v8, v10, v0
+; CHECK-NEXT:    vmv1r.v v0, v12
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT:    vfmin.vv v8, v8, v14
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x double> @llvm.minimum.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %v
+}
+
+declare <vscale x 4 x double> @llvm.minimum.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>)
+
+define <vscale x 4 x double> @vfmin_nxv4f64_vv(<vscale x 4 x double> %a, <vscale x 4 x double> %b) {
+; CHECK-LABEL: vfmin_nxv4f64_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v16, v12, v12
+; CHECK-NEXT:    vmerge.vvm v20, v8, v12, v0
+; CHECK-NEXT:    vmv1r.v v0, v16
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
+; CHECK-NEXT:    vfmin.vv v8, v8, v20
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x double> @llvm.minimum.nxv4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b)
+  ret <vscale x 4 x double> %v
+}
+
+declare <vscale x 8 x double> @llvm.minimum.nxv8f64(<vscale x 8 x double>, <vscale x 8 x double>)
+
+define <vscale x 8 x double> @vfmin_nxv8f64_vv(<vscale x 8 x double> %a, <vscale x 8 x double> %b) nounwind {
+; CHECK-LABEL: vfmin_nxv8f64_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    sub sp, sp, a0
+; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmfeq.vv v1, v16, v16
+; CHECK-NEXT:    vmerge.vvm v24, v8, v16, v0
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vmv1r.v v0, v1
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    vfmin.vv v8, v8, v16
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x double> @llvm.minimum.nxv8f64(<vscale x 8 x double> %a, <vscale x 8 x double> %b)
+  ret <vscale x 8 x double> %v
+}
+
+; nnan on the call itself: no NaN fix-up is emitted, only vfmin.
+define <vscale x 1 x half> @vfmin_nxv1f16_vv_nnan(<vscale x 1 x half> %a, <vscale x 1 x half> %b) {
+; CHECK-LABEL: vfmin_nxv1f16_vv_nnan:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vfmin.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %v = call nnan <vscale x 1 x half> @llvm.minimum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b)
+  ret <vscale x 1 x half> %v
+}
+
+; %c comes from an nnan fadd, so only %b is compared with itself; the merge
+; that substitutes a NaN %b lane for %c is folded into the masked vfadd.
+define <vscale x 1 x half> @vfmin_nxv1f16_vv_nnana(<vscale x 1 x half> %a, <vscale x 1 x half> %b) {
+; CHECK-LABEL: vfmin_nxv1f16_vv_nnana:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
+; CHECK-NEXT:    vmfeq.vv v0, v9, v9
+; CHECK-NEXT:    vmv1r.v v10, v9
+; CHECK-NEXT:    vfadd.vv v10, v8, v8, v0.t
+; CHECK-NEXT:    vfmin.vv v8, v10, v9
+; CHECK-NEXT:    ret
+  %c = fadd nnan <vscale x 1 x half> %a, %a
+  %v = call <vscale x 1 x half> @llvm.minimum.nxv1f16(<vscale x 1 x half> %c, <vscale x 1 x half> %b)
+  ret <vscale x 1 x half> %v
+}
+
+; %c comes from an nnan fadd, so only %a is compared with itself; the merge
+; that substitutes a NaN %a lane for %c is folded into the masked vfadd.
+define <vscale x 1 x half> @vfmin_nxv1f16_vv_nnanb(<vscale x 1 x half> %a, <vscale x 1 x half> %b) {
+; CHECK-LABEL: vfmin_nxv1f16_vv_nnanb:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
+; CHECK-NEXT:    vmfeq.vv v0, v8, v8
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vfadd.vv v10, v9, v9, v0.t
+; CHECK-NEXT:    vfmin.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %c = fadd nnan <vscale x 1 x half> %b, %b
+  %v = call <vscale x 1 x half> @llvm.minimum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %c)
+  ret <vscale x 1 x half> %v
+}