diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -172,6 +172,8 @@
   VECREDUCE_XOR_VL,
   VECREDUCE_FADD_VL,
   VECREDUCE_SEQ_FADD_VL,
+  VECREDUCE_FMIN_VL,
+  VECREDUCE_FMAX_VL,
 
   // Vector binary and unary ops with a mask as a third operand, and VL as a
   // fourth operand.
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -550,6 +550,8 @@
       setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
       setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
 
       setOperationAction(ISD::FCOPYSIGN, VT, Legal);
 
       setOperationAction(ISD::MLOAD, VT, Custom);
@@ -746,6 +748,8 @@
         setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
         setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
+        setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
+        setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
       }
 
       // Custom-legalize bitcasts from fixed-length vectors to scalar types.
@@ -2226,6 +2230,8 @@
     return lowerVECREDUCE(Op, DAG);
   case ISD::VECREDUCE_FADD:
   case ISD::VECREDUCE_SEQ_FADD:
+  case ISD::VECREDUCE_FMIN:
+  case ISD::VECREDUCE_FMAX:
     return lowerFPVECREDUCE(Op, DAG);
   case ISD::INSERT_SUBVECTOR:
     return lowerINSERT_SUBVECTOR(Op, DAG);
@@ -3476,7 +3482,10 @@
 static std::tuple<unsigned, SDValue, SDValue>
 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) {
   SDLoc DL(Op);
-  switch (Op.getOpcode()) {
+  auto Flags = Op->getFlags();
+  unsigned Opcode = Op.getOpcode();
+  unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Opcode);
+  switch (Opcode) {
   default:
     llvm_unreachable("Unhandled reduction");
   case ISD::VECREDUCE_FADD:
@@ -3485,6 +3494,12 @@
   case ISD::VECREDUCE_SEQ_FADD:
     return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
                            Op.getOperand(0));
+  case ISD::VECREDUCE_FMIN:
+    return std::make_tuple(RISCVISD::VECREDUCE_FMIN_VL, Op.getOperand(0),
+                           DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
+  case ISD::VECREDUCE_FMAX:
+    return std::make_tuple(RISCVISD::VECREDUCE_FMAX_VL, Op.getOperand(0),
+                           DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
   }
 }
 
@@ -7762,6 +7777,8 @@
   NODE_NAME_CASE(VECREDUCE_XOR_VL)
   NODE_NAME_CASE(VECREDUCE_FADD_VL)
   NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
+  NODE_NAME_CASE(VECREDUCE_FMIN_VL)
+  NODE_NAME_CASE(VECREDUCE_FMAX_VL)
   NODE_NAME_CASE(ADD_VL)
   NODE_NAME_CASE(AND_VL)
   NODE_NAME_CASE(MUL_VL)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -226,7 +226,7 @@
 ]>;
 
 foreach kind = ["ADD", "UMAX", "SMAX", "UMIN", "SMIN", "AND", "OR", "XOR",
-                "FADD", "SEQ_FADD"] in
+                "FADD", "SEQ_FADD", "FMIN", "FMAX"] in
   def rvv_vecreduce_#kind#_vl : SDNode<"RISCVISD::VECREDUCE_"#kind#"_VL", SDTRVVVecReduce>;
 
 // Ignore the vl operand.
@@ -736,6 +736,8 @@
 let Predicates = [HasStdExtV, HasStdExtF] in {
 defm : VPatReductionVL<rvv_vecreduce_SEQ_FADD_vl, "PseudoVFREDOSUM", /*is_float*/1>;
 defm : VPatReductionVL<rvv_vecreduce_FADD_vl, "PseudoVFREDSUM", /*is_float*/1>;
+defm : VPatReductionVL<rvv_vecreduce_FMIN_vl, "PseudoVFREDMIN", /*is_float*/1>;
+defm : VPatReductionVL<rvv_vecreduce_FMAX_vl, "PseudoVFREDMAX", /*is_float*/1>;
 } // Predicates = [HasStdExtV, HasStdExtF]
 
 let Predicates = [HasStdExtV, HasStdExtF] in {
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -112,18 +112,6 @@
   // These reductions have no equivalent in RVV
   case Intrinsic::vector_reduce_mul:
   case Intrinsic::vector_reduce_fmul:
-  // The fmin and fmax intrinsics are not currently supported due to a
-  // discrepancy between the LLVM semantics and the RVV 0.10 ISA behaviour with
-  // regards to signaling NaNs: the vector fmin/fmax reduction intrinsics match
-  // the behaviour minnum/maxnum intrinsics, whereas the vfredmin/vfredmax
-  // instructions match the vfmin/vfmax instructions which match the equivalent
-  // scalar fmin/fmax instructions as defined in 2.2 F/D/Q extension (see
-  // https://bugs.llvm.org/show_bug.cgi?id=27363).
-  // This behaviour is likely fixed in version 2.3 of the RISC-V F/D/Q
-  // extension, where fmin/fmax behave like minnum/maxnum, but until then the
-  // intrinsics are left unsupported.
-  case Intrinsic::vector_reduce_fmax:
-  case Intrinsic::vector_reduce_fmin:
     return true;
   }
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
@@ -791,3 +791,625 @@
   %red = call double @llvm.vector.reduce.fadd.v32f64(double %s, <32 x double> %v)
   ret double %red
 }
+
+declare half @llvm.vector.reduce.fmin.v2f16(<2 x half>)
+
+define half @vreduce_fmin_v2f16(<2 x half>* %x) {
+; CHECK-LABEL: vreduce_fmin_v2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI42_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI42_0)(a1)
+; CHECK-NEXT:    vsetivli a1, 2, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v26, ft0
+; CHECK-NEXT:    vsetivli a0, 2, e16,m1,ta,mu
+; CHECK-NEXT:    vfredmin.vs v25, v25, v26
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %v = load <2 x half>, <2 x half>* %x
+  %red = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> %v)
+  ret half %red
+}
+
+declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>)
+
+define half @vreduce_fmin_v4f16(<4 x half>* %x) {
+; CHECK-LABEL: vreduce_fmin_v4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI43_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI43_0)(a1)
+; CHECK-NEXT:    vsetivli a1, 4, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v26, ft0
+; CHECK-NEXT:    vsetivli a0, 4, e16,m1,ta,mu
+; CHECK-NEXT:    vfredmin.vs v25, v25, v26
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %v = load <4 x half>, <4 x half>* %x
+  %red = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> %v)
+  ret half %red
+}
+
+define half @vreduce_fmin_v4f16_nonans(<4 x half>* %x) {
+; CHECK-LABEL: vreduce_fmin_v4f16_nonans:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI44_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI44_0)(a1)
+; CHECK-NEXT:    vsetivli a1, 4, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v26, ft0
+; CHECK-NEXT:    vsetivli a0, 4, e16,m1,ta,mu
+; CHECK-NEXT:    vfredmin.vs v25, v25, v26
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %v = load <4 x half>, <4 x half>* %x
+  %red = call nnan half @llvm.vector.reduce.fmin.v4f16(<4 x half> %v)
+  ret half %red
+}
+
+define half @vreduce_fmin_v4f16_nonans_noinfs(<4 x half>* %x) {
+; CHECK-LABEL: vreduce_fmin_v4f16_nonans_noinfs:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI45_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI45_0)(a1)
+; CHECK-NEXT:    vsetivli a1, 4, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v26, ft0
+; CHECK-NEXT:    vsetivli a0, 4, e16,m1,ta,mu
+; CHECK-NEXT:    vfredmin.vs v25, v25, v26
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %v = load <4 x half>, <4 x half>* %x
+  %red = call nnan ninf half @llvm.vector.reduce.fmin.v4f16(<4 x half> %v)
+  ret half %red
+}
+
+declare half @llvm.vector.reduce.fmin.v128f16(<128 x half>)
+
+define half @vreduce_fmin_v128f16(<128 x half>* %x) {
+; CHECK-LABEL: vreduce_fmin_v128f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 64
+; CHECK-NEXT:    vsetvli a2, a1, e16,m8,ta,mu
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    addi a0, a0, 128
+; CHECK-NEXT:    vle16.v v16, (a0)
+; CHECK-NEXT:    lui a0, %hi(.LCPI46_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI46_0)(a0)
+; CHECK-NEXT:    vfmin.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetvli a0, a1, e16,m8,ta,mu
+; CHECK-NEXT:    vfredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %v = load <128 x half>, <128 x half>* %x
+  %red = call half @llvm.vector.reduce.fmin.v128f16(<128 x half> %v)
+  ret half %red
+}
+
+declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
+
+define float @vreduce_fmin_v2f32(<2 x float>* %x) {
+; CHECK-LABEL: vreduce_fmin_v2f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI47_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI47_0)(a1)
+; CHECK-NEXT:    vsetivli a1, 2, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v26, ft0
+; CHECK-NEXT:    vsetivli a0, 2, e32,m1,ta,mu
+; CHECK-NEXT:    vfredmin.vs v25, v25, v26
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %v = load <2 x float>, <2 x float>* %x
+  %red = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %v)
+  ret float %red
+}
+
+declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
+
+define float @vreduce_fmin_v4f32(<4 x float>* %x) {
+; CHECK-LABEL: vreduce_fmin_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI48_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI48_0)(a1)
+; CHECK-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v26, ft0
+; CHECK-NEXT:    vsetivli a0, 4, e32,m1,ta,mu
+; CHECK-NEXT:    vfredmin.vs v25, v25, v26
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %v = load <4 x float>, <4 x float>* %x
+  %red = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v)
+  ret float %red
+}
+
+define float @vreduce_fmin_v4f32_nonans(<4 x float>* %x) {
+; CHECK-LABEL: vreduce_fmin_v4f32_nonans:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI49_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI49_0)(a1)
+; CHECK-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v26, ft0
+; CHECK-NEXT:    vsetivli a0, 4, e32,m1,ta,mu
+; CHECK-NEXT:    vfredmin.vs v25, v25, v26
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %v = load <4 x float>, <4 x float>* %x
+  %red = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v)
+  ret float %red
+}
+
+define float @vreduce_fmin_v4f32_nonans_noinfs(<4 x float>* %x) {
+; CHECK-LABEL: vreduce_fmin_v4f32_nonans_noinfs:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI50_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI50_0)(a1)
+; CHECK-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v26, ft0
+; CHECK-NEXT:    vsetivli a0, 4, e32,m1,ta,mu
+; CHECK-NEXT:    vfredmin.vs v25, v25, v26
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %v = load <4 x float>, <4 x float>* %x
+  %red = call nnan ninf float @llvm.vector.reduce.fmin.v4f32(<4 x float> %v)
+  ret float %red
+}
+
+declare float @llvm.vector.reduce.fmin.v128f32(<128 x float>)
+
+define float @vreduce_fmin_v128f32(<128 x float>* %x) {
+; CHECK-LABEL: vreduce_fmin_v128f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 32
+; CHECK-NEXT:    vsetvli a2, a1, e32,m8,ta,mu
+; CHECK-NEXT:    addi a2, a0, 384
+; CHECK-NEXT:    vle32.v v8, (a2)
+; CHECK-NEXT:    addi a2, a0, 128
+; CHECK-NEXT:    vle32.v v16, (a2)
+; CHECK-NEXT:    vle32.v v24, (a0)
+; CHECK-NEXT:    addi a0, a0, 256
+; CHECK-NEXT:    vle32.v v0, (a0)
+; CHECK-NEXT:    vfmin.vv v8, v16, v8
+; CHECK-NEXT:    lui a0, %hi(.LCPI51_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI51_0)(a0)
+; CHECK-NEXT:    vfmin.vv v16, v24, v0
+; CHECK-NEXT:    vfmin.vv v8, v16, v8
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetvli a0, a1, e32,m8,ta,mu
+; CHECK-NEXT:    vfredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %v = load <128 x float>, <128 x float>* %x
+  %red = call float @llvm.vector.reduce.fmin.v128f32(<128 x float> %v)
+  ret float %red
+}
+
+declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
+
+define double @vreduce_fmin_v2f64(<2 x double>* %x) {
+; CHECK-LABEL: vreduce_fmin_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI52_0)
+; CHECK-NEXT:    fld ft0, %lo(.LCPI52_0)(a1)
+; CHECK-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
+; CHECK-NEXT:    vle64.v v25, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v26, ft0
+; CHECK-NEXT:    vsetivli a0, 2, e64,m1,ta,mu
+; CHECK-NEXT:    vfredmin.vs v25, v25, v26
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %v = load <2 x double>, <2 x double>* %x
+  %red = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %v)
+  ret double %red
+}
+
+declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
+
+define double @vreduce_fmin_v4f64(<4 x double>* %x) {
+; CHECK-LABEL: vreduce_fmin_v4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI53_0)
+; CHECK-NEXT:    fld ft0, %lo(.LCPI53_0)(a1)
+; CHECK-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
+; CHECK-NEXT:    vle64.v v26, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
+; CHECK-NEXT:    vfredmin.vs v25, v26, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %v = load <4 x double>, <4 x double>* %x
+  %red = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %v)
+  ret double %red
+}
+
+define double @vreduce_fmin_v4f64_nonans(<4 x double>* %x) {
+; CHECK-LABEL: vreduce_fmin_v4f64_nonans:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI54_0)
+; CHECK-NEXT:    fld ft0, %lo(.LCPI54_0)(a1)
+; CHECK-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
+; CHECK-NEXT:    vle64.v v26, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
+; CHECK-NEXT:    vfredmin.vs v25, v26, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %v = load <4 x double>, <4 x double>* %x
+  %red = call nnan double @llvm.vector.reduce.fmin.v4f64(<4 x double> %v)
+  ret double %red
+}
+
+define double @vreduce_fmin_v4f64_nonans_noinfs(<4 x double>* %x) {
+; CHECK-LABEL: vreduce_fmin_v4f64_nonans_noinfs:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI55_0)
+; CHECK-NEXT:    fld ft0, %lo(.LCPI55_0)(a1)
+; CHECK-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
+; CHECK-NEXT:    vle64.v v26, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
+; CHECK-NEXT:    vfredmin.vs v25, v26, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %v = load <4 x double>, <4 x double>* %x
+  %red = call nnan ninf double @llvm.vector.reduce.fmin.v4f64(<4 x double> %v)
+  ret double %red
+}
+
+declare double @llvm.vector.reduce.fmin.v32f64(<32 x double>)
+
+define double @vreduce_fmin_v32f64(<32 x double>* %x) {
+; CHECK-LABEL: vreduce_fmin_v32f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a1, 16, e64,m8,ta,mu
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    addi a0, a0, 128
+; CHECK-NEXT:    vle64.v v16, (a0)
+; CHECK-NEXT:    lui a0, %hi(.LCPI56_0)
+; CHECK-NEXT:    fld ft0, %lo(.LCPI56_0)(a0)
+; CHECK-NEXT:    vfmin.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
+; CHECK-NEXT:    vfredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %v = load <32 x double>, <32 x double>* %x
+  %red = call double @llvm.vector.reduce.fmin.v32f64(<32 x double> %v)
+  ret double %red
+}
+
+declare half @llvm.vector.reduce.fmax.v2f16(<2 x half>)
+
+define half @vreduce_fmax_v2f16(<2 x half>* %x) {
+; CHECK-LABEL: vreduce_fmax_v2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI57_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI57_0)(a1)
+; CHECK-NEXT:    vsetivli a1, 2, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v26, ft0
+; CHECK-NEXT:    vsetivli a0, 2, e16,m1,ta,mu
+; CHECK-NEXT:    vfredmax.vs v25, v25, v26
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %v = load <2 x half>, <2 x half>* %x
+  %red = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> %v)
+  ret half %red
+}
+
+declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>)
+
+define half @vreduce_fmax_v4f16(<4 x half>* %x) {
+; CHECK-LABEL: vreduce_fmax_v4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI58_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI58_0)(a1)
+; CHECK-NEXT:    vsetivli a1, 4, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v26, ft0
+; CHECK-NEXT:    vsetivli a0, 4, e16,m1,ta,mu
+; CHECK-NEXT:    vfredmax.vs v25, v25, v26
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %v = load <4 x half>, <4 x half>* %x
+  %red = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %v)
+  ret half %red
+}
+
+define half @vreduce_fmax_v4f16_nonans(<4 x half>* %x) {
+; CHECK-LABEL: vreduce_fmax_v4f16_nonans:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI59_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI59_0)(a1)
+; CHECK-NEXT:    vsetivli a1, 4, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v26, ft0
+; CHECK-NEXT:    vsetivli a0, 4, e16,m1,ta,mu
+; CHECK-NEXT:    vfredmax.vs v25, v25, v26
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %v = load <4 x half>, <4 x half>* %x
+  %red = call nnan half @llvm.vector.reduce.fmax.v4f16(<4 x half> %v)
+  ret half %red
+}
+
+define half @vreduce_fmax_v4f16_nonans_noinfs(<4 x half>* %x) {
+; CHECK-LABEL: vreduce_fmax_v4f16_nonans_noinfs:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI60_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI60_0)(a1)
+; CHECK-NEXT:    vsetivli a1, 4, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v26, ft0
+; CHECK-NEXT:    vsetivli a0, 4, e16,m1,ta,mu
+; CHECK-NEXT:    vfredmax.vs v25, v25, v26
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %v = load <4 x half>, <4 x half>* %x
+  %red = call nnan ninf half @llvm.vector.reduce.fmax.v4f16(<4 x half> %v)
+  ret half %red
+}
+
+declare half @llvm.vector.reduce.fmax.v128f16(<128 x half>)
+
+define half @vreduce_fmax_v128f16(<128 x half>* %x) {
+; CHECK-LABEL: vreduce_fmax_v128f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 64
+; CHECK-NEXT:    vsetvli a2, a1, e16,m8,ta,mu
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    addi a0, a0, 128
+; CHECK-NEXT:    vle16.v v16, (a0)
+; CHECK-NEXT:    lui a0, %hi(.LCPI61_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI61_0)(a0)
+; CHECK-NEXT:    vfmax.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetvli a0, a1, e16,m8,ta,mu
+; CHECK-NEXT:    vfredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %v = load <128 x half>, <128 x half>* %x
+  %red = call half @llvm.vector.reduce.fmax.v128f16(<128 x half> %v)
+  ret half %red
+}
+
+declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>)
+
+define float @vreduce_fmax_v2f32(<2 x float>* %x) {
+; CHECK-LABEL: vreduce_fmax_v2f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI62_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI62_0)(a1)
+; CHECK-NEXT:    vsetivli a1, 2, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v26, ft0
+; CHECK-NEXT:    vsetivli a0, 2, e32,m1,ta,mu
+; CHECK-NEXT:    vfredmax.vs v25, v25, v26
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %v = load <2 x float>, <2 x float>* %x
+  %red = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %v)
+  ret float %red
+}
+
+declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
+
+define float @vreduce_fmax_v4f32(<4 x float>* %x) {
+; CHECK-LABEL: vreduce_fmax_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI63_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI63_0)(a1)
+; CHECK-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v26, ft0
+; CHECK-NEXT:    vsetivli a0, 4, e32,m1,ta,mu
+; CHECK-NEXT:    vfredmax.vs v25, v25, v26
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %v = load <4 x float>, <4 x float>* %x
+  %red = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v)
+  ret float %red
+}
+
+define float @vreduce_fmax_v4f32_nonans(<4 x float>* %x) {
+; CHECK-LABEL: vreduce_fmax_v4f32_nonans:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI64_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI64_0)(a1)
+; CHECK-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v26, ft0
+; CHECK-NEXT:    vsetivli a0, 4, e32,m1,ta,mu
+; CHECK-NEXT:    vfredmax.vs v25, v25, v26
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %v = load <4 x float>, <4 x float>* %x
+  %red = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v)
+  ret float %red
+}
+
+define float @vreduce_fmax_v4f32_nonans_noinfs(<4 x float>* %x) {
+; CHECK-LABEL: vreduce_fmax_v4f32_nonans_noinfs:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI65_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI65_0)(a1)
+; CHECK-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v26, ft0
+; CHECK-NEXT:    vsetivli a0, 4, e32,m1,ta,mu
+; CHECK-NEXT:    vfredmax.vs v25, v25, v26
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %v = load <4 x float>, <4 x float>* %x
+  %red = call nnan ninf float @llvm.vector.reduce.fmax.v4f32(<4 x float> %v)
+  ret float %red
+}
+
+declare float @llvm.vector.reduce.fmax.v128f32(<128 x float>)
+
+define float @vreduce_fmax_v128f32(<128 x float>* %x) {
+; CHECK-LABEL: vreduce_fmax_v128f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a1, zero, 32
+; CHECK-NEXT:    vsetvli a2, a1, e32,m8,ta,mu
+; CHECK-NEXT:    addi a2, a0, 384
+; CHECK-NEXT:    vle32.v v8, (a2)
+; CHECK-NEXT:    addi a2, a0, 128
+; CHECK-NEXT:    vle32.v v16, (a2)
+; CHECK-NEXT:    vle32.v v24, (a0)
+; CHECK-NEXT:    addi a0, a0, 256
+; CHECK-NEXT:    vle32.v v0, (a0)
+; CHECK-NEXT:    vfmax.vv v8, v16, v8
+; CHECK-NEXT:    lui a0, %hi(.LCPI66_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI66_0)(a0)
+; CHECK-NEXT:    vfmax.vv v16, v24, v0
+; CHECK-NEXT:    vfmax.vv v8, v16, v8
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetvli a0, a1, e32,m8,ta,mu
+; CHECK-NEXT:    vfredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %v = load <128 x float>, <128 x float>* %x
+  %red = call float @llvm.vector.reduce.fmax.v128f32(<128 x float> %v)
+  ret float %red
+}
+
+declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
+
+define double @vreduce_fmax_v2f64(<2 x double>* %x) {
+; CHECK-LABEL: vreduce_fmax_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI67_0)
+; CHECK-NEXT:    fld ft0, %lo(.LCPI67_0)(a1)
+; CHECK-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
+; CHECK-NEXT:    vle64.v v25, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v26, ft0
+; CHECK-NEXT:    vsetivli a0, 2, e64,m1,ta,mu
+; CHECK-NEXT:    vfredmax.vs v25, v25, v26
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %v = load <2 x double>, <2 x double>* %x
+  %red = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %v)
+  ret double %red
+}
+
+declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
+
+define double @vreduce_fmax_v4f64(<4 x double>* %x) {
+; CHECK-LABEL: vreduce_fmax_v4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI68_0)
+; CHECK-NEXT:    fld ft0, %lo(.LCPI68_0)(a1)
+; CHECK-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
+; CHECK-NEXT:    vle64.v v26, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
+; CHECK-NEXT:    vfredmax.vs v25, v26, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %v = load <4 x double>, <4 x double>* %x
+  %red = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %v)
+  ret double %red
+}
+
+define double @vreduce_fmax_v4f64_nonans(<4 x double>* %x) {
+; CHECK-LABEL: vreduce_fmax_v4f64_nonans:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI69_0)
+; CHECK-NEXT:    fld ft0, %lo(.LCPI69_0)(a1)
+; CHECK-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
+; CHECK-NEXT:    vle64.v v26, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
+; CHECK-NEXT:    vfredmax.vs v25, v26, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %v = load <4 x double>, <4 x double>* %x
+  %red = call nnan double @llvm.vector.reduce.fmax.v4f64(<4 x double> %v)
+  ret double %red
+}
+
+define double @vreduce_fmax_v4f64_nonans_noinfs(<4 x double>* %x) {
+; CHECK-LABEL: vreduce_fmax_v4f64_nonans_noinfs:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI70_0)
+; CHECK-NEXT:    fld ft0, %lo(.LCPI70_0)(a1)
+; CHECK-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
+; CHECK-NEXT:    vle64.v v26, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetivli a0, 4, e64,m2,ta,mu
+; CHECK-NEXT:    vfredmax.vs v25, v26, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %v = load <4 x double>, <4 x double>* %x
+  %red = call nnan ninf double @llvm.vector.reduce.fmax.v4f64(<4 x double> %v)
+  ret double %red
+}
+
+declare double @llvm.vector.reduce.fmax.v32f64(<32 x double>)
+
+define double @vreduce_fmax_v32f64(<32 x double>* %x) {
+; CHECK-LABEL: vreduce_fmax_v32f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a1, 16, e64,m8,ta,mu
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    addi a0, a0, 128
+; CHECK-NEXT:    vle64.v v16, (a0)
+; CHECK-NEXT:    lui a0, %hi(.LCPI71_0)
+; CHECK-NEXT:    fld ft0, %lo(.LCPI71_0)(a0)
+; CHECK-NEXT:    vfmax.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetivli a0, 16, e64,m8,ta,mu
+; CHECK-NEXT:    vfredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %v = load <32 x double>, <32 x double>* %x
+  %red = call double @llvm.vector.reduce.fmax.v32f64(<32 x double> %v)
+  ret double %red
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
@@ -270,3 +270,619 @@
   %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, <vscale x 4 x double> %v)
   ret double %red
 }
+
+declare half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half>)
+
+define half @vreduce_fmin_nxv1f16(<vscale x 1 x half> %v) {
+; CHECK-LABEL: vreduce_fmin_nxv1f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI18_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI18_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vfredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+  ret half %red
+}
+
+define half @vreduce_fmin_nxv1f16_nonans(<vscale x 1 x half> %v) #0 {
+; CHECK-LABEL: vreduce_fmin_nxv1f16_nonans:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI19_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI19_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vfredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call nnan half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+  ret half %red
+}
+
+define half @vreduce_fmin_nxv1f16_nonans_noinfs(<vscale x 1 x half> %v) #1 {
+; CHECK-LABEL: vreduce_fmin_nxv1f16_nonans_noinfs:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI20_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI20_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vfredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call nnan ninf half @llvm.vector.reduce.fmin.nxv1f16(<vscale x 1 x half> %v)
+  ret half %red
+}
+
+declare half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half>)
+
+define half @vreduce_fmin_nxv2f16(<vscale x 2 x half> %v) {
+; CHECK-LABEL: vreduce_fmin_nxv2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI21_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI21_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
+; CHECK-NEXT:    vfredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %v)
+  ret half %red
+}
+
+declare half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half>)
+
+define half @vreduce_fmin_nxv4f16(<vscale x 4 x half> %v) {
+; CHECK-LABEL: vreduce_fmin_nxv4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI22_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI22_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vfredmin.vs v25, v8, v25
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %v)
+  ret half %red
+}
+
+declare half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half>)
+
+define half @vreduce_fmin_nxv64f16(<vscale x 64 x half> %v) {
+; CHECK-LABEL: vreduce_fmin_nxv64f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI23_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI23_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e16,m8,ta,mu
+; CHECK-NEXT:    vfmin.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetvli a0, zero, e16,m8,ta,mu
+; CHECK-NEXT:    vfredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call half @llvm.vector.reduce.fmin.nxv64f16(<vscale x 64 x half> %v)
+  ret half %red
+}
+
+declare float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float>)
+
+define float @vreduce_fmin_nxv1f32(<vscale x 1 x float> %v) {
+; CHECK-LABEL: vreduce_fmin_nxv1f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI24_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI24_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vfredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+  ret float %red
+}
+
+define float @vreduce_fmin_nxv1f32_nonans(<vscale x 1 x float> %v) {
+; CHECK-LABEL: vreduce_fmin_nxv1f32_nonans:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI25_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI25_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vfredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call nnan float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+  ret float %red
+}
+
+define float @vreduce_fmin_nxv1f32_nonans_noinfs(<vscale x 1 x float> %v) {
+; CHECK-LABEL: vreduce_fmin_nxv1f32_nonans_noinfs:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI26_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI26_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vfredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call nnan ninf float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> %v)
+  ret float %red
+}
+
+declare float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float>)
+
+define float @vreduce_fmin_nxv2f32(<vscale x 2 x float> %v) {
+; CHECK-LABEL: vreduce_fmin_nxv2f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI27_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI27_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vfredmin.vs v25, v8, v25
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %v)
+  ret float %red
+}
+
+declare float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float>)
+
+define float @vreduce_fmin_nxv4f32(<vscale x 4 x float> %v) {
+; CHECK-LABEL: vreduce_fmin_nxv4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI28_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI28_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
+; CHECK-NEXT:    vfredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v)
+  ret float %red
+}
+
+declare float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float>)
+
+define float @vreduce_fmin_nxv32f32(<vscale x 32 x float> %v) {
+; CHECK-LABEL: vreduce_fmin_nxv32f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI29_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI29_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e32,m8,ta,mu
+; CHECK-NEXT:    vfmin.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetvli a0, zero, e32,m8,ta,mu
+; CHECK-NEXT:    vfredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call float @llvm.vector.reduce.fmin.nxv32f32(<vscale x 32 x float> %v)
+  ret float %red
+}
+
+declare double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double>)
+
+define double @vreduce_fmin_nxv1f64(<vscale x 1 x double> %v) {
+; CHECK-LABEL: vreduce_fmin_nxv1f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI30_0)
+; CHECK-NEXT:    fld ft0, %lo(.LCPI30_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vfredmin.vs v25, v8, v25
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+  ret double %red
+}
+
+define double @vreduce_fmin_nxv1f64_nonans(<vscale x 1 x double> %v) {
+; CHECK-LABEL: vreduce_fmin_nxv1f64_nonans:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI31_0)
+; CHECK-NEXT:    fld ft0, %lo(.LCPI31_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vfredmin.vs v25, v8, v25
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call nnan double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+  ret double %red
+}
+
+define double @vreduce_fmin_nxv1f64_nonans_noinfs(<vscale x 1 x double> %v) {
+; CHECK-LABEL: vreduce_fmin_nxv1f64_nonans_noinfs:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI32_0)
+; CHECK-NEXT:    fld ft0, %lo(.LCPI32_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vfredmin.vs v25, v8, v25
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call nnan ninf double @llvm.vector.reduce.fmin.nxv1f64(<vscale x 1 x double> %v)
+  ret double %red
+}
+
+declare double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double>)
+
+define double @vreduce_fmin_nxv2f64(<vscale x 2 x double> %v) {
+; CHECK-LABEL: vreduce_fmin_nxv2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI33_0)
+; CHECK-NEXT:    fld ft0, %lo(.LCPI33_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT:    vfredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %v)
+  ret double %red
+}
+
+declare double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double>)
+
+define double @vreduce_fmin_nxv4f64(<vscale x 4 x double> %v) {
+; CHECK-LABEL: vreduce_fmin_nxv4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI34_0)
+; CHECK-NEXT:    fld ft0, %lo(.LCPI34_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT:    vfredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v)
+  ret double %red
+}
+
+declare double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double>)
+
+define double @vreduce_fmin_nxv16f64(<vscale x 16 x double> %v) {
+; CHECK-LABEL: vreduce_fmin_nxv16f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI35_0)
+; CHECK-NEXT:    fld ft0, %lo(.LCPI35_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT:    vfmin.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT:    vfredmin.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call double @llvm.vector.reduce.fmin.nxv16f64(<vscale x 16 x double> %v)
+  ret double %red
+}
+
+declare half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half>)
+
+define half @vreduce_fmax_nxv1f16(<vscale x 1 x half> %v) {
+; CHECK-LABEL: vreduce_fmax_nxv1f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI36_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI36_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vfredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+  ret half %red
+}
+
+define half @vreduce_fmax_nxv1f16_nonans(<vscale x 1 x half> %v) #0 {
+; CHECK-LABEL: vreduce_fmax_nxv1f16_nonans:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI37_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI37_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vfredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call nnan half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+  ret half %red
+}
+
+define half @vreduce_fmax_nxv1f16_nonans_noinfs(<vscale x 1 x half> %v) #1 {
+; CHECK-LABEL: vreduce_fmax_nxv1f16_nonans_noinfs:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI38_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI38_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vfredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call nnan ninf half @llvm.vector.reduce.fmax.nxv1f16(<vscale x 1 x half> %v)
+  ret half %red
+}
+
+declare half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half>)
+
+define half @vreduce_fmax_nxv2f16(<vscale x 2 x half> %v) {
+; CHECK-LABEL: vreduce_fmax_nxv2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI39_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI39_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
+; CHECK-NEXT:    vfredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %v)
+  ret half %red
+}
+
+declare half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half>)
+
+define half @vreduce_fmax_nxv4f16(<vscale x 4 x half> %v) {
+; CHECK-LABEL: vreduce_fmax_nxv4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI40_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI40_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vfredmax.vs v25, v8, v25
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %v)
+  ret half %red
+}
+
+declare half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half>)
+
+define half @vreduce_fmax_nxv64f16(<vscale x 64 x half> %v) {
+; CHECK-LABEL: vreduce_fmax_nxv64f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI41_0)
+; CHECK-NEXT:    flh ft0, %lo(.LCPI41_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e16,m8,ta,mu
+; CHECK-NEXT:    vfmax.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetvli a0, zero, e16,m8,ta,mu
+; CHECK-NEXT:    vfredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call half @llvm.vector.reduce.fmax.nxv64f16(<vscale x 64 x half> %v)
+  ret half %red
+}
+
+declare float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float>)
+
+define float @vreduce_fmax_nxv1f32(<vscale x 1 x float> %v) {
+; CHECK-LABEL: vreduce_fmax_nxv1f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI42_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI42_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vfredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+  ret float %red
+}
+
+define float @vreduce_fmax_nxv1f32_nonans(<vscale x 1 x float> %v) {
+; CHECK-LABEL: vreduce_fmax_nxv1f32_nonans:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI43_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI43_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vfredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call nnan float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+  ret float %red
+}
+
+define float @vreduce_fmax_nxv1f32_nonans_noinfs(<vscale x 1 x float> %v) {
+; CHECK-LABEL: vreduce_fmax_nxv1f32_nonans_noinfs:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI44_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI44_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vfredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call nnan ninf float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> %v)
+  ret float %red
+}
+
+declare float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float>)
+
+define float @vreduce_fmax_nxv2f32(<vscale x 2 x float> %v) {
+; CHECK-LABEL: vreduce_fmax_nxv2f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI45_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI45_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vfredmax.vs v25, v8, v25
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %v)
+  ret float %red
+}
+
+declare float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float>)
+
+define float @vreduce_fmax_nxv4f32(<vscale x 4 x float> %v) {
+; CHECK-LABEL: vreduce_fmax_nxv4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI46_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI46_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
+; CHECK-NEXT:    vfredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v)
+  ret float %red
+}
+
+declare float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float>)
+
+define float @vreduce_fmax_nxv32f32(<vscale x 32 x float> %v) {
+; CHECK-LABEL: vreduce_fmax_nxv32f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI47_0)
+; CHECK-NEXT:    flw ft0, %lo(.LCPI47_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e32,m8,ta,mu
+; CHECK-NEXT:    vfmax.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetvli a0, zero, e32,m8,ta,mu
+; CHECK-NEXT:    vfredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call float @llvm.vector.reduce.fmax.nxv32f32(<vscale x 32 x float> %v)
+  ret float %red
+}
+
+declare double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double>)
+
+define double @vreduce_fmax_nxv1f64(<vscale x 1 x double> %v) {
+; CHECK-LABEL: vreduce_fmax_nxv1f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI48_0)
+; CHECK-NEXT:    fld ft0, %lo(.LCPI48_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vfredmax.vs v25, v8, v25
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+  ret double %red
+}
+
+define double @vreduce_fmax_nxv1f64_nonans(<vscale x 1 x double> %v) {
+; CHECK-LABEL: vreduce_fmax_nxv1f64_nonans:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI49_0)
+; CHECK-NEXT:    fld ft0, %lo(.LCPI49_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vfredmax.vs v25, v8, v25
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call nnan double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+  ret double %red
+}
+
+define double @vreduce_fmax_nxv1f64_nonans_noinfs(<vscale x 1 x double> %v) {
+; CHECK-LABEL: vreduce_fmax_nxv1f64_nonans_noinfs:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI50_0)
+; CHECK-NEXT:    fld ft0, %lo(.LCPI50_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vfredmax.vs v25, v8, v25
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call nnan ninf double @llvm.vector.reduce.fmax.nxv1f64(<vscale x 1 x double> %v)
+  ret double %red
+}
+
+declare double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double>)
+
+define double @vreduce_fmax_nxv2f64(<vscale x 2 x double> %v) {
+; CHECK-LABEL: vreduce_fmax_nxv2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI51_0)
+; CHECK-NEXT:    fld ft0, %lo(.LCPI51_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT:    vfredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %v)
+  ret double %red
+}
+
+declare double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double>)
+
+define double @vreduce_fmax_nxv4f64(<vscale x 4 x double> %v) {
+; CHECK-LABEL: vreduce_fmax_nxv4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI52_0)
+; CHECK-NEXT:    fld ft0, %lo(.LCPI52_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT:    vfredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v)
+  ret double %red
+}
+
+declare double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double>)
+
+define double @vreduce_fmax_nxv16f64(<vscale x 16 x double> %v) {
+; CHECK-LABEL: vreduce_fmax_nxv16f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI53_0)
+; CHECK-NEXT:    fld ft0, %lo(.LCPI53_0)(a0)
+; CHECK-NEXT:    vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT:    vfmax.vv v8, v8, v16
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.v.f v25, ft0
+; CHECK-NEXT:    vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT:    vfredmax.vs v25, v8, v25
+; CHECK-NEXT:    vsetvli zero, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vfmv.f.s fa0, v25
+; CHECK-NEXT:    ret
+  %red = call double @llvm.vector.reduce.fmax.nxv16f64(<vscale x 16 x double> %v)
+  ret double %red
+}