diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -822,11 +822,12 @@ // TODO: support more ops. static const unsigned ZvfhminPromoteOps[] = { - ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB, - ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT, - ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN, ISD::FCEIL, - ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT, - ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SPLAT_VECTOR}; + ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB, + ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT, + ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN, ISD::FCEIL, + ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT, + ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SPLAT_VECTOR, ISD::FMAXIMUM, + ISD::FMINIMUM}; // TODO: support more vp ops. static const unsigned ZvfhminPromoteVPOps[] = { @@ -5499,6 +5500,10 @@ } case ISD::FMAXIMUM: case ISD::FMINIMUM: + if (Op.getValueType() == MVT::nxv32f16 && + (Subtarget.hasVInstructionsF16Minimal() && + !Subtarget.hasVInstructionsF16())) + return SplitVectorOp(Op, DAG); return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget); case ISD::FP_EXTEND: { SDLoc DL(Op); diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll @@ -1,22 +1,42 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN declare <2 x half> @llvm.maximum.v2f16(<2 x half>, <2 x half>) define <2 x half> @vfmax_v2f16_vv(<2 x half> %a, <2 x half> %b) { -; CHECK-LABEL: vfmax_v2f16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmax_v2f16_vv: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFH-NEXT: vmfeq.vv v0, v8, v8 +; ZVFH-NEXT: vmfeq.vv v10, v9, v9 +; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v2f16_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10 +; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0 +; ZVFHMIN-NEXT: vfmax.vv v9, v8, v11 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b) ret <2 x half> %v } @@ -24,16 +44,32 @@ declare <4 x half> @llvm.maximum.v4f16(<4 x half>, <4 x half>) define <4 x half> @vfmax_v4f16_vv(<4 x half> %a, <4 x half> %b) { -; CHECK-LABEL: vfmax_v4f16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmax_v4f16_vv: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFH-NEXT: vmfeq.vv v0, v8, v8 +; ZVFH-NEXT: vmfeq.vv v10, v9, v9 +; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v4f16_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10 +; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0 +; ZVFHMIN-NEXT: vmv.v.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0 +; ZVFHMIN-NEXT: vfmax.vv v9, v8, v11 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.maximum.v4f16(<4 x half> %a, <4 x half> %b) ret <4 x half> %v } @@ -41,16 +77,32 @@ declare <8 x half> @llvm.maximum.v8f16(<8 x half>, <8 x half>) define <8 x half> @vfmax_v8f16_vv(<8 x half> %a, <8 x half> %b) { -; CHECK-LABEL: vfmax_v8f16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmax_v8f16_vv: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vmfeq.vv v0, v8, v8 +; ZVFH-NEXT: vmfeq.vv v10, v9, v9 +; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 +; ZVFH-NEXT: vmv.v.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v8f16_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10 +; ZVFHMIN-NEXT: vmerge.vvm v14, v12, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 +; ZVFHMIN-NEXT: vfmax.vv v10, v8, v14 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.maximum.v8f16(<8 x half> %a, <8 x half> %b) ret <8 x half> %v } @@ 
-58,16 +110,32 @@ declare <16 x half> @llvm.maximum.v16f16(<16 x half>, <16 x half>) define <16 x half> @vfmax_v16f16_vv(<16 x half> %a, <16 x half> %b) { -; CHECK-LABEL: vfmax_v16f16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v14 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmax_v16f16_vv: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFH-NEXT: vmfeq.vv v0, v8, v8 +; ZVFH-NEXT: vmfeq.vv v12, v10, v10 +; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0 +; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 +; ZVFH-NEXT: vfmax.vv v8, v8, v14 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v16f16_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 +; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12 +; ZVFHMIN-NEXT: vmerge.vvm v20, v16, v12, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFHMIN-NEXT: vfmax.vv v12, v8, v20 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.maximum.v16f16(<16 x half> %a, <16 x half> %b) ret <16 x half> %v } @@ -220,28 +288,62 @@ } define <2 x half> @vfmax_v2f16_vv_nnan(<2 x half> %a, <2 x half> %b) { -; CHECK-LABEL: vfmax_v2f16_vv_nnan: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vfmax.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmax_v2f16_vv_nnan: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFH-NEXT: vfmax.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnan: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call nnan <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %b) ret <2 x half> %v } ; FIXME: The nnan from fadd isn't propagating. 
define <2 x half> @vfmax_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) { -; CHECK-LABEL: vfmax_v2f16_vv_nnana: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vfadd.vv v10, v8, v8 -; CHECK-NEXT: vmfeq.vv v0, v9, v9 -; CHECK-NEXT: vmfeq.vv v8, v10, v10 -; CHECK-NEXT: vmerge.vvm v11, v9, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 -; CHECK-NEXT: vfmax.vv v8, v11, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmax_v2f16_vv_nnana: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFH-NEXT: vfadd.vv v10, v8, v8 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 +; ZVFH-NEXT: vmfeq.vv v8, v10, v10 +; ZVFH-NEXT: vmerge.vvm v11, v9, v10, v0 +; ZVFH-NEXT: vmv1r.v v0, v8 +; ZVFH-NEXT: vmerge.vvm v8, v10, v9, v0 +; ZVFH-NEXT: vfmax.vv v8, v11, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnana: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v8, v10, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v8, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v10, v11, v9, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v11, v0 +; ZVFHMIN-NEXT: vfmax.vv v9, v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %c = fadd nnan <2 x half> %a, %a %v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %c, <2 x half> %b) ret <2 x half> %v @@ -249,17 +351,40 @@ ; FIXME: The nnan from fadd isn't propagating. 
define <2 x half> @vfmax_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) { -; CHECK-LABEL: vfmax_v2f16_vv_nnanb: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vfadd.vv v10, v9, v9 -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v9, v10, v10 -; CHECK-NEXT: vmerge.vvm v11, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmax_v2f16_vv_nnanb: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFH-NEXT: vfadd.vv v10, v9, v9 +; ZVFH-NEXT: vmfeq.vv v0, v8, v8 +; ZVFH-NEXT: vmfeq.vv v9, v10, v10 +; ZVFH-NEXT: vmerge.vvm v11, v8, v10, v0 +; ZVFH-NEXT: vmv1r.v v0, v9 +; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 +; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnanb: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v9, v10, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 +; ZVFHMIN-NEXT: vmerge.vvm v10, v9, v11, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v9, v0 +; ZVFHMIN-NEXT: vfmax.vv v9, v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %c = fadd nnan <2 x half> %b, %b %v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %c) ret <2 x half> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll @@ -1,22 +1,42 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN declare <2 x half> @llvm.minimum.v2f16(<2 x half>, <2 x half>) define <2 x half> @vfmin_v2f16_vv(<2 x half> %a, <2 x half> %b) { -; CHECK-LABEL: vfmin_v2f16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, 
v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmin_v2f16_vv: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFH-NEXT: vmfeq.vv v0, v8, v8 +; ZVFH-NEXT: vmfeq.vv v10, v9, v9 +; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v2f16_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10 +; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0 +; ZVFHMIN-NEXT: vfmin.vv v9, v8, v11 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b) ret <2 x half> %v } @@ -24,16 +44,32 @@ declare <4 x half> @llvm.minimum.v4f16(<4 x half>, <4 x half>) define <4 x half> @vfmin_v4f16_vv(<4 x half> %a, <4 x half> %b) { -; CHECK-LABEL: vfmin_v4f16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmin_v4f16_vv: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFH-NEXT: vmfeq.vv v0, v8, v8 +; ZVFH-NEXT: vmfeq.vv v10, v9, v9 +; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v4f16_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10 +; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0 +; ZVFHMIN-NEXT: vmv.v.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0 +; ZVFHMIN-NEXT: vfmin.vv v9, v8, v11 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.minimum.v4f16(<4 x half> %a, <4 x half> %b) ret <4 x half> %v } @@ -41,16 +77,32 @@ declare <8 x half> @llvm.minimum.v8f16(<8 x half>, <8 x half>) define <8 x half> @vfmin_v8f16_vv(<8 x half> %a, <8 x half> %b) { -; CHECK-LABEL: vfmin_v8f16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmin_v8f16_vv: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFH-NEXT: vmfeq.vv v0, v8, v8 +; ZVFH-NEXT: vmfeq.vv v10, v9, v9 +; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 +; ZVFH-NEXT: vmv.v.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v8f16_vv: +; ZVFHMIN: # %bb.0: 
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10 +; ZVFHMIN-NEXT: vmerge.vvm v14, v12, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 +; ZVFHMIN-NEXT: vfmin.vv v10, v8, v14 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.minimum.v8f16(<8 x half> %a, <8 x half> %b) ret <8 x half> %v } @@ -58,16 +110,32 @@ declare <16 x half> @llvm.minimum.v16f16(<16 x half>, <16 x half>) define <16 x half> @vfmin_v16f16_vv(<16 x half> %a, <16 x half> %b) { -; CHECK-LABEL: vfmin_v16f16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v14 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmin_v16f16_vv: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFH-NEXT: vmfeq.vv v0, v8, v8 +; ZVFH-NEXT: vmfeq.vv v12, v10, v10 +; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0 +; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 +; ZVFH-NEXT: vfmin.vv v8, v8, v14 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v16f16_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 +; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12 +; ZVFHMIN-NEXT: vmerge.vvm v20, v16, v12, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFHMIN-NEXT: vfmin.vv v12, v8, v20 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.minimum.v16f16(<16 x half> %a, <16 x half> %b) ret <16 x half> %v } @@ -220,28 +288,62 @@ } define <2 x half> @vfmin_v2f16_vv_nnan(<2 x half> %a, <2 x half> %b) { -; CHECK-LABEL: vfmin_v2f16_vv_nnan: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vfmin.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmin_v2f16_vv_nnan: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFH-NEXT: vfmin.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v2f16_vv_nnan: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call nnan <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %b) ret <2 x half> %v } ; FIXME: The nnan from fadd isn't propagating. 
define <2 x half> @vfmin_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) { -; CHECK-LABEL: vfmin_v2f16_vv_nnana: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vfadd.vv v10, v8, v8 -; CHECK-NEXT: vmfeq.vv v0, v9, v9 -; CHECK-NEXT: vmfeq.vv v8, v10, v10 -; CHECK-NEXT: vmerge.vvm v11, v9, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 -; CHECK-NEXT: vfmin.vv v8, v11, v8 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmin_v2f16_vv_nnana: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFH-NEXT: vfadd.vv v10, v8, v8 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 +; ZVFH-NEXT: vmfeq.vv v8, v10, v10 +; ZVFH-NEXT: vmerge.vvm v11, v9, v10, v0 +; ZVFH-NEXT: vmv1r.v v0, v8 +; ZVFH-NEXT: vmerge.vvm v8, v10, v9, v0 +; ZVFH-NEXT: vfmin.vv v8, v11, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v2f16_vv_nnana: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v8, v10, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v8, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v10, v11, v9, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v11, v0 +; ZVFHMIN-NEXT: vfmin.vv v9, v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %c = fadd nnan <2 x half> %a, %a %v = call <2 x half> @llvm.minimum.v2f16(<2 x half> %c, <2 x half> %b) ret <2 x half> %v @@ -249,17 +351,40 @@ ; FIXME: The nnan from fadd isn't propagating. 
define <2 x half> @vfmin_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) { -; CHECK-LABEL: vfmin_v2f16_vv_nnanb: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vfadd.vv v10, v9, v9 -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v9, v10, v10 -; CHECK-NEXT: vmerge.vvm v11, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmin_v2f16_vv_nnanb: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFH-NEXT: vfadd.vv v10, v9, v9 +; ZVFH-NEXT: vmfeq.vv v0, v8, v8 +; ZVFH-NEXT: vmfeq.vv v9, v10, v10 +; ZVFH-NEXT: vmerge.vvm v11, v8, v10, v0 +; ZVFH-NEXT: vmv1r.v v0, v9 +; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 +; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_v2f16_vv_nnanb: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v9, v10, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 +; ZVFHMIN-NEXT: vmerge.vvm v10, v9, v11, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v9, v0 +; ZVFHMIN-NEXT: vfmin.vv v9, v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %c = fadd nnan <2 x half> %b, %b %v = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %c) ret <2 x half> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll @@ -1,22 +1,42 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN declare @llvm.maximum.nxv1f16(, ) define @vfmax_nxv1f16_vv( %a, %b) { -; CHECK-LABEL: vfmax_nxv1f16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmax_nxv1f16_vv: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vmfeq.vv v0, v8, v8 +; ZVFH-NEXT: vmfeq.vv v10, v9, v9 +; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v 
v0, v10 +; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_nxv1f16_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10 +; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0 +; ZVFHMIN-NEXT: vfmax.vv v9, v8, v11 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call @llvm.maximum.nxv1f16( %a, %b) ret %v } @@ -24,16 +44,32 @@ declare @llvm.maximum.nxv2f16(, ) define @vfmax_nxv2f16_vv( %a, %b) { -; CHECK-LABEL: vfmax_nxv2f16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmax_nxv2f16_vv: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vmfeq.vv v0, v8, v8 +; ZVFH-NEXT: vmfeq.vv v10, v9, v9 +; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_nxv2f16_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10 +; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0 +; ZVFHMIN-NEXT: vmv.v.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0 +; ZVFHMIN-NEXT: vfmax.vv v9, v8, v11 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call @llvm.maximum.nxv2f16( %a, %b) ret %v } @@ -41,16 +77,32 @@ declare @llvm.maximum.nxv4f16(, ) define @vfmax_nxv4f16_vv( %a, %b) { -; CHECK-LABEL: vfmax_nxv4f16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmax_nxv4f16_vv: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vmfeq.vv v0, v8, v8 +; ZVFH-NEXT: vmfeq.vv v10, v9, v9 +; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 +; ZVFH-NEXT: vmv.v.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_nxv4f16_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10 +; ZVFHMIN-NEXT: vmerge.vvm v14, v12, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 +; ZVFHMIN-NEXT: vfmax.vv v10, v8, v14 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %v = call @llvm.maximum.nxv4f16( %a, %b) ret %v } @@ 
-58,16 +110,32 @@ declare @llvm.maximum.nxv8f16(, ) define @vfmax_nxv8f16_vv( %a, %b) { -; CHECK-LABEL: vfmax_nxv8f16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v14 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmax_nxv8f16_vv: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFH-NEXT: vmfeq.vv v0, v8, v8 +; ZVFH-NEXT: vmfeq.vv v12, v10, v10 +; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0 +; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 +; ZVFH-NEXT: vfmax.vv v8, v8, v14 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_nxv8f16_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 +; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12 +; ZVFHMIN-NEXT: vmerge.vvm v20, v16, v12, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFHMIN-NEXT: vfmax.vv v12, v8, v20 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call @llvm.maximum.nxv8f16( %a, %b) ret %v } @@ -75,16 +143,45 @@ declare @llvm.maximum.nxv16f16(, ) define @vfmax_nxv16f16_vv( %a, %b) { -; CHECK-LABEL: vfmax_nxv16f16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v20 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmax_nxv16f16_vv: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFH-NEXT: vmfeq.vv v0, v8, v8 +; ZVFH-NEXT: vmfeq.vv v16, v12, v12 +; ZVFH-NEXT: vmerge.vvm v20, v8, v12, v0 +; ZVFH-NEXT: vmv1r.v v0, v16 +; ZVFH-NEXT: vmerge.vvm v8, v12, v8, v0 +; ZVFH-NEXT: vfmax.vv v8, v8, v20 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_nxv16f16_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: sub sp, sp, a0 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24 +; ZVFHMIN-NEXT: vmfeq.vv v1, v16, v16 +; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv1r.v v0, v1 +; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfmax.vv v16, v8, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: ret %v = call @llvm.maximum.nxv16f16( %a, %b) ret %v } @@ -92,27 +189,100 @@ declare @llvm.maximum.nxv32f16(, ) define @vfmax_nxv32f16_vv( %a, %b) nounwind { -; CHECK-LABEL: 
vfmax_nxv32f16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v1, v16, v16 -; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v8, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmax_nxv32f16_vv: +; ZVFH: # %bb.0: +; ZVFH-NEXT: addi sp, sp, -16 +; ZVFH-NEXT: csrr a0, vlenb +; ZVFH-NEXT: slli a0, a0, 3 +; ZVFH-NEXT: sub sp, sp, a0 +; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; ZVFH-NEXT: vmfeq.vv v0, v8, v8 +; ZVFH-NEXT: vmfeq.vv v1, v16, v16 +; ZVFH-NEXT: vmerge.vvm v24, v8, v16, v0 +; ZVFH-NEXT: addi a0, sp, 16 +; ZVFH-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFH-NEXT: vmv1r.v v0, v1 +; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0 +; ZVFH-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFH-NEXT: vfmax.vv v8, v8, v16 +; ZVFH-NEXT: csrr a0, vlenb +; ZVFH-NEXT: slli a0, a0, 3 +; ZVFH-NEXT: add sp, sp, a0 +; ZVFH-NEXT: addi sp, sp, 16 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_nxv32f16_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: li a1, 24 +; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: sub sp, sp, a0 +; ZVFHMIN-NEXT: vmv8r.v v24, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv8r.v v0, v8 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmfeq.vv v1, v16, v16 +; ZVFHMIN-NEXT: vmerge.vvm v24, v8, v16, v0 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv1r.v v0, v1 +; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v8, v0 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfmax.vv v24, v8, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 +; ZVFHMIN-NEXT: vmfeq.vv v1, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v24, v16, v8, v0 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: 
vmv1r.v v0, v1 +; ZVFHMIN-NEXT: vmerge.vvm v16, v8, v16, v0 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfmax.vv v16, v16, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: li a1, 24 +; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: ret %v = call @llvm.maximum.nxv32f16( %a, %b) ret %v } @@ -293,38 +463,95 @@ } define @vfmax_nxv1f16_vv_nnan( %a, %b) { -; CHECK-LABEL: vfmax_nxv1f16_vv_nnan: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfmax.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmax_nxv1f16_vv_nnan: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfmax.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_nxv1f16_vv_nnan: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmax.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call nnan @llvm.maximum.nxv1f16( %a, %b) ret %v } define @vfmax_nxv1f16_vv_nnana( %a, %b) { -; CHECK-LABEL: vfmax_nxv1f16_vv_nnana: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu -; CHECK-NEXT: vmfeq.vv v0, v9, v9 -; CHECK-NEXT: vmv1r.v v10, v9 -; CHECK-NEXT: vfadd.vv v10, v8, v8, v0.t -; CHECK-NEXT: vfmax.vv v8, v10, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmax_nxv1f16_vv_nnana: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 +; ZVFH-NEXT: vmv1r.v v10, v9 +; ZVFH-NEXT: vfadd.vv v10, v8, v8, v0.t +; ZVFH-NEXT: vfmax.vv v8, v10, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_nxv1f16_vv_nnana: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v8, v10, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v8, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v10, v11, v9, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v11, v0 +; ZVFHMIN-NEXT: vfmax.vv v9, v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %c = fadd nnan %a, %a %v = call @llvm.maximum.nxv1f16( %c, %b) ret %v } define @vfmax_nxv1f16_vv_nnanb( %a, %b) { -; CHECK-LABEL: vfmax_nxv1f16_vv_nnanb: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmv1r.v v10, v8 -; CHECK-NEXT: vfadd.vv v10, v9, v9, v0.t -; CHECK-NEXT: vfmax.vv v8, v8, v10 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmax_nxv1f16_vv_nnanb: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; ZVFH-NEXT: 
vmfeq.vv v0, v8, v8 +; ZVFH-NEXT: vmv1r.v v10, v8 +; ZVFH-NEXT: vfadd.vv v10, v9, v9, v0.t +; ZVFH-NEXT: vfmax.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmax_nxv1f16_vv_nnanb: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v9, v10, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 +; ZVFHMIN-NEXT: vmerge.vvm v10, v9, v11, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v9, v0 +; ZVFHMIN-NEXT: vfmax.vv v9, v8, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %c = fadd nnan %b, %b %v = call @llvm.maximum.nxv1f16( %a, %c) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll @@ -1,22 +1,42 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN declare @llvm.minimum.nxv1f16(, ) define @vfmin_nxv1f16_vv( %a, %b) { -; CHECK-LABEL: vfmin_nxv1f16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmin_nxv1f16_vv: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vmfeq.vv v0, v8, v8 +; ZVFH-NEXT: vmfeq.vv v10, v9, v9 +; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_nxv1f16_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10 +; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0 +; ZVFHMIN-NEXT: vfmin.vv v9, v8, v11 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call @llvm.minimum.nxv1f16( %a, %b) ret %v 
} @@ -24,16 +44,32 @@ declare @llvm.minimum.nxv2f16(, ) define @vfmin_nxv2f16_vv( %a, %b) { -; CHECK-LABEL: vfmin_nxv2f16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmin_nxv2f16_vv: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFH-NEXT: vmfeq.vv v0, v8, v8 +; ZVFH-NEXT: vmfeq.vv v10, v9, v9 +; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_nxv2f16_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10 +; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0 +; ZVFHMIN-NEXT: vmv.v.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0 +; ZVFHMIN-NEXT: vfmin.vv v9, v8, v11 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call @llvm.minimum.nxv2f16( %a, %b) ret %v } @@ -41,16 +77,32 @@ declare @llvm.minimum.nxv4f16(, ) define @vfmin_nxv4f16_vv( %a, %b) { -; CHECK-LABEL: vfmin_nxv4f16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmin_nxv4f16_vv: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vmfeq.vv v0, v8, v8 +; ZVFH-NEXT: vmfeq.vv v10, v9, v9 +; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 +; ZVFH-NEXT: vmv.v.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_nxv4f16_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10 +; ZVFHMIN-NEXT: vmerge.vvm v14, v12, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 +; ZVFHMIN-NEXT: vfmin.vv v10, v8, v14 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: ret %v = call @llvm.minimum.nxv4f16( %a, %b) ret %v } @@ -58,16 +110,32 @@ declare @llvm.minimum.nxv8f16(, ) define @vfmin_nxv8f16_vv( %a, %b) { -; CHECK-LABEL: vfmin_nxv8f16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v14 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmin_nxv8f16_vv: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFH-NEXT: vmfeq.vv v0, v8, v8 +; ZVFH-NEXT: vmfeq.vv v12, v10, v10 +; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0 +; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 +; ZVFH-NEXT: vfmin.vv 
v8, v8, v14 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_nxv8f16_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 +; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12 +; ZVFHMIN-NEXT: vmerge.vvm v20, v16, v12, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFHMIN-NEXT: vfmin.vv v12, v8, v20 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: ret %v = call @llvm.minimum.nxv8f16( %a, %b) ret %v } @@ -75,16 +143,45 @@ declare @llvm.minimum.nxv16f16(, ) define @vfmin_nxv16f16_vv( %a, %b) { -; CHECK-LABEL: vfmin_nxv16f16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v20 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmin_nxv16f16_vv: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFH-NEXT: vmfeq.vv v0, v8, v8 +; ZVFH-NEXT: vmfeq.vv v16, v12, v12 +; ZVFH-NEXT: vmerge.vvm v20, v8, v12, v0 +; ZVFH-NEXT: vmv1r.v v0, v16 +; ZVFH-NEXT: vmerge.vvm v8, v12, v8, v0 +; ZVFH-NEXT: vfmin.vv v8, v8, v20 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_nxv16f16_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: sub sp, sp, a0 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24 +; ZVFHMIN-NEXT: vmfeq.vv v1, v16, v16 +; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv1r.v v0, v1 +; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfmin.vv v16, v8, v16 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: ret %v = call @llvm.minimum.nxv16f16( %a, %b) ret %v } @@ -92,27 +189,100 @@ declare @llvm.minimum.nxv32f16(, ) define @vfmin_nxv32f16_vv( %a, %b) nounwind { -; CHECK-LABEL: vfmin_nxv32f16_vv: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v1, v16, v16 -; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v1 -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v8, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret +; ZVFH-LABEL: 
vfmin_nxv32f16_vv: +; ZVFH: # %bb.0: +; ZVFH-NEXT: addi sp, sp, -16 +; ZVFH-NEXT: csrr a0, vlenb +; ZVFH-NEXT: slli a0, a0, 3 +; ZVFH-NEXT: sub sp, sp, a0 +; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; ZVFH-NEXT: vmfeq.vv v0, v8, v8 +; ZVFH-NEXT: vmfeq.vv v1, v16, v16 +; ZVFH-NEXT: vmerge.vvm v24, v8, v16, v0 +; ZVFH-NEXT: addi a0, sp, 16 +; ZVFH-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFH-NEXT: vmv1r.v v0, v1 +; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0 +; ZVFH-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFH-NEXT: vfmin.vv v8, v8, v16 +; ZVFH-NEXT: csrr a0, vlenb +; ZVFH-NEXT: slli a0, a0, 3 +; ZVFH-NEXT: add sp, sp, a0 +; ZVFH-NEXT: addi sp, sp, 16 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_nxv32f16_vv: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: li a1, 24 +; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: sub sp, sp, a0 +; ZVFHMIN-NEXT: vmv8r.v v24, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv8r.v v0, v8 +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmfeq.vv v1, v16, v16 +; ZVFHMIN-NEXT: vmerge.vvm v24, v8, v16, v0 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv1r.v v0, v1 +; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v8, v0 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfmin.vv v24, v8, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 +; ZVFHMIN-NEXT: vmfeq.vv v1, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v24, v16, v8, v0 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv1r.v v0, v1 +; ZVFHMIN-NEXT: vmerge.vvm v16, v8, v16, v0 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfmin.vv v16, v16, v24 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: li a1, 24 +; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: ret %v = call @llvm.minimum.nxv32f16( %a, %b) ret %v } @@ -293,38 +463,95 @@ } define 
@vfmin_nxv1f16_vv_nnan( %a, %b) { -; CHECK-LABEL: vfmin_nxv1f16_vv_nnan: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfmin.vv v8, v8, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmin_nxv1f16_vv_nnan: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFH-NEXT: vfmin.vv v8, v8, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_nxv1f16_vv_nnan: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfmin.vv v9, v9, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %v = call nnan @llvm.minimum.nxv1f16( %a, %b) ret %v } define @vfmin_nxv1f16_vv_nnana( %a, %b) { -; CHECK-LABEL: vfmin_nxv1f16_vv_nnana: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu -; CHECK-NEXT: vmfeq.vv v0, v9, v9 -; CHECK-NEXT: vmv1r.v v10, v9 -; CHECK-NEXT: vfadd.vv v10, v8, v8, v0.t -; CHECK-NEXT: vfmin.vv v8, v10, v9 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmin_nxv1f16_vv_nnana: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 +; ZVFH-NEXT: vmv1r.v v10, v9 +; ZVFH-NEXT: vfadd.vv v10, v8, v8, v0.t +; ZVFH-NEXT: vfmin.vv v8, v10, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_nxv1f16_vv_nnana: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v8, v10, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v8, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v10, v11, v9, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v11, v0 +; ZVFHMIN-NEXT: vfmin.vv v9, v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: ret %c = fadd nnan %a, %a %v = call @llvm.minimum.nxv1f16( %c, %b) ret %v } define @vfmin_nxv1f16_vv_nnanb( %a, %b) { -; CHECK-LABEL: vfmin_nxv1f16_vv_nnanb: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmv1r.v v10, v8 -; CHECK-NEXT: vfadd.vv v10, v9, v9, v0.t -; CHECK-NEXT: vfmin.vv v8, v8, v10 -; CHECK-NEXT: ret +; ZVFH-LABEL: vfmin_nxv1f16_vv_nnanb: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, mu +; ZVFH-NEXT: vmfeq.vv v0, v8, v8 +; ZVFH-NEXT: vmv1r.v v10, v8 +; ZVFH-NEXT: vfadd.vv v10, v9, v9, v0.t +; ZVFH-NEXT: vfmin.vv v8, v8, v10 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vfmin_nxv1f16_vv_nnanb: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vfadd.vv v9, v10, v10 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10 +; ZVFHMIN-NEXT: vsetvli zero, 
zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11
+; ZVFHMIN-NEXT: vmerge.vvm v10, v9, v11, v0
+; ZVFHMIN-NEXT: vmv1r.v v0, v8
+; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v9, v0
+; ZVFHMIN-NEXT: vfmin.vv v9, v8, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
  %c = fadd nnan <vscale x 1 x half> %b, %b
  %v = call <vscale x 1 x half> @llvm.minimum.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %c)
  ret <vscale x 1 x half> %v